xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision 671cb588d292443550200b2bf7b422edb674cf3f)
1 /* Using Modified Sparse Row (MSR) storage.
2 See page 85, "Iterative Methods ..." by Saad. */
3 
4 /*$Id: sbaijfact.c,v 1.47 2000/11/07 02:00:25 hzhang Exp hzhang $*/
5 /*
6     Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
7 */
8 #include "sbaij.h"
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/ilu.h"
12 #include "include/petscis.h"
13 
14 #undef __FUNC__
15 #define __FUNC__ "MatCholeskyFactorSymbolic_SeqSBAIJ"
16 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B)
17 {
18   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
19   int         *rip,ierr,i,mbs = a->mbs,*ai,*aj;
20   int         *jutmp,bs = a->bs,bs2=a->bs2;
21   int         m,nzi,realloc = 0;
22   int         *jl,*q,jumin,jmin,jmax,juptr,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
23   PetscTruth  perm_identity;
24 
25   PetscFunctionBegin;
26   if (!perm) {
27     ierr = ISCreateStride(PETSC_COMM_SELF,mbs,0,1,&perm);CHKERRQ(ierr);
28   }
29   PetscValidHeaderSpecific(perm,IS_COOKIE);
30 
31   /* check whether perm is the identity mapping */
32   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
33   if (!perm_identity) a->permute = PETSC_TRUE;
34 
35   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
36 
37   if (perm_identity){ /* without permutation */
38     ai = a->i; aj = a->j;
39   } else {            /* non-trivial permutation */
40     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRA(ierr);
41     ai = a->inew; aj = a->jnew;
42   }
43 
44   /* initialization */
45   /* Don't know how many column pointers are needed so estimate.
46      Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */
47   iu   = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(iu);
48   umax = (int)(f*ai[mbs] + 1); umax += mbs + 1;
49   ju   = (int*)PetscMalloc(umax*sizeof(int));CHKPTRQ(ju);
50   iu[0] = mbs+1;
51   juptr = mbs;
52   jl =  (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
53   q  =  (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(q);
54   for (i=0; i<mbs; i++){
55     jl[i] = mbs; q[i] = 0;
56   }
57 
58   /* for each row k */
59   for (k=0; k<mbs; k++){
60     nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
61     q[k] = mbs;
62     /* initialize nonzero structure of k-th row to row rip[k] of A */
63     jmin = ai[rip[k]];
64     jmax = ai[rip[k]+1];
65     for (j=jmin; j<jmax; j++){
66       vj = rip[aj[j]]; /* col. value */
67       if(vj > k){
68         qm = k;
69         do {
70           m  = qm; qm = q[m];
71         } while(qm < vj);
72         if (qm == vj) {
73           printf(" error: duplicate entry in A\n"); break;
74         }
75         nzk++;
76         q[m] = vj;
77         q[vj] = qm;
78       } /* if(vj > k) */
79     } /* for (j=jmin; j<jmax; j++) */
80 
81     /* modify nonzero structure of k-th row by computing fill-in
82        for each row i to be merged in */
83     i = k;
84     i = jl[i]; /* next pivot row (== mbs for symbolic factorization) */
85     /* printf(" next pivot row i=%d\n",i); */
86     while (i < mbs){
87       /* merge row i into k-th row */
88       nzi = iu[i+1] - (iu[i]+1);
89       jmin = iu[i] + 1; jmax = iu[i] + nzi;
90       qm = k;
91       for (j=jmin; j<jmax+1; j++){
92         vj = ju[j];
93         do {
94           m = qm; qm = q[m];
95         } while (qm < vj);
96         if (qm != vj){
97          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
98         }
99       }
100       i = jl[i]; /* next pivot row */
101     }
102 
103     /* add k to row list for first nonzero element in k-th row */
104     if (nzk > 0){
105       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
106       jl[k] = jl[i]; jl[i] = k;
107     }
108     iu[k+1] = iu[k] + nzk;   /* printf(" iu[%d]=%d, umax=%d\n", k+1, iu[k+1],umax);*/
109 
110     /* allocate more space to ju if needed */
111     if (iu[k+1] > umax) { printf("allocate more space, iu[%d]=%d > umax=%d\n",k+1, iu[k+1],umax);
112       /* estimate how much additional space we will need */
113       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
114       /* just double the memory each time */
115       maxadd = umax;
116       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
117       umax += maxadd;
118 
119       /* allocate a longer ju */
120       jutmp = (int*)PetscMalloc(umax*sizeof(int));CHKPTRQ(jutmp);
121       ierr  = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
122       ierr  = PetscFree(ju);CHKERRQ(ierr);
123       ju    = jutmp;
124       realloc++; /* count how many times we realloc */
125     }
126 
127     /* save nonzero structure of k-th row in ju */
128     i=k;
129     jumin = juptr + 1; juptr += nzk;
130     for (j=jumin; j<juptr+1; j++){
131       i=q[i];
132       ju[j]=i;
133     }
134   }
135 
136   if (ai[mbs] != 0) {
137     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
138     PLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
139     PLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_lu_fill %g or use \n",af);
140     PLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCLUSetFill(pc,%g);\n",af);
141     PLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
142   } else {
143      PLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
144   }
145 
146   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
147   ierr = PetscFree(q);CHKERRQ(ierr);
148   ierr = PetscFree(jl);CHKERRQ(ierr);
149 
150   /* put together the new matrix */
151   ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr);
152   /* PLogObjectParent(*B,iperm); */
153   b = (Mat_SeqSBAIJ*)(*B)->data;
154   ierr = PetscFree(b->imax);CHKERRQ(ierr);
155   b->singlemalloc = PETSC_FALSE;
156   /* the next line frees the default space generated by the Create() */
157   ierr = PetscFree(b->a);CHKERRQ(ierr);
158   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
159   b->a          = (MatScalar*)PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2);CHKPTRQ(b->a);
160   b->j          = ju;
161   b->i          = iu;
162   b->diag       = 0;
163   b->ilen       = 0;
164   b->imax       = 0;
165   b->row        = perm;
166   ierr          = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
167   b->icol       = perm;
168   ierr          = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
169   b->solve_work = (Scalar*)PetscMalloc((bs*mbs+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
170   /* In b structure:  Free imax, ilen, old a, old j.
171      Allocate idnew, solve_work, new a, new j */
172   PLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
173   b->s_maxnz = b->s_nz = iu[mbs];
174 
175   (*B)->factor                 = FACTOR_CHOLESKY;
176   (*B)->info.factor_mallocs    = realloc;
177   (*B)->info.fill_ratio_given  = f;
178   if (ai[mbs] != 0) {
179     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
180   } else {
181     (*B)->info.fill_ratio_needed = 0.0;
182   }
183 
184   if (perm_identity){
185     switch (bs) {
186       case 1:
187         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
188         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
189         PLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
190         break;
191       case 2:
192         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
193         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
194         PLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
195         break;
196       case 3:
197         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
198         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
199         PLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
200         break;
201       case 4:
202         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
203         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
204         PLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
205         break;
206       case 5:
207         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
208         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
209         PLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
210         break;
211       case 6:
212         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
213         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
214         PLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
215         break;
216       case 7:
217         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
218         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
219         PLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
220       break;
221       default:
222         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
223         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
224         PLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
225       break;
226     }
227   }
228 
229   PetscFunctionReturn(0);
230 }
231 
232 #undef __FUNC__
233 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_N"
234 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
235 {
236   Mat                C = *B;
237   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
238   IS                 perm = b->row;
239   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
240   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
241   int                bs=a->bs,bs2 = a->bs2;
242   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
243   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
244   MatScalar          *W,*work;
245   int                *pivots;
246 
247   PetscFunctionBegin;
248 
249   /* initialization */
250   rtmp  = (MatScalar*)PetscMalloc(bs2*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
251   ierr  = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
252   il    = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
253   jl    = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
254   for (i=0; i<mbs; i++) {
255     jl[i] = mbs; il[0] = 0;
256   }
257   dk    = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(dk);
258   uik   = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(uik);
259   W     = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(W);
260   work  = (MatScalar*)PetscMalloc(bs*sizeof(MatScalar));CHKPTRQ(work);
261   pivots= (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(pivots);
262 
263   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
264 
265   /* check permutation */
266   if (!a->permute){
267     ai = a->i; aj = a->j; aa = a->a;
268   } else {
269     ai = a->inew; aj = a->jnew;
270     aa = (MatScalar*)PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar));CHKPTRQ(aa);
271     ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
272     a2anew  = (int*)PetscMalloc(ai[mbs]*sizeof(int));CHKPTRQ(a2anew);
273     ierr= PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
274 
275     for (i=0; i<mbs; i++){
276       jmin = ai[i]; jmax = ai[i+1];
277       for (j=jmin; j<jmax; j++){
278         while (a2anew[j] != j){
279           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
280           for (k1=0; k1<bs2; k1++){
281             dk[k1]       = aa[k*bs2+k1];
282             aa[k*bs2+k1] = aa[j*bs2+k1];
283             aa[j*bs2+k1] = dk[k1];
284           }
285         }
286         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
287         if (i > aj[j]){
288           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
289           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
290           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
291           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
292             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
293           }
294         }
295       }
296     }
297     ierr = PetscFree(a2anew);CHKERRA(ierr);
298   }
299 
300   /* for each row k */
301   for (k = 0; k<mbs; k++){
302 
303     /*initialize k-th row with elements nonzero in row perm(k) of A */
304     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
305     if (jmin < jmax) {
306       ap = aa + jmin*bs2;
307       for (j = jmin; j < jmax; j++){
308         vj = perm_ptr[aj[j]];         /* block col. index */
309         rtmp_ptr = rtmp + vj*bs2;
310         for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
311       }
312     }
313 
314     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
315     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316     i = jl[k]; /* first row to be added to k_th row  */
317 
318     while (i < mbs){
319       nexti = jl[i]; /* next row to be added to k_th row */
320 
321       /* compute multiplier */
322       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
323 
324       /* uik = -inv(Di)*U_bar(i,k) */
325       diag = ba + i*bs2;
326       u    = ba + ili*bs2;
327       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
328       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
329 
330       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
331       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
332 
333       /* update -U(i,k) */
334       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
335 
336       /* add multiple of row i to k-th row ... */
337       jmin = ili + 1; jmax = bi[i+1];
338       if (jmin < jmax){
339         for (j=jmin; j<jmax; j++) {
340           /* rtmp += -U(i,k)^T * U_bar(i,j) */
341           rtmp_ptr = rtmp + bj[j]*bs2;
342           u = ba + j*bs2;
343           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
344         }
345 
346         /* ... add i to row list for next nonzero entry */
347         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
348         j     = bj[jmin];
349         jl[i] = jl[j]; jl[j] = i; /* update jl */
350       }
351       i = nexti;
352     }
353 
354     /* save nonzero entries in k-th row of U ... */
355 
356     /* invert diagonal block */
357     diag = ba+k*bs2;
358     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
359     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
360 
361     jmin = bi[k]; jmax = bi[k+1];
362     if (jmin < jmax) {
363       for (j=jmin; j<jmax; j++){
364          vj = bj[j];           /* block col. index of U */
365          u   = ba + j*bs2;
366          rtmp_ptr = rtmp + vj*bs2;
367          for (k1=0; k1<bs2; k1++){
368            *u++        = *rtmp_ptr;
369            *rtmp_ptr++ = 0.0;
370          }
371       }
372 
373       /* ... add k to row list for first nonzero entry in k-th row */
374       il[k] = jmin;
375       i     = bj[jmin];
376       jl[k] = jl[i]; jl[i] = k;
377     }
378   }
379 
380   ierr = PetscFree(rtmp);CHKERRQ(ierr);
381   ierr = PetscFree(il);CHKERRQ(ierr);
382   ierr = PetscFree(jl);CHKERRQ(ierr);
383   ierr = PetscFree(dk);CHKERRQ(ierr);
384   ierr = PetscFree(uik);CHKERRQ(ierr);
385   ierr = PetscFree(W);CHKERRQ(ierr);
386   ierr = PetscFree(work);CHKERRQ(ierr);
387   ierr = PetscFree(pivots);CHKERRQ(ierr);
388   if (a->permute){
389     ierr = PetscFree(aa);CHKERRQ(ierr);
390   }
391 
392   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
393   C->factor    = FACTOR_CHOLESKY;
394   C->assembled = PETSC_TRUE;
395   C->preallocated = PETSC_TRUE;
396   PLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
397   PetscFunctionReturn(0);
398 }
399 
400 #undef __FUNC__
401 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering"
402 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
403 {
404   Mat                C = *B;
405   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
406   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
407   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
408   int                bs=a->bs,bs2 = a->bs2;
409   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
410   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
411   MatScalar          *W,*work;
412   int                *pivots;
413 
414   PetscFunctionBegin;
415 
416   /* initialization */
417 
418   rtmp  = (MatScalar*)PetscMalloc(bs2*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
419   ierr  = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
420   il    = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
421   jl    = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
422   for (i=0; i<mbs; i++) {
423     jl[i] = mbs; il[0] = 0;
424   }
425   dk    = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(dk);
426   uik   = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(uik);
427   W     = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(W);
428   work  = (MatScalar*)PetscMalloc(bs*sizeof(MatScalar));CHKPTRQ(work);
429   pivots= (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(pivots);
430 
431   ai = a->i; aj = a->j; aa = a->a;
432 
433   /* for each row k */
434   for (k = 0; k<mbs; k++){
435 
436     /*initialize k-th row with elements nonzero in row k of A */
437     jmin = ai[k]; jmax = ai[k+1];
438     if (jmin < jmax) {
439       ap = aa + jmin*bs2;
440       for (j = jmin; j < jmax; j++){
441         vj = aj[j];         /* block col. index */
442         rtmp_ptr = rtmp + vj*bs2;
443         for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
444       }
445     }
446 
447     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
448     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
449     i = jl[k]; /* first row to be added to k_th row  */
450 
451     while (i < mbs){
452       nexti = jl[i]; /* next row to be added to k_th row */
453 
454       /* compute multiplier */
455       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
456 
457       /* uik = -inv(Di)*U_bar(i,k) */
458       diag = ba + i*bs2;
459       u    = ba + ili*bs2;
460       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
461       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
462 
463       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
464       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
465 
466       /* update -U(i,k) */
467       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
468 
469       /* add multiple of row i to k-th row ... */
470       jmin = ili + 1; jmax = bi[i+1];
471       if (jmin < jmax){
472         for (j=jmin; j<jmax; j++) {
473           /* rtmp += -U(i,k)^T * U_bar(i,j) */
474           rtmp_ptr = rtmp + bj[j]*bs2;
475           u = ba + j*bs2;
476           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
477         }
478 
479         /* ... add i to row list for next nonzero entry */
480         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
481         j     = bj[jmin];
482         jl[i] = jl[j]; jl[j] = i; /* update jl */
483       }
484       i = nexti;
485     }
486 
487     /* save nonzero entries in k-th row of U ... */
488 
489     /* invert diagonal block */
490     diag = ba+k*bs2;
491     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
492     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
493 
494     jmin = bi[k]; jmax = bi[k+1];
495     if (jmin < jmax) {
496       for (j=jmin; j<jmax; j++){
497          vj = bj[j];           /* block col. index of U */
498          u   = ba + j*bs2;
499          rtmp_ptr = rtmp + vj*bs2;
500          for (k1=0; k1<bs2; k1++){
501            *u++        = *rtmp_ptr;
502            *rtmp_ptr++ = 0.0;
503          }
504       }
505 
506       /* ... add k to row list for first nonzero entry in k-th row */
507       il[k] = jmin;
508       i     = bj[jmin];
509       jl[k] = jl[i]; jl[i] = k;
510     }
511   }
512 
513   ierr = PetscFree(rtmp);CHKERRQ(ierr);
514   ierr = PetscFree(il);CHKERRQ(ierr);
515   ierr = PetscFree(jl);CHKERRQ(ierr);
516   ierr = PetscFree(dk);CHKERRQ(ierr);
517   ierr = PetscFree(uik);CHKERRQ(ierr);
518   ierr = PetscFree(W);CHKERRQ(ierr);
519   ierr = PetscFree(work);CHKERRQ(ierr);
520   ierr = PetscFree(pivots);CHKERRQ(ierr);
521 
522   C->factor    = FACTOR_CHOLESKY;
523   C->assembled = PETSC_TRUE;
524   C->preallocated = PETSC_TRUE;
525   PLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
526   PetscFunctionReturn(0);
527 }
528 
529 /* Version for when blocks are 7 by 7 */
530 #undef __FUNC__
531 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_7"
532 int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat A,Mat *B)
533 {
534   Mat                C = *B;
535   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
536   IS                 perm = b->row;
537   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
538   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
539   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
540   MatScalar          *u,*d,*w,*wp;
541 
542   PetscFunctionBegin;
543   /* initialization */
544   w  = (MatScalar*)PetscMalloc(49*mbs*sizeof(MatScalar));CHKPTRQ(w);
545   ierr = PetscMemzero(w,49*mbs*sizeof(MatScalar));CHKERRQ(ierr);
546   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
547   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
548   for (i=0; i<mbs; i++) {
549     jl[i] = mbs; il[0] = 0;
550   }
551   dk    = (MatScalar*)PetscMalloc(49*sizeof(MatScalar));CHKPTRQ(dk);
552   uik   = (MatScalar*)PetscMalloc(49*sizeof(MatScalar));CHKPTRQ(uik);
553   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
554 
555   /* check permutation */
556   if (!a->permute){
557     ai = a->i; aj = a->j; aa = a->a;
558   } else {
559     ai = a->inew; aj = a->jnew;
560     aa = (MatScalar*)PetscMalloc(49*ai[mbs]*sizeof(MatScalar));CHKPTRQ(aa);
561     ierr = PetscMemcpy(aa,a->a,49*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
562     a2anew  = (int*)PetscMalloc(ai[mbs]*sizeof(int));CHKPTRQ(a2anew);
563     ierr= PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
564 
565     for (i=0; i<mbs; i++){
566       jmin = ai[i]; jmax = ai[i+1];
567       for (j=jmin; j<jmax; j++){
568         while (a2anew[j] != j){
569           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
570           for (k1=0; k1<49; k1++){
571             dk[k1]       = aa[k*49+k1];
572             aa[k*49+k1] = aa[j*49+k1];
573             aa[j*49+k1] = dk[k1];
574           }
575         }
576         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
577         if (i > aj[j]){
578           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
579           ap = aa + j*49;                     /* ptr to the beginning of j-th block of aa */
580           for (k=0; k<49; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
581           for (k=0; k<7; k++){               /* j-th block of aa <- dk^T */
582             for (k1=0; k1<7; k1++) *ap++ = dk[k + 7*k1];
583           }
584         }
585       }
586     }
587     ierr = PetscFree(a2anew);CHKERRA(ierr);
588   }
589 
590   /* for each row k */
591   for (k = 0; k<mbs; k++){
592 
593     /*initialize k-th row with elements nonzero in row perm(k) of A */
594     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
595     if (jmin < jmax) {
596       ap = aa + jmin*49;
597       for (j = jmin; j < jmax; j++){
598         vj = perm_ptr[aj[j]];         /* block col. index */
599         wp = w + vj*49;
600         for (i=0; i<49; i++) *wp++ = *ap++;
601       }
602     }
603 
604     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
605     ierr = PetscMemcpy(dk,w+k*49,49*sizeof(MatScalar));CHKERRQ(ierr);
606     i = jl[k]; /* first row to be added to k_th row  */
607 
608     while (i < mbs){
609       nexti = jl[i]; /* next row to be added to k_th row */
610 
611       /* compute multiplier */
612       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
613 
614       /* uik = -inv(Di)*U_bar(i,k) */
615       d = ba + i*49;
616       u    = ba + ili*49;
617 
618       uik[0] = -(d[0]*u[0] + d[7]*u[1]+ d[14]*u[2]+ d[21]*u[3]+ d[28]*u[4]+ d[35]*u[5]+ d[42]*u[6]);
619       uik[1] = -(d[1]*u[0] + d[8]*u[1]+ d[15]*u[2]+ d[22]*u[3]+ d[29]*u[4]+ d[36]*u[5]+ d[43]*u[6]);
620       uik[2] = -(d[2]*u[0] + d[9]*u[1]+ d[16]*u[2]+ d[23]*u[3]+ d[30]*u[4]+ d[37]*u[5]+ d[44]*u[6]);
621       uik[3] = -(d[3]*u[0]+ d[10]*u[1]+ d[17]*u[2]+ d[24]*u[3]+ d[31]*u[4]+ d[38]*u[5]+ d[45]*u[6]);
622       uik[4] = -(d[4]*u[0]+ d[11]*u[1]+ d[18]*u[2]+ d[25]*u[3]+ d[32]*u[4]+ d[39]*u[5]+ d[46]*u[6]);
623       uik[5] = -(d[5]*u[0]+ d[12]*u[1]+ d[19]*u[2]+ d[26]*u[3]+ d[33]*u[4]+ d[40]*u[5]+ d[47]*u[6]);
624       uik[6] = -(d[6]*u[0]+ d[13]*u[1]+ d[20]*u[2]+ d[27]*u[3]+ d[34]*u[4]+ d[41]*u[5]+ d[48]*u[6]);
625 
626       uik[7] = -(d[0]*u[7] + d[7]*u[8]+ d[14]*u[9]+ d[21]*u[10]+ d[28]*u[11]+ d[35]*u[12]+ d[42]*u[13]);
627       uik[8] = -(d[1]*u[7] + d[8]*u[8]+ d[15]*u[9]+ d[22]*u[10]+ d[29]*u[11]+ d[36]*u[12]+ d[43]*u[13]);
628       uik[9] = -(d[2]*u[7] + d[9]*u[8]+ d[16]*u[9]+ d[23]*u[10]+ d[30]*u[11]+ d[37]*u[12]+ d[44]*u[13]);
629       uik[10]= -(d[3]*u[7]+ d[10]*u[8]+ d[17]*u[9]+ d[24]*u[10]+ d[31]*u[11]+ d[38]*u[12]+ d[45]*u[13]);
630       uik[11]= -(d[4]*u[7]+ d[11]*u[8]+ d[18]*u[9]+ d[25]*u[10]+ d[32]*u[11]+ d[39]*u[12]+ d[46]*u[13]);
631       uik[12]= -(d[5]*u[7]+ d[12]*u[8]+ d[19]*u[9]+ d[26]*u[10]+ d[33]*u[11]+ d[40]*u[12]+ d[47]*u[13]);
632       uik[13]= -(d[6]*u[7]+ d[13]*u[8]+ d[20]*u[9]+ d[27]*u[10]+ d[34]*u[11]+ d[41]*u[12]+ d[48]*u[13]);
633 
634       uik[14]= -(d[0]*u[14] + d[7]*u[15]+ d[14]*u[16]+ d[21]*u[17]+ d[28]*u[18]+ d[35]*u[19]+ d[42]*u[20]);
635       uik[15]= -(d[1]*u[14] + d[8]*u[15]+ d[15]*u[16]+ d[22]*u[17]+ d[29]*u[18]+ d[36]*u[19]+ d[43]*u[20]);
636       uik[16]= -(d[2]*u[14] + d[9]*u[15]+ d[16]*u[16]+ d[23]*u[17]+ d[30]*u[18]+ d[37]*u[19]+ d[44]*u[20]);
637       uik[17]= -(d[3]*u[14]+ d[10]*u[15]+ d[17]*u[16]+ d[24]*u[17]+ d[31]*u[18]+ d[38]*u[19]+ d[45]*u[20]);
638       uik[18]= -(d[4]*u[14]+ d[11]*u[15]+ d[18]*u[16]+ d[25]*u[17]+ d[32]*u[18]+ d[39]*u[19]+ d[46]*u[20]);
639       uik[19]= -(d[5]*u[14]+ d[12]*u[15]+ d[19]*u[16]+ d[26]*u[17]+ d[33]*u[18]+ d[40]*u[19]+ d[47]*u[20]);
640       uik[20]= -(d[6]*u[14]+ d[13]*u[15]+ d[20]*u[16]+ d[27]*u[17]+ d[34]*u[18]+ d[41]*u[19]+ d[48]*u[20]);
641 
642       uik[21]= -(d[0]*u[21] + d[7]*u[22]+ d[14]*u[23]+ d[21]*u[24]+ d[28]*u[25]+ d[35]*u[26]+ d[42]*u[27]);
643       uik[22]= -(d[1]*u[21] + d[8]*u[22]+ d[15]*u[23]+ d[22]*u[24]+ d[29]*u[25]+ d[36]*u[26]+ d[43]*u[27]);
644       uik[23]= -(d[2]*u[21] + d[9]*u[22]+ d[16]*u[23]+ d[23]*u[24]+ d[30]*u[25]+ d[37]*u[26]+ d[44]*u[27]);
645       uik[24]= -(d[3]*u[21]+ d[10]*u[22]+ d[17]*u[23]+ d[24]*u[24]+ d[31]*u[25]+ d[38]*u[26]+ d[45]*u[27]);
646       uik[25]= -(d[4]*u[21]+ d[11]*u[22]+ d[18]*u[23]+ d[25]*u[24]+ d[32]*u[25]+ d[39]*u[26]+ d[46]*u[27]);
647       uik[26]= -(d[5]*u[21]+ d[12]*u[22]+ d[19]*u[23]+ d[26]*u[24]+ d[33]*u[25]+ d[40]*u[26]+ d[47]*u[27]);
648       uik[27]= -(d[6]*u[21]+ d[13]*u[22]+ d[20]*u[23]+ d[27]*u[24]+ d[34]*u[25]+ d[41]*u[26]+ d[48]*u[27]);
649 
650       uik[28]= -(d[0]*u[28] + d[7]*u[29]+ d[14]*u[30]+ d[21]*u[31]+ d[28]*u[32]+ d[35]*u[33]+ d[42]*u[34]);
651       uik[29]= -(d[1]*u[28] + d[8]*u[29]+ d[15]*u[30]+ d[22]*u[31]+ d[29]*u[32]+ d[36]*u[33]+ d[43]*u[34]);
652       uik[30]= -(d[2]*u[28] + d[9]*u[29]+ d[16]*u[30]+ d[23]*u[31]+ d[30]*u[32]+ d[37]*u[33]+ d[44]*u[34]);
653       uik[31]= -(d[3]*u[28]+ d[10]*u[29]+ d[17]*u[30]+ d[24]*u[31]+ d[31]*u[32]+ d[38]*u[33]+ d[45]*u[34]);
654       uik[32]= -(d[4]*u[28]+ d[11]*u[29]+ d[18]*u[30]+ d[25]*u[31]+ d[32]*u[32]+ d[39]*u[33]+ d[46]*u[34]);
655       uik[33]= -(d[5]*u[28]+ d[12]*u[29]+ d[19]*u[30]+ d[26]*u[31]+ d[33]*u[32]+ d[40]*u[33]+ d[47]*u[34]);
656       uik[34]= -(d[6]*u[28]+ d[13]*u[29]+ d[20]*u[30]+ d[27]*u[31]+ d[34]*u[32]+ d[41]*u[33]+ d[48]*u[34]);
657 
658       uik[35]= -(d[0]*u[35] + d[7]*u[36]+ d[14]*u[37]+ d[21]*u[38]+ d[28]*u[39]+ d[35]*u[40]+ d[42]*u[41]);
659       uik[36]= -(d[1]*u[35] + d[8]*u[36]+ d[15]*u[37]+ d[22]*u[38]+ d[29]*u[39]+ d[36]*u[40]+ d[43]*u[41]);
660       uik[37]= -(d[2]*u[35] + d[9]*u[36]+ d[16]*u[37]+ d[23]*u[38]+ d[30]*u[39]+ d[37]*u[40]+ d[44]*u[41]);
661       uik[38]= -(d[3]*u[35]+ d[10]*u[36]+ d[17]*u[37]+ d[24]*u[38]+ d[31]*u[39]+ d[38]*u[40]+ d[45]*u[41]);
662       uik[39]= -(d[4]*u[35]+ d[11]*u[36]+ d[18]*u[37]+ d[25]*u[38]+ d[32]*u[39]+ d[39]*u[40]+ d[46]*u[41]);
663       uik[40]= -(d[5]*u[35]+ d[12]*u[36]+ d[19]*u[37]+ d[26]*u[38]+ d[33]*u[39]+ d[40]*u[40]+ d[47]*u[41]);
664       uik[41]= -(d[6]*u[35]+ d[13]*u[36]+ d[20]*u[37]+ d[27]*u[38]+ d[34]*u[39]+ d[41]*u[40]+ d[48]*u[41]);
665 
666       uik[42]= -(d[0]*u[42] + d[7]*u[43]+ d[14]*u[44]+ d[21]*u[45]+ d[28]*u[46]+ d[35]*u[47]+ d[42]*u[48]);
667       uik[43]= -(d[1]*u[42] + d[8]*u[43]+ d[15]*u[44]+ d[22]*u[45]+ d[29]*u[46]+ d[36]*u[47]+ d[43]*u[48]);
668       uik[44]= -(d[2]*u[42] + d[9]*u[43]+ d[16]*u[44]+ d[23]*u[45]+ d[30]*u[46]+ d[37]*u[47]+ d[44]*u[48]);
669       uik[45]= -(d[3]*u[42]+ d[10]*u[43]+ d[17]*u[44]+ d[24]*u[45]+ d[31]*u[46]+ d[38]*u[47]+ d[45]*u[48]);
670       uik[46]= -(d[4]*u[42]+ d[11]*u[43]+ d[18]*u[44]+ d[25]*u[45]+ d[32]*u[46]+ d[39]*u[47]+ d[46]*u[48]);
671       uik[47]= -(d[5]*u[42]+ d[12]*u[43]+ d[19]*u[44]+ d[26]*u[45]+ d[33]*u[46]+ d[40]*u[47]+ d[47]*u[48]);
672       uik[48]= -(d[6]*u[42]+ d[13]*u[43]+ d[20]*u[44]+ d[27]*u[45]+ d[34]*u[46]+ d[41]*u[47]+ d[48]*u[48]);
673 
674       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
675       dk[0]+=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6];
676       dk[1]+=  uik[7]*u[0] + uik[8]*u[1] + uik[9]*u[2]+ uik[10]*u[3]+ uik[11]*u[4]+ uik[12]*u[5]+ uik[13]*u[6];
677       dk[2]+= uik[14]*u[0]+ uik[15]*u[1]+ uik[16]*u[2]+ uik[17]*u[3]+ uik[18]*u[4]+ uik[19]*u[5]+ uik[20]*u[6];
678       dk[3]+= uik[21]*u[0]+ uik[22]*u[1]+ uik[23]*u[2]+ uik[24]*u[3]+ uik[25]*u[4]+ uik[26]*u[5]+ uik[27]*u[6];
679       dk[4]+= uik[28]*u[0]+ uik[29]*u[1]+ uik[30]*u[2]+ uik[31]*u[3]+ uik[32]*u[4]+ uik[33]*u[5]+ uik[34]*u[6];
680       dk[5]+= uik[35]*u[0]+ uik[36]*u[1]+ uik[37]*u[2]+ uik[38]*u[3]+ uik[39]*u[4]+ uik[40]*u[5]+ uik[41]*u[6];
681       dk[6]+= uik[42]*u[0]+ uik[43]*u[1]+ uik[44]*u[2]+ uik[45]*u[3]+ uik[46]*u[4]+ uik[47]*u[5]+ uik[48]*u[6];
682 
683       dk[7]+=  uik[0]*u[7] + uik[1]*u[8] + uik[2]*u[9] + uik[3]*u[10] + uik[4]*u[11] + uik[5]*u[12] + uik[6]*u[13];
684       dk[8]+=  uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13];
685       dk[9]+= uik[14]*u[7]+ uik[15]*u[8]+ uik[16]*u[9]+ uik[17]*u[10]+ uik[18]*u[11]+ uik[19]*u[12]+ uik[20]*u[13];
686       dk[10]+=uik[21]*u[7]+ uik[22]*u[8]+ uik[23]*u[9]+ uik[24]*u[10]+ uik[25]*u[11]+ uik[26]*u[12]+ uik[27]*u[13];
687       dk[11]+=uik[28]*u[7]+ uik[29]*u[8]+ uik[30]*u[9]+ uik[31]*u[10]+ uik[32]*u[11]+ uik[33]*u[12]+ uik[34]*u[13];
688       dk[12]+=uik[35]*u[7]+ uik[36]*u[8]+ uik[37]*u[9]+ uik[38]*u[10]+ uik[39]*u[11]+ uik[40]*u[12]+ uik[41]*u[13];
689       dk[13]+=uik[42]*u[7]+ uik[43]*u[8]+ uik[44]*u[9]+ uik[45]*u[10]+ uik[46]*u[11]+ uik[47]*u[12]+ uik[48]*u[13];
690 
691       dk[14]+=  uik[0]*u[14] + uik[1]*u[15] + uik[2]*u[16] + uik[3]*u[17] + uik[4]*u[18] + uik[5]*u[19] + uik[6]*u[20];
692       dk[15]+=  uik[7]*u[14] + uik[8]*u[15] + uik[9]*u[16]+ uik[10]*u[17]+ uik[11]*u[18]+ uik[12]*u[19]+ uik[13]*u[20];
693       dk[16]+= uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20];
694       dk[17]+= uik[21]*u[14]+ uik[22]*u[15]+ uik[23]*u[16]+ uik[24]*u[17]+ uik[25]*u[18]+ uik[26]*u[19]+ uik[27]*u[20];
695       dk[18]+= uik[28]*u[14]+ uik[29]*u[15]+ uik[30]*u[16]+ uik[31]*u[17]+ uik[32]*u[18]+ uik[33]*u[19]+ uik[34]*u[20];
696       dk[19]+= uik[35]*u[14]+ uik[36]*u[15]+ uik[37]*u[16]+ uik[38]*u[17]+ uik[39]*u[18]+ uik[40]*u[19]+ uik[41]*u[20];
697       dk[20]+= uik[42]*u[14]+ uik[43]*u[15]+ uik[44]*u[16]+ uik[45]*u[17]+ uik[46]*u[18]+ uik[47]*u[19]+ uik[48]*u[20];
698 
699       dk[21]+=  uik[0]*u[21] + uik[1]*u[22] + uik[2]*u[23] + uik[3]*u[24] + uik[4]*u[25] + uik[5]*u[26] + uik[6]*u[27];
700       dk[22]+=  uik[7]*u[21] + uik[8]*u[22] + uik[9]*u[23]+ uik[10]*u[24]+ uik[11]*u[25]+ uik[12]*u[26]+ uik[13]*u[27];
701       dk[23]+= uik[14]*u[21]+ uik[15]*u[22]+ uik[16]*u[23]+ uik[17]*u[24]+ uik[18]*u[25]+ uik[19]*u[26]+ uik[20]*u[27];
702       dk[24]+= uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27];
703       dk[25]+= uik[28]*u[21]+ uik[29]*u[22]+ uik[30]*u[23]+ uik[31]*u[24]+ uik[32]*u[25]+ uik[33]*u[26]+ uik[34]*u[27];
704       dk[26]+= uik[35]*u[21]+ uik[36]*u[22]+ uik[37]*u[23]+ uik[38]*u[24]+ uik[39]*u[25]+ uik[40]*u[26]+ uik[41]*u[27];
705       dk[27]+= uik[42]*u[21]+ uik[43]*u[22]+ uik[44]*u[23]+ uik[45]*u[24]+ uik[46]*u[25]+ uik[47]*u[26]+ uik[48]*u[27];
706 
707       dk[28]+=  uik[0]*u[28] + uik[1]*u[29] + uik[2]*u[30] + uik[3]*u[31] + uik[4]*u[32] + uik[5]*u[33] + uik[6]*u[34];
708       dk[29]+=  uik[7]*u[28] + uik[8]*u[29] + uik[9]*u[30]+ uik[10]*u[31]+ uik[11]*u[32]+ uik[12]*u[33]+ uik[13]*u[34];
709       dk[30]+= uik[14]*u[28]+ uik[15]*u[29]+ uik[16]*u[30]+ uik[17]*u[31]+ uik[18]*u[32]+ uik[19]*u[33]+ uik[20]*u[34];
710       dk[31]+= uik[21]*u[28]+ uik[22]*u[29]+ uik[23]*u[30]+ uik[24]*u[31]+ uik[25]*u[32]+ uik[26]*u[33]+ uik[27]*u[34];
711       dk[32]+= uik[28]*u[28]+ uik[29]*u[29]+ uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34];
712       dk[33]+= uik[35]*u[28]+ uik[36]*u[29]+ uik[37]*u[30]+ uik[38]*u[31]+ uik[39]*u[32]+ uik[40]*u[33]+ uik[41]*u[34];
713       dk[34]+= uik[42]*u[28]+ uik[43]*u[29]+ uik[44]*u[30]+ uik[45]*u[31]+ uik[46]*u[32]+ uik[47]*u[33]+ uik[48]*u[34];
714 
715       dk[35]+=  uik[0]*u[35] + uik[1]*u[36] + uik[2]*u[37] + uik[3]*u[38] + uik[4]*u[39] + uik[5]*u[40] + uik[6]*u[41];
716       dk[36]+=  uik[7]*u[35] + uik[8]*u[36] + uik[9]*u[37]+ uik[10]*u[38]+ uik[11]*u[39]+ uik[12]*u[40]+ uik[13]*u[41];
717       dk[37]+= uik[14]*u[35]+ uik[15]*u[36]+ uik[16]*u[37]+ uik[17]*u[38]+ uik[18]*u[39]+ uik[19]*u[40]+ uik[20]*u[41];
718       dk[38]+= uik[21]*u[35]+ uik[22]*u[36]+ uik[23]*u[37]+ uik[24]*u[38]+ uik[25]*u[39]+ uik[26]*u[40]+ uik[27]*u[41];
719       dk[39]+= uik[28]*u[35]+ uik[29]*u[36]+ uik[30]*u[37]+ uik[31]*u[38]+ uik[32]*u[39]+ uik[33]*u[40]+ uik[34]*u[41];
720       dk[40]+= uik[35]*u[35]+ uik[36]*u[36]+ uik[37]*u[37]+ uik[38]*u[38]+ uik[39]*u[39]+ uik[40]*u[40]+ uik[41]*u[41];
721       dk[41]+= uik[42]*u[35]+ uik[43]*u[36]+ uik[44]*u[37]+ uik[45]*u[38]+ uik[46]*u[39]+ uik[47]*u[40]+ uik[48]*u[41];
722 
723       dk[42]+=  uik[0]*u[42] + uik[1]*u[43] + uik[2]*u[44] + uik[3]*u[45] + uik[4]*u[46] + uik[5]*u[47] + uik[6]*u[48];
724       dk[43]+=  uik[7]*u[42] + uik[8]*u[43] + uik[9]*u[44]+ uik[10]*u[45]+ uik[11]*u[46]+ uik[12]*u[47]+ uik[13]*u[48];
725       dk[44]+= uik[14]*u[42]+ uik[15]*u[43]+ uik[16]*u[44]+ uik[17]*u[45]+ uik[18]*u[46]+ uik[19]*u[47]+ uik[20]*u[48];
726       dk[45]+= uik[21]*u[42]+ uik[22]*u[43]+ uik[23]*u[44]+ uik[24]*u[45]+ uik[25]*u[46]+ uik[26]*u[47]+ uik[27]*u[48];
727       dk[46]+= uik[28]*u[42]+ uik[29]*u[43]+ uik[30]*u[44]+ uik[31]*u[45]+ uik[32]*u[46]+ uik[33]*u[47]+ uik[34]*u[48];
728       dk[47]+= uik[35]*u[42]+ uik[36]*u[43]+ uik[37]*u[44]+ uik[38]*u[45]+ uik[39]*u[46]+ uik[40]*u[47]+ uik[41]*u[48];
729       dk[48]+= uik[42]*u[42]+ uik[43]*u[43]+ uik[44]*u[44]+ uik[45]*u[45]+ uik[46]*u[46]+ uik[47]*u[47]+ uik[48]*u[48];
730 
731       /* update -U(i,k) */
732       ierr = PetscMemcpy(ba+ili*49,uik,49*sizeof(MatScalar));CHKERRQ(ierr);
733 
734       /* add multiple of row i to k-th row ... */
735       jmin = ili + 1; jmax = bi[i+1];
736       if (jmin < jmax){
737         for (j=jmin; j<jmax; j++) {
738           /* w += -U(i,k)^T * U_bar(i,j) */
739           wp = w + bj[j]*49;
740           u = ba + j*49;
741 
742           wp[0]+=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6];
743           wp[1]+=  uik[7]*u[0] + uik[8]*u[1] + uik[9]*u[2]+ uik[10]*u[3]+ uik[11]*u[4]+ uik[12]*u[5]+ uik[13]*u[6];
744           wp[2]+= uik[14]*u[0]+ uik[15]*u[1]+ uik[16]*u[2]+ uik[17]*u[3]+ uik[18]*u[4]+ uik[19]*u[5]+ uik[20]*u[6];
745           wp[3]+= uik[21]*u[0]+ uik[22]*u[1]+ uik[23]*u[2]+ uik[24]*u[3]+ uik[25]*u[4]+ uik[26]*u[5]+ uik[27]*u[6];
746           wp[4]+= uik[28]*u[0]+ uik[29]*u[1]+ uik[30]*u[2]+ uik[31]*u[3]+ uik[32]*u[4]+ uik[33]*u[5]+ uik[34]*u[6];
747           wp[5]+= uik[35]*u[0]+ uik[36]*u[1]+ uik[37]*u[2]+ uik[38]*u[3]+ uik[39]*u[4]+ uik[40]*u[5]+ uik[41]*u[6];
748           wp[6]+= uik[42]*u[0]+ uik[43]*u[1]+ uik[44]*u[2]+ uik[45]*u[3]+ uik[46]*u[4]+ uik[47]*u[5]+ uik[48]*u[6];
749 
750           wp[7]+=  uik[0]*u[7] + uik[1]*u[8] + uik[2]*u[9] + uik[3]*u[10] + uik[4]*u[11] + uik[5]*u[12] + uik[6]*u[13];
751           wp[8]+=  uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13];
752           wp[9]+= uik[14]*u[7]+ uik[15]*u[8]+ uik[16]*u[9]+ uik[17]*u[10]+ uik[18]*u[11]+ uik[19]*u[12]+ uik[20]*u[13];
753           wp[10]+=uik[21]*u[7]+ uik[22]*u[8]+ uik[23]*u[9]+ uik[24]*u[10]+ uik[25]*u[11]+ uik[26]*u[12]+ uik[27]*u[13];
754           wp[11]+=uik[28]*u[7]+ uik[29]*u[8]+ uik[30]*u[9]+ uik[31]*u[10]+ uik[32]*u[11]+ uik[33]*u[12]+ uik[34]*u[13];
755           wp[12]+=uik[35]*u[7]+ uik[36]*u[8]+ uik[37]*u[9]+ uik[38]*u[10]+ uik[39]*u[11]+ uik[40]*u[12]+ uik[41]*u[13];
756           wp[13]+=uik[42]*u[7]+ uik[43]*u[8]+ uik[44]*u[9]+ uik[45]*u[10]+ uik[46]*u[11]+ uik[47]*u[12]+ uik[48]*u[13];
757 
758           wp[14]+=  uik[0]*u[14] + uik[1]*u[15] + uik[2]*u[16] + uik[3]*u[17] + uik[4]*u[18] + uik[5]*u[19] + uik[6]*u[20];
759           wp[15]+=  uik[7]*u[14] + uik[8]*u[15] + uik[9]*u[16]+ uik[10]*u[17]+ uik[11]*u[18]+ uik[12]*u[19]+ uik[13]*u[20];
760           wp[16]+= uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20];
761           wp[17]+= uik[21]*u[14]+ uik[22]*u[15]+ uik[23]*u[16]+ uik[24]*u[17]+ uik[25]*u[18]+ uik[26]*u[19]+ uik[27]*u[20];
762           wp[18]+= uik[28]*u[14]+ uik[29]*u[15]+ uik[30]*u[16]+ uik[31]*u[17]+ uik[32]*u[18]+ uik[33]*u[19]+ uik[34]*u[20];
763           wp[19]+= uik[35]*u[14]+ uik[36]*u[15]+ uik[37]*u[16]+ uik[38]*u[17]+ uik[39]*u[18]+ uik[40]*u[19]+ uik[41]*u[20];
764           wp[20]+= uik[42]*u[14]+ uik[43]*u[15]+ uik[44]*u[16]+ uik[45]*u[17]+ uik[46]*u[18]+ uik[47]*u[19]+ uik[48]*u[20];
765 
766           wp[21]+=  uik[0]*u[21] + uik[1]*u[22] + uik[2]*u[23] + uik[3]*u[24] + uik[4]*u[25] + uik[5]*u[26] + uik[6]*u[27];
767           wp[22]+=  uik[7]*u[21] + uik[8]*u[22] + uik[9]*u[23]+ uik[10]*u[24]+ uik[11]*u[25]+ uik[12]*u[26]+ uik[13]*u[27];
768           wp[23]+= uik[14]*u[21]+ uik[15]*u[22]+ uik[16]*u[23]+ uik[17]*u[24]+ uik[18]*u[25]+ uik[19]*u[26]+ uik[20]*u[27];
769           wp[24]+= uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27];
770           wp[25]+= uik[28]*u[21]+ uik[29]*u[22]+ uik[30]*u[23]+ uik[31]*u[24]+ uik[32]*u[25]+ uik[33]*u[26]+ uik[34]*u[27];
771           wp[26]+= uik[35]*u[21]+ uik[36]*u[22]+ uik[37]*u[23]+ uik[38]*u[24]+ uik[39]*u[25]+ uik[40]*u[26]+ uik[41]*u[27];
772           wp[27]+= uik[42]*u[21]+ uik[43]*u[22]+ uik[44]*u[23]+ uik[45]*u[24]+ uik[46]*u[25]+ uik[47]*u[26]+ uik[48]*u[27];
773 
774           wp[28]+=  uik[0]*u[28] + uik[1]*u[29] + uik[2]*u[30] + uik[3]*u[31] + uik[4]*u[32] + uik[5]*u[33] + uik[6]*u[34];
775           wp[29]+=  uik[7]*u[28] + uik[8]*u[29] + uik[9]*u[30]+ uik[10]*u[31]+ uik[11]*u[32]+ uik[12]*u[33]+ uik[13]*u[34];
776           wp[30]+= uik[14]*u[28]+ uik[15]*u[29]+ uik[16]*u[30]+ uik[17]*u[31]+ uik[18]*u[32]+ uik[19]*u[33]+ uik[20]*u[34];
777           wp[31]+= uik[21]*u[28]+ uik[22]*u[29]+ uik[23]*u[30]+ uik[24]*u[31]+ uik[25]*u[32]+ uik[26]*u[33]+ uik[27]*u[34];
778           wp[32]+= uik[28]*u[28]+ uik[29]*u[29]+ uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34];
779           wp[33]+= uik[35]*u[28]+ uik[36]*u[29]+ uik[37]*u[30]+ uik[38]*u[31]+ uik[39]*u[32]+ uik[40]*u[33]+ uik[41]*u[34];
780           wp[34]+= uik[42]*u[28]+ uik[43]*u[29]+ uik[44]*u[30]+ uik[45]*u[31]+ uik[46]*u[32]+ uik[47]*u[33]+ uik[48]*u[34];
781 
782           wp[35]+=  uik[0]*u[35] + uik[1]*u[36] + uik[2]*u[37] + uik[3]*u[38] + uik[4]*u[39] + uik[5]*u[40] + uik[6]*u[41];
783           wp[36]+=  uik[7]*u[35] + uik[8]*u[36] + uik[9]*u[37]+ uik[10]*u[38]+ uik[11]*u[39]+ uik[12]*u[40]+ uik[13]*u[41];
784           wp[37]+= uik[14]*u[35]+ uik[15]*u[36]+ uik[16]*u[37]+ uik[17]*u[38]+ uik[18]*u[39]+ uik[19]*u[40]+ uik[20]*u[41];
785           wp[38]+= uik[21]*u[35]+ uik[22]*u[36]+ uik[23]*u[37]+ uik[24]*u[38]+ uik[25]*u[39]+ uik[26]*u[40]+ uik[27]*u[41];
786           wp[39]+= uik[28]*u[35]+ uik[29]*u[36]+ uik[30]*u[37]+ uik[31]*u[38]+ uik[32]*u[39]+ uik[33]*u[40]+ uik[34]*u[41];
787           wp[40]+= uik[35]*u[35]+ uik[36]*u[36]+ uik[37]*u[37]+ uik[38]*u[38]+ uik[39]*u[39]+ uik[40]*u[40]+ uik[41]*u[41];
788           wp[41]+= uik[42]*u[35]+ uik[43]*u[36]+ uik[44]*u[37]+ uik[45]*u[38]+ uik[46]*u[39]+ uik[47]*u[40]+ uik[48]*u[41];
789 
790           wp[42]+=  uik[0]*u[42] + uik[1]*u[43] + uik[2]*u[44] + uik[3]*u[45] + uik[4]*u[46] + uik[5]*u[47] + uik[6]*u[48];
791           wp[43]+=  uik[7]*u[42] + uik[8]*u[43] + uik[9]*u[44]+ uik[10]*u[45]+ uik[11]*u[46]+ uik[12]*u[47]+ uik[13]*u[48];
792           wp[44]+= uik[14]*u[42]+ uik[15]*u[43]+ uik[16]*u[44]+ uik[17]*u[45]+ uik[18]*u[46]+ uik[19]*u[47]+ uik[20]*u[48];
793           wp[45]+= uik[21]*u[42]+ uik[22]*u[43]+ uik[23]*u[44]+ uik[24]*u[45]+ uik[25]*u[46]+ uik[26]*u[47]+ uik[27]*u[48];
794           wp[46]+= uik[28]*u[42]+ uik[29]*u[43]+ uik[30]*u[44]+ uik[31]*u[45]+ uik[32]*u[46]+ uik[33]*u[47]+ uik[34]*u[48];
795           wp[47]+= uik[35]*u[42]+ uik[36]*u[43]+ uik[37]*u[44]+ uik[38]*u[45]+ uik[39]*u[46]+ uik[40]*u[47]+ uik[41]*u[48];
796           wp[48]+= uik[42]*u[42]+ uik[43]*u[43]+ uik[44]*u[44]+ uik[45]*u[45]+ uik[46]*u[46]+ uik[47]*u[47]+ uik[48]*u[48];
797         }
798 
799         /* ... add i to row list for next nonzero entry */
800         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
801         j     = bj[jmin];
802         jl[i] = jl[j]; jl[j] = i; /* update jl */
803       }
804       i = nexti;
805     }
806 
807     /* save nonzero entries in k-th row of U ... */
808 
809     /* invert diagonal block */
810     d = ba+k*49;
811     ierr = PetscMemcpy(d,dk,49*sizeof(MatScalar));CHKERRQ(ierr);
812     ierr = Kernel_A_gets_inverse_A_7(d);CHKERRQ(ierr);
813 
814     jmin = bi[k]; jmax = bi[k+1];
815     if (jmin < jmax) {
816       for (j=jmin; j<jmax; j++){
817          vj = bj[j];           /* block col. index of U */
818          u   = ba + j*49;
819          wp = w + vj*49;
820          for (k1=0; k1<49; k1++){
821            *u++        = *wp;
822            *wp++ = 0.0;
823          }
824       }
825 
826       /* ... add k to row list for first nonzero entry in k-th row */
827       il[k] = jmin;
828       i     = bj[jmin];
829       jl[k] = jl[i]; jl[i] = k;
830     }
831   }
832 
833   ierr = PetscFree(w);CHKERRQ(ierr);
834   ierr = PetscFree(il);CHKERRQ(ierr);
835   ierr = PetscFree(jl);CHKERRQ(ierr);
836   ierr = PetscFree(dk);CHKERRQ(ierr);
837   ierr = PetscFree(uik);CHKERRQ(ierr);
838   if (a->permute){
839     ierr = PetscFree(aa);CHKERRQ(ierr);
840   }
841 
842   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
843   C->factor    = FACTOR_CHOLESKY;
844   C->assembled = PETSC_TRUE;
845   C->preallocated = PETSC_TRUE;
846   PLogFlops(1.3333*343*b->mbs); /* from inverting diagonal blocks */
847   PetscFunctionReturn(0);
848 }
849 
850 /*
851       Version for when blocks are 7 by 7 Using natural ordering
852 */
853 #undef __FUNC__
854 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering"
855 int MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering(Mat A,Mat *B)
856 {
857   Mat                C = *B;
858   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
859   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
860   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
861   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
862   MatScalar          *u,*d,*w,*wp;
863 
864   PetscFunctionBegin;
865 
866   /* initialization */
867   w  = (MatScalar*)PetscMalloc(49*mbs*sizeof(MatScalar));CHKPTRQ(w);
868   ierr = PetscMemzero(w,49*mbs*sizeof(MatScalar));CHKERRQ(ierr);
869   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
870   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
871   for (i=0; i<mbs; i++) {
872     jl[i] = mbs; il[0] = 0;
873   }
874   dk    = (MatScalar*)PetscMalloc(49*sizeof(MatScalar));CHKPTRQ(dk);
875   uik   = (MatScalar*)PetscMalloc(49*sizeof(MatScalar));CHKPTRQ(uik);
876 
877   ai = a->i; aj = a->j; aa = a->a;
878 
879   /* for each row k */
880   for (k = 0; k<mbs; k++){
881 
882     /*initialize k-th row with elements nonzero in row k of A */
883     jmin = ai[k]; jmax = ai[k+1];
884     if (jmin < jmax) {
885       ap = aa + jmin*49;
886       for (j = jmin; j < jmax; j++){
887         vj = aj[j];         /* block col. index */
888         wp = w + vj*49;
889         for (i=0; i<49; i++) *wp++ = *ap++;
890       }
891     }
892 
893     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
894     ierr = PetscMemcpy(dk,w+k*49,49*sizeof(MatScalar));CHKERRQ(ierr);
895     i = jl[k]; /* first row to be added to k_th row  */
896 
897     while (i < mbs){
898       nexti = jl[i]; /* next row to be added to k_th row */
899 
900       /* compute multiplier */
901       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
902 
903       /* uik = -inv(Di)*U_bar(i,k) */
904       d = ba + i*49;
905       u    = ba + ili*49;
906 
907       uik[0] = -(d[0]*u[0] + d[7]*u[1]+ d[14]*u[2]+ d[21]*u[3]+ d[28]*u[4]+ d[35]*u[5]+ d[42]*u[6]);
908       uik[1] = -(d[1]*u[0] + d[8]*u[1]+ d[15]*u[2]+ d[22]*u[3]+ d[29]*u[4]+ d[36]*u[5]+ d[43]*u[6]);
909       uik[2] = -(d[2]*u[0] + d[9]*u[1]+ d[16]*u[2]+ d[23]*u[3]+ d[30]*u[4]+ d[37]*u[5]+ d[44]*u[6]);
910       uik[3] = -(d[3]*u[0]+ d[10]*u[1]+ d[17]*u[2]+ d[24]*u[3]+ d[31]*u[4]+ d[38]*u[5]+ d[45]*u[6]);
911       uik[4] = -(d[4]*u[0]+ d[11]*u[1]+ d[18]*u[2]+ d[25]*u[3]+ d[32]*u[4]+ d[39]*u[5]+ d[46]*u[6]);
912       uik[5] = -(d[5]*u[0]+ d[12]*u[1]+ d[19]*u[2]+ d[26]*u[3]+ d[33]*u[4]+ d[40]*u[5]+ d[47]*u[6]);
913       uik[6] = -(d[6]*u[0]+ d[13]*u[1]+ d[20]*u[2]+ d[27]*u[3]+ d[34]*u[4]+ d[41]*u[5]+ d[48]*u[6]);
914 
915       uik[7] = -(d[0]*u[7] + d[7]*u[8]+ d[14]*u[9]+ d[21]*u[10]+ d[28]*u[11]+ d[35]*u[12]+ d[42]*u[13]);
916       uik[8] = -(d[1]*u[7] + d[8]*u[8]+ d[15]*u[9]+ d[22]*u[10]+ d[29]*u[11]+ d[36]*u[12]+ d[43]*u[13]);
917       uik[9] = -(d[2]*u[7] + d[9]*u[8]+ d[16]*u[9]+ d[23]*u[10]+ d[30]*u[11]+ d[37]*u[12]+ d[44]*u[13]);
918       uik[10]= -(d[3]*u[7]+ d[10]*u[8]+ d[17]*u[9]+ d[24]*u[10]+ d[31]*u[11]+ d[38]*u[12]+ d[45]*u[13]);
919       uik[11]= -(d[4]*u[7]+ d[11]*u[8]+ d[18]*u[9]+ d[25]*u[10]+ d[32]*u[11]+ d[39]*u[12]+ d[46]*u[13]);
920       uik[12]= -(d[5]*u[7]+ d[12]*u[8]+ d[19]*u[9]+ d[26]*u[10]+ d[33]*u[11]+ d[40]*u[12]+ d[47]*u[13]);
921       uik[13]= -(d[6]*u[7]+ d[13]*u[8]+ d[20]*u[9]+ d[27]*u[10]+ d[34]*u[11]+ d[41]*u[12]+ d[48]*u[13]);
922 
923       uik[14]= -(d[0]*u[14] + d[7]*u[15]+ d[14]*u[16]+ d[21]*u[17]+ d[28]*u[18]+ d[35]*u[19]+ d[42]*u[20]);
924       uik[15]= -(d[1]*u[14] + d[8]*u[15]+ d[15]*u[16]+ d[22]*u[17]+ d[29]*u[18]+ d[36]*u[19]+ d[43]*u[20]);
925       uik[16]= -(d[2]*u[14] + d[9]*u[15]+ d[16]*u[16]+ d[23]*u[17]+ d[30]*u[18]+ d[37]*u[19]+ d[44]*u[20]);
926       uik[17]= -(d[3]*u[14]+ d[10]*u[15]+ d[17]*u[16]+ d[24]*u[17]+ d[31]*u[18]+ d[38]*u[19]+ d[45]*u[20]);
927       uik[18]= -(d[4]*u[14]+ d[11]*u[15]+ d[18]*u[16]+ d[25]*u[17]+ d[32]*u[18]+ d[39]*u[19]+ d[46]*u[20]);
928       uik[19]= -(d[5]*u[14]+ d[12]*u[15]+ d[19]*u[16]+ d[26]*u[17]+ d[33]*u[18]+ d[40]*u[19]+ d[47]*u[20]);
929       uik[20]= -(d[6]*u[14]+ d[13]*u[15]+ d[20]*u[16]+ d[27]*u[17]+ d[34]*u[18]+ d[41]*u[19]+ d[48]*u[20]);
930 
931       uik[21]= -(d[0]*u[21] + d[7]*u[22]+ d[14]*u[23]+ d[21]*u[24]+ d[28]*u[25]+ d[35]*u[26]+ d[42]*u[27]);
932       uik[22]= -(d[1]*u[21] + d[8]*u[22]+ d[15]*u[23]+ d[22]*u[24]+ d[29]*u[25]+ d[36]*u[26]+ d[43]*u[27]);
933       uik[23]= -(d[2]*u[21] + d[9]*u[22]+ d[16]*u[23]+ d[23]*u[24]+ d[30]*u[25]+ d[37]*u[26]+ d[44]*u[27]);
934       uik[24]= -(d[3]*u[21]+ d[10]*u[22]+ d[17]*u[23]+ d[24]*u[24]+ d[31]*u[25]+ d[38]*u[26]+ d[45]*u[27]);
935       uik[25]= -(d[4]*u[21]+ d[11]*u[22]+ d[18]*u[23]+ d[25]*u[24]+ d[32]*u[25]+ d[39]*u[26]+ d[46]*u[27]);
936       uik[26]= -(d[5]*u[21]+ d[12]*u[22]+ d[19]*u[23]+ d[26]*u[24]+ d[33]*u[25]+ d[40]*u[26]+ d[47]*u[27]);
937       uik[27]= -(d[6]*u[21]+ d[13]*u[22]+ d[20]*u[23]+ d[27]*u[24]+ d[34]*u[25]+ d[41]*u[26]+ d[48]*u[27]);
938 
939       uik[28]= -(d[0]*u[28] + d[7]*u[29]+ d[14]*u[30]+ d[21]*u[31]+ d[28]*u[32]+ d[35]*u[33]+ d[42]*u[34]);
940       uik[29]= -(d[1]*u[28] + d[8]*u[29]+ d[15]*u[30]+ d[22]*u[31]+ d[29]*u[32]+ d[36]*u[33]+ d[43]*u[34]);
941       uik[30]= -(d[2]*u[28] + d[9]*u[29]+ d[16]*u[30]+ d[23]*u[31]+ d[30]*u[32]+ d[37]*u[33]+ d[44]*u[34]);
942       uik[31]= -(d[3]*u[28]+ d[10]*u[29]+ d[17]*u[30]+ d[24]*u[31]+ d[31]*u[32]+ d[38]*u[33]+ d[45]*u[34]);
943       uik[32]= -(d[4]*u[28]+ d[11]*u[29]+ d[18]*u[30]+ d[25]*u[31]+ d[32]*u[32]+ d[39]*u[33]+ d[46]*u[34]);
944       uik[33]= -(d[5]*u[28]+ d[12]*u[29]+ d[19]*u[30]+ d[26]*u[31]+ d[33]*u[32]+ d[40]*u[33]+ d[47]*u[34]);
945       uik[34]= -(d[6]*u[28]+ d[13]*u[29]+ d[20]*u[30]+ d[27]*u[31]+ d[34]*u[32]+ d[41]*u[33]+ d[48]*u[34]);
946 
947       uik[35]= -(d[0]*u[35] + d[7]*u[36]+ d[14]*u[37]+ d[21]*u[38]+ d[28]*u[39]+ d[35]*u[40]+ d[42]*u[41]);
948       uik[36]= -(d[1]*u[35] + d[8]*u[36]+ d[15]*u[37]+ d[22]*u[38]+ d[29]*u[39]+ d[36]*u[40]+ d[43]*u[41]);
949       uik[37]= -(d[2]*u[35] + d[9]*u[36]+ d[16]*u[37]+ d[23]*u[38]+ d[30]*u[39]+ d[37]*u[40]+ d[44]*u[41]);
950       uik[38]= -(d[3]*u[35]+ d[10]*u[36]+ d[17]*u[37]+ d[24]*u[38]+ d[31]*u[39]+ d[38]*u[40]+ d[45]*u[41]);
951       uik[39]= -(d[4]*u[35]+ d[11]*u[36]+ d[18]*u[37]+ d[25]*u[38]+ d[32]*u[39]+ d[39]*u[40]+ d[46]*u[41]);
952       uik[40]= -(d[5]*u[35]+ d[12]*u[36]+ d[19]*u[37]+ d[26]*u[38]+ d[33]*u[39]+ d[40]*u[40]+ d[47]*u[41]);
953       uik[41]= -(d[6]*u[35]+ d[13]*u[36]+ d[20]*u[37]+ d[27]*u[38]+ d[34]*u[39]+ d[41]*u[40]+ d[48]*u[41]);
954 
955       uik[42]= -(d[0]*u[42] + d[7]*u[43]+ d[14]*u[44]+ d[21]*u[45]+ d[28]*u[46]+ d[35]*u[47]+ d[42]*u[48]);
956       uik[43]= -(d[1]*u[42] + d[8]*u[43]+ d[15]*u[44]+ d[22]*u[45]+ d[29]*u[46]+ d[36]*u[47]+ d[43]*u[48]);
957       uik[44]= -(d[2]*u[42] + d[9]*u[43]+ d[16]*u[44]+ d[23]*u[45]+ d[30]*u[46]+ d[37]*u[47]+ d[44]*u[48]);
958       uik[45]= -(d[3]*u[42]+ d[10]*u[43]+ d[17]*u[44]+ d[24]*u[45]+ d[31]*u[46]+ d[38]*u[47]+ d[45]*u[48]);
959       uik[46]= -(d[4]*u[42]+ d[11]*u[43]+ d[18]*u[44]+ d[25]*u[45]+ d[32]*u[46]+ d[39]*u[47]+ d[46]*u[48]);
960       uik[47]= -(d[5]*u[42]+ d[12]*u[43]+ d[19]*u[44]+ d[26]*u[45]+ d[33]*u[46]+ d[40]*u[47]+ d[47]*u[48]);
961       uik[48]= -(d[6]*u[42]+ d[13]*u[43]+ d[20]*u[44]+ d[27]*u[45]+ d[34]*u[46]+ d[41]*u[47]+ d[48]*u[48]);
962 
963       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
964       dk[0]+=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6];
965       dk[1]+=  uik[7]*u[0] + uik[8]*u[1] + uik[9]*u[2]+ uik[10]*u[3]+ uik[11]*u[4]+ uik[12]*u[5]+ uik[13]*u[6];
966       dk[2]+= uik[14]*u[0]+ uik[15]*u[1]+ uik[16]*u[2]+ uik[17]*u[3]+ uik[18]*u[4]+ uik[19]*u[5]+ uik[20]*u[6];
967       dk[3]+= uik[21]*u[0]+ uik[22]*u[1]+ uik[23]*u[2]+ uik[24]*u[3]+ uik[25]*u[4]+ uik[26]*u[5]+ uik[27]*u[6];
968       dk[4]+= uik[28]*u[0]+ uik[29]*u[1]+ uik[30]*u[2]+ uik[31]*u[3]+ uik[32]*u[4]+ uik[33]*u[5]+ uik[34]*u[6];
969       dk[5]+= uik[35]*u[0]+ uik[36]*u[1]+ uik[37]*u[2]+ uik[38]*u[3]+ uik[39]*u[4]+ uik[40]*u[5]+ uik[41]*u[6];
970       dk[6]+= uik[42]*u[0]+ uik[43]*u[1]+ uik[44]*u[2]+ uik[45]*u[3]+ uik[46]*u[4]+ uik[47]*u[5]+ uik[48]*u[6];
971 
972       dk[7]+=  uik[0]*u[7] + uik[1]*u[8] + uik[2]*u[9] + uik[3]*u[10] + uik[4]*u[11] + uik[5]*u[12] + uik[6]*u[13];
973       dk[8]+=  uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13];
974       dk[9]+= uik[14]*u[7]+ uik[15]*u[8]+ uik[16]*u[9]+ uik[17]*u[10]+ uik[18]*u[11]+ uik[19]*u[12]+ uik[20]*u[13];
975       dk[10]+=uik[21]*u[7]+ uik[22]*u[8]+ uik[23]*u[9]+ uik[24]*u[10]+ uik[25]*u[11]+ uik[26]*u[12]+ uik[27]*u[13];
976       dk[11]+=uik[28]*u[7]+ uik[29]*u[8]+ uik[30]*u[9]+ uik[31]*u[10]+ uik[32]*u[11]+ uik[33]*u[12]+ uik[34]*u[13];
977       dk[12]+=uik[35]*u[7]+ uik[36]*u[8]+ uik[37]*u[9]+ uik[38]*u[10]+ uik[39]*u[11]+ uik[40]*u[12]+ uik[41]*u[13];
978       dk[13]+=uik[42]*u[7]+ uik[43]*u[8]+ uik[44]*u[9]+ uik[45]*u[10]+ uik[46]*u[11]+ uik[47]*u[12]+ uik[48]*u[13];
979 
980       dk[14]+=  uik[0]*u[14] + uik[1]*u[15] + uik[2]*u[16] + uik[3]*u[17] + uik[4]*u[18] + uik[5]*u[19] + uik[6]*u[20];
981       dk[15]+=  uik[7]*u[14] + uik[8]*u[15] + uik[9]*u[16]+ uik[10]*u[17]+ uik[11]*u[18]+ uik[12]*u[19]+ uik[13]*u[20];
982       dk[16]+= uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20];
983       dk[17]+= uik[21]*u[14]+ uik[22]*u[15]+ uik[23]*u[16]+ uik[24]*u[17]+ uik[25]*u[18]+ uik[26]*u[19]+ uik[27]*u[20];
984       dk[18]+= uik[28]*u[14]+ uik[29]*u[15]+ uik[30]*u[16]+ uik[31]*u[17]+ uik[32]*u[18]+ uik[33]*u[19]+ uik[34]*u[20];
985       dk[19]+= uik[35]*u[14]+ uik[36]*u[15]+ uik[37]*u[16]+ uik[38]*u[17]+ uik[39]*u[18]+ uik[40]*u[19]+ uik[41]*u[20];
986       dk[20]+= uik[42]*u[14]+ uik[43]*u[15]+ uik[44]*u[16]+ uik[45]*u[17]+ uik[46]*u[18]+ uik[47]*u[19]+ uik[48]*u[20];
987 
988       dk[21]+=  uik[0]*u[21] + uik[1]*u[22] + uik[2]*u[23] + uik[3]*u[24] + uik[4]*u[25] + uik[5]*u[26] + uik[6]*u[27];
989       dk[22]+=  uik[7]*u[21] + uik[8]*u[22] + uik[9]*u[23]+ uik[10]*u[24]+ uik[11]*u[25]+ uik[12]*u[26]+ uik[13]*u[27];
990       dk[23]+= uik[14]*u[21]+ uik[15]*u[22]+ uik[16]*u[23]+ uik[17]*u[24]+ uik[18]*u[25]+ uik[19]*u[26]+ uik[20]*u[27];
991       dk[24]+= uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27];
992       dk[25]+= uik[28]*u[21]+ uik[29]*u[22]+ uik[30]*u[23]+ uik[31]*u[24]+ uik[32]*u[25]+ uik[33]*u[26]+ uik[34]*u[27];
993       dk[26]+= uik[35]*u[21]+ uik[36]*u[22]+ uik[37]*u[23]+ uik[38]*u[24]+ uik[39]*u[25]+ uik[40]*u[26]+ uik[41]*u[27];
994       dk[27]+= uik[42]*u[21]+ uik[43]*u[22]+ uik[44]*u[23]+ uik[45]*u[24]+ uik[46]*u[25]+ uik[47]*u[26]+ uik[48]*u[27];
995 
996       dk[28]+=  uik[0]*u[28] + uik[1]*u[29] + uik[2]*u[30] + uik[3]*u[31] + uik[4]*u[32] + uik[5]*u[33] + uik[6]*u[34];
997       dk[29]+=  uik[7]*u[28] + uik[8]*u[29] + uik[9]*u[30]+ uik[10]*u[31]+ uik[11]*u[32]+ uik[12]*u[33]+ uik[13]*u[34];
998       dk[30]+= uik[14]*u[28]+ uik[15]*u[29]+ uik[16]*u[30]+ uik[17]*u[31]+ uik[18]*u[32]+ uik[19]*u[33]+ uik[20]*u[34];
999       dk[31]+= uik[21]*u[28]+ uik[22]*u[29]+ uik[23]*u[30]+ uik[24]*u[31]+ uik[25]*u[32]+ uik[26]*u[33]+ uik[27]*u[34];
1000       dk[32]+= uik[28]*u[28]+ uik[29]*u[29]+ uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34];
1001       dk[33]+= uik[35]*u[28]+ uik[36]*u[29]+ uik[37]*u[30]+ uik[38]*u[31]+ uik[39]*u[32]+ uik[40]*u[33]+ uik[41]*u[34];
1002       dk[34]+= uik[42]*u[28]+ uik[43]*u[29]+ uik[44]*u[30]+ uik[45]*u[31]+ uik[46]*u[32]+ uik[47]*u[33]+ uik[48]*u[34];
1003 
1004       dk[35]+=  uik[0]*u[35] + uik[1]*u[36] + uik[2]*u[37] + uik[3]*u[38] + uik[4]*u[39] + uik[5]*u[40] + uik[6]*u[41];
1005       dk[36]+=  uik[7]*u[35] + uik[8]*u[36] + uik[9]*u[37]+ uik[10]*u[38]+ uik[11]*u[39]+ uik[12]*u[40]+ uik[13]*u[41];
1006       dk[37]+= uik[14]*u[35]+ uik[15]*u[36]+ uik[16]*u[37]+ uik[17]*u[38]+ uik[18]*u[39]+ uik[19]*u[40]+ uik[20]*u[41];
1007       dk[38]+= uik[21]*u[35]+ uik[22]*u[36]+ uik[23]*u[37]+ uik[24]*u[38]+ uik[25]*u[39]+ uik[26]*u[40]+ uik[27]*u[41];
1008       dk[39]+= uik[28]*u[35]+ uik[29]*u[36]+ uik[30]*u[37]+ uik[31]*u[38]+ uik[32]*u[39]+ uik[33]*u[40]+ uik[34]*u[41];
1009       dk[40]+= uik[35]*u[35]+ uik[36]*u[36]+ uik[37]*u[37]+ uik[38]*u[38]+ uik[39]*u[39]+ uik[40]*u[40]+ uik[41]*u[41];
1010       dk[41]+= uik[42]*u[35]+ uik[43]*u[36]+ uik[44]*u[37]+ uik[45]*u[38]+ uik[46]*u[39]+ uik[47]*u[40]+ uik[48]*u[41];
1011 
1012       dk[42]+=  uik[0]*u[42] + uik[1]*u[43] + uik[2]*u[44] + uik[3]*u[45] + uik[4]*u[46] + uik[5]*u[47] + uik[6]*u[48];
1013       dk[43]+=  uik[7]*u[42] + uik[8]*u[43] + uik[9]*u[44]+ uik[10]*u[45]+ uik[11]*u[46]+ uik[12]*u[47]+ uik[13]*u[48];
1014       dk[44]+= uik[14]*u[42]+ uik[15]*u[43]+ uik[16]*u[44]+ uik[17]*u[45]+ uik[18]*u[46]+ uik[19]*u[47]+ uik[20]*u[48];
1015       dk[45]+= uik[21]*u[42]+ uik[22]*u[43]+ uik[23]*u[44]+ uik[24]*u[45]+ uik[25]*u[46]+ uik[26]*u[47]+ uik[27]*u[48];
1016       dk[46]+= uik[28]*u[42]+ uik[29]*u[43]+ uik[30]*u[44]+ uik[31]*u[45]+ uik[32]*u[46]+ uik[33]*u[47]+ uik[34]*u[48];
1017       dk[47]+= uik[35]*u[42]+ uik[36]*u[43]+ uik[37]*u[44]+ uik[38]*u[45]+ uik[39]*u[46]+ uik[40]*u[47]+ uik[41]*u[48];
1018       dk[48]+= uik[42]*u[42]+ uik[43]*u[43]+ uik[44]*u[44]+ uik[45]*u[45]+ uik[46]*u[46]+ uik[47]*u[47]+ uik[48]*u[48];
1019 
1020       /* update -U(i,k) */
1021       ierr = PetscMemcpy(ba+ili*49,uik,49*sizeof(MatScalar));CHKERRQ(ierr);
1022 
1023       /* add multiple of row i to k-th row ... */
1024       jmin = ili + 1; jmax = bi[i+1];
1025       if (jmin < jmax){
1026         for (j=jmin; j<jmax; j++) {
1027           /* w += -U(i,k)^T * U_bar(i,j) */
1028           wp = w + bj[j]*49;
1029           u = ba + j*49;
1030 
1031           wp[0]+=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6];
1032           wp[1]+=  uik[7]*u[0] + uik[8]*u[1] + uik[9]*u[2]+ uik[10]*u[3]+ uik[11]*u[4]+ uik[12]*u[5]+ uik[13]*u[6];
1033           wp[2]+= uik[14]*u[0]+ uik[15]*u[1]+ uik[16]*u[2]+ uik[17]*u[3]+ uik[18]*u[4]+ uik[19]*u[5]+ uik[20]*u[6];
1034           wp[3]+= uik[21]*u[0]+ uik[22]*u[1]+ uik[23]*u[2]+ uik[24]*u[3]+ uik[25]*u[4]+ uik[26]*u[5]+ uik[27]*u[6];
1035           wp[4]+= uik[28]*u[0]+ uik[29]*u[1]+ uik[30]*u[2]+ uik[31]*u[3]+ uik[32]*u[4]+ uik[33]*u[5]+ uik[34]*u[6];
1036           wp[5]+= uik[35]*u[0]+ uik[36]*u[1]+ uik[37]*u[2]+ uik[38]*u[3]+ uik[39]*u[4]+ uik[40]*u[5]+ uik[41]*u[6];
1037           wp[6]+= uik[42]*u[0]+ uik[43]*u[1]+ uik[44]*u[2]+ uik[45]*u[3]+ uik[46]*u[4]+ uik[47]*u[5]+ uik[48]*u[6];
1038 
1039           wp[7]+=  uik[0]*u[7] + uik[1]*u[8] + uik[2]*u[9] + uik[3]*u[10] + uik[4]*u[11] + uik[5]*u[12] + uik[6]*u[13];
1040           wp[8]+=  uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13];
1041           wp[9]+= uik[14]*u[7]+ uik[15]*u[8]+ uik[16]*u[9]+ uik[17]*u[10]+ uik[18]*u[11]+ uik[19]*u[12]+ uik[20]*u[13];
1042           wp[10]+=uik[21]*u[7]+ uik[22]*u[8]+ uik[23]*u[9]+ uik[24]*u[10]+ uik[25]*u[11]+ uik[26]*u[12]+ uik[27]*u[13];
1043           wp[11]+=uik[28]*u[7]+ uik[29]*u[8]+ uik[30]*u[9]+ uik[31]*u[10]+ uik[32]*u[11]+ uik[33]*u[12]+ uik[34]*u[13];
1044           wp[12]+=uik[35]*u[7]+ uik[36]*u[8]+ uik[37]*u[9]+ uik[38]*u[10]+ uik[39]*u[11]+ uik[40]*u[12]+ uik[41]*u[13];
1045           wp[13]+=uik[42]*u[7]+ uik[43]*u[8]+ uik[44]*u[9]+ uik[45]*u[10]+ uik[46]*u[11]+ uik[47]*u[12]+ uik[48]*u[13];
1046 
1047           wp[14]+=  uik[0]*u[14] + uik[1]*u[15] + uik[2]*u[16] + uik[3]*u[17] + uik[4]*u[18] + uik[5]*u[19] + uik[6]*u[20];
1048           wp[15]+=  uik[7]*u[14] + uik[8]*u[15] + uik[9]*u[16]+ uik[10]*u[17]+ uik[11]*u[18]+ uik[12]*u[19]+ uik[13]*u[20];
1049           wp[16]+= uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20];
1050           wp[17]+= uik[21]*u[14]+ uik[22]*u[15]+ uik[23]*u[16]+ uik[24]*u[17]+ uik[25]*u[18]+ uik[26]*u[19]+ uik[27]*u[20];
1051           wp[18]+= uik[28]*u[14]+ uik[29]*u[15]+ uik[30]*u[16]+ uik[31]*u[17]+ uik[32]*u[18]+ uik[33]*u[19]+ uik[34]*u[20];
1052           wp[19]+= uik[35]*u[14]+ uik[36]*u[15]+ uik[37]*u[16]+ uik[38]*u[17]+ uik[39]*u[18]+ uik[40]*u[19]+ uik[41]*u[20];
1053           wp[20]+= uik[42]*u[14]+ uik[43]*u[15]+ uik[44]*u[16]+ uik[45]*u[17]+ uik[46]*u[18]+ uik[47]*u[19]+ uik[48]*u[20];
1054 
1055           wp[21]+=  uik[0]*u[21] + uik[1]*u[22] + uik[2]*u[23] + uik[3]*u[24] + uik[4]*u[25] + uik[5]*u[26] + uik[6]*u[27];
1056           wp[22]+=  uik[7]*u[21] + uik[8]*u[22] + uik[9]*u[23]+ uik[10]*u[24]+ uik[11]*u[25]+ uik[12]*u[26]+ uik[13]*u[27];
1057           wp[23]+= uik[14]*u[21]+ uik[15]*u[22]+ uik[16]*u[23]+ uik[17]*u[24]+ uik[18]*u[25]+ uik[19]*u[26]+ uik[20]*u[27];
1058           wp[24]+= uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27];
1059           wp[25]+= uik[28]*u[21]+ uik[29]*u[22]+ uik[30]*u[23]+ uik[31]*u[24]+ uik[32]*u[25]+ uik[33]*u[26]+ uik[34]*u[27];
1060           wp[26]+= uik[35]*u[21]+ uik[36]*u[22]+ uik[37]*u[23]+ uik[38]*u[24]+ uik[39]*u[25]+ uik[40]*u[26]+ uik[41]*u[27];
1061           wp[27]+= uik[42]*u[21]+ uik[43]*u[22]+ uik[44]*u[23]+ uik[45]*u[24]+ uik[46]*u[25]+ uik[47]*u[26]+ uik[48]*u[27];
1062 
1063           wp[28]+=  uik[0]*u[28] + uik[1]*u[29] + uik[2]*u[30] + uik[3]*u[31] + uik[4]*u[32] + uik[5]*u[33] + uik[6]*u[34];
1064           wp[29]+=  uik[7]*u[28] + uik[8]*u[29] + uik[9]*u[30]+ uik[10]*u[31]+ uik[11]*u[32]+ uik[12]*u[33]+ uik[13]*u[34];
1065           wp[30]+= uik[14]*u[28]+ uik[15]*u[29]+ uik[16]*u[30]+ uik[17]*u[31]+ uik[18]*u[32]+ uik[19]*u[33]+ uik[20]*u[34];
1066           wp[31]+= uik[21]*u[28]+ uik[22]*u[29]+ uik[23]*u[30]+ uik[24]*u[31]+ uik[25]*u[32]+ uik[26]*u[33]+ uik[27]*u[34];
1067           wp[32]+= uik[28]*u[28]+ uik[29]*u[29]+ uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34];
1068           wp[33]+= uik[35]*u[28]+ uik[36]*u[29]+ uik[37]*u[30]+ uik[38]*u[31]+ uik[39]*u[32]+ uik[40]*u[33]+ uik[41]*u[34];
1069           wp[34]+= uik[42]*u[28]+ uik[43]*u[29]+ uik[44]*u[30]+ uik[45]*u[31]+ uik[46]*u[32]+ uik[47]*u[33]+ uik[48]*u[34];
1070 
1071           wp[35]+=  uik[0]*u[35] + uik[1]*u[36] + uik[2]*u[37] + uik[3]*u[38] + uik[4]*u[39] + uik[5]*u[40] + uik[6]*u[41];
1072           wp[36]+=  uik[7]*u[35] + uik[8]*u[36] + uik[9]*u[37]+ uik[10]*u[38]+ uik[11]*u[39]+ uik[12]*u[40]+ uik[13]*u[41];
1073           wp[37]+= uik[14]*u[35]+ uik[15]*u[36]+ uik[16]*u[37]+ uik[17]*u[38]+ uik[18]*u[39]+ uik[19]*u[40]+ uik[20]*u[41];
1074           wp[38]+= uik[21]*u[35]+ uik[22]*u[36]+ uik[23]*u[37]+ uik[24]*u[38]+ uik[25]*u[39]+ uik[26]*u[40]+ uik[27]*u[41];
1075           wp[39]+= uik[28]*u[35]+ uik[29]*u[36]+ uik[30]*u[37]+ uik[31]*u[38]+ uik[32]*u[39]+ uik[33]*u[40]+ uik[34]*u[41];
1076           wp[40]+= uik[35]*u[35]+ uik[36]*u[36]+ uik[37]*u[37]+ uik[38]*u[38]+ uik[39]*u[39]+ uik[40]*u[40]+ uik[41]*u[41];
1077           wp[41]+= uik[42]*u[35]+ uik[43]*u[36]+ uik[44]*u[37]+ uik[45]*u[38]+ uik[46]*u[39]+ uik[47]*u[40]+ uik[48]*u[41];
1078 
1079           wp[42]+=  uik[0]*u[42] + uik[1]*u[43] + uik[2]*u[44] + uik[3]*u[45] + uik[4]*u[46] + uik[5]*u[47] + uik[6]*u[48];
1080           wp[43]+=  uik[7]*u[42] + uik[8]*u[43] + uik[9]*u[44]+ uik[10]*u[45]+ uik[11]*u[46]+ uik[12]*u[47]+ uik[13]*u[48];
1081           wp[44]+= uik[14]*u[42]+ uik[15]*u[43]+ uik[16]*u[44]+ uik[17]*u[45]+ uik[18]*u[46]+ uik[19]*u[47]+ uik[20]*u[48];
1082           wp[45]+= uik[21]*u[42]+ uik[22]*u[43]+ uik[23]*u[44]+ uik[24]*u[45]+ uik[25]*u[46]+ uik[26]*u[47]+ uik[27]*u[48];
1083           wp[46]+= uik[28]*u[42]+ uik[29]*u[43]+ uik[30]*u[44]+ uik[31]*u[45]+ uik[32]*u[46]+ uik[33]*u[47]+ uik[34]*u[48];
1084           wp[47]+= uik[35]*u[42]+ uik[36]*u[43]+ uik[37]*u[44]+ uik[38]*u[45]+ uik[39]*u[46]+ uik[40]*u[47]+ uik[41]*u[48];
1085           wp[48]+= uik[42]*u[42]+ uik[43]*u[43]+ uik[44]*u[44]+ uik[45]*u[45]+ uik[46]*u[46]+ uik[47]*u[47]+ uik[48]*u[48];
1086         }
1087 
1088         /* ... add i to row list for next nonzero entry */
1089         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1090         j     = bj[jmin];
1091         jl[i] = jl[j]; jl[j] = i; /* update jl */
1092       }
1093       i = nexti;
1094     }
1095 
1096     /* save nonzero entries in k-th row of U ... */
1097 
1098     /* invert diagonal block */
1099     d = ba+k*49;
1100     ierr = PetscMemcpy(d,dk,49*sizeof(MatScalar));CHKERRQ(ierr);
1101     ierr = Kernel_A_gets_inverse_A_7(d);CHKERRQ(ierr);
1102 
1103     jmin = bi[k]; jmax = bi[k+1];
1104     if (jmin < jmax) {
1105       for (j=jmin; j<jmax; j++){
1106          vj = bj[j];           /* block col. index of U */
1107          u   = ba + j*49;
1108          wp = w + vj*49;
1109          for (k1=0; k1<49; k1++){
1110            *u++        = *wp;
1111            *wp++ = 0.0;
1112          }
1113       }
1114 
1115       /* ... add k to row list for first nonzero entry in k-th row */
1116       il[k] = jmin;
1117       i     = bj[jmin];
1118       jl[k] = jl[i]; jl[i] = k;
1119     }
1120   }
1121 
1122   ierr = PetscFree(w);CHKERRQ(ierr);
1123   ierr = PetscFree(il);CHKERRQ(ierr);
1124   ierr = PetscFree(jl);CHKERRQ(ierr);
1125   ierr = PetscFree(dk);CHKERRQ(ierr);
1126   ierr = PetscFree(uik);CHKERRQ(ierr);
1127 
1128   C->factor    = FACTOR_CHOLESKY;
1129   C->assembled = PETSC_TRUE;
1130   C->preallocated = PETSC_TRUE;
1131   PLogFlops(1.3333*343*b->mbs); /* from inverting diagonal blocks */
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 /* Version for when blocks are 6 by 6 */
1136 #undef __FUNC__
1137 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_6"
1138 int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat A,Mat *B)
1139 {
1140   Mat                C = *B;
1141   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1142   IS                 perm = b->row;
1143   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1144   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1145   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
1146   MatScalar          *u,*d,*w,*wp;
1147 
1148   PetscFunctionBegin;
1149   /* initialization */
1150   w  = (MatScalar*)PetscMalloc(36*mbs*sizeof(MatScalar));CHKPTRQ(w);
1151   ierr = PetscMemzero(w,36*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1152   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
1153   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
1154   for (i=0; i<mbs; i++) {
1155     jl[i] = mbs; il[0] = 0;
1156   }
1157   dk    = (MatScalar*)PetscMalloc(36*sizeof(MatScalar));CHKPTRQ(dk);
1158   uik   = (MatScalar*)PetscMalloc(36*sizeof(MatScalar));CHKPTRQ(uik);
1159   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
1160 
1161   /* check permutation */
1162   if (!a->permute){
1163     ai = a->i; aj = a->j; aa = a->a;
1164   } else {
1165     ai = a->inew; aj = a->jnew;
1166     aa = (MatScalar*)PetscMalloc(36*ai[mbs]*sizeof(MatScalar));CHKPTRQ(aa);
1167     ierr = PetscMemcpy(aa,a->a,36*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1168     a2anew  = (int*)PetscMalloc(ai[mbs]*sizeof(int));CHKPTRQ(a2anew);
1169     ierr= PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
1170 
1171     for (i=0; i<mbs; i++){
1172       jmin = ai[i]; jmax = ai[i+1];
1173       for (j=jmin; j<jmax; j++){
1174         while (a2anew[j] != j){
1175           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
1176           for (k1=0; k1<36; k1++){
1177             dk[k1]       = aa[k*36+k1];
1178             aa[k*36+k1] = aa[j*36+k1];
1179             aa[j*36+k1] = dk[k1];
1180           }
1181         }
1182         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
1183         if (i > aj[j]){
1184           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
1185           ap = aa + j*36;                     /* ptr to the beginning of j-th block of aa */
1186           for (k=0; k<36; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
1187           for (k=0; k<6; k++){               /* j-th block of aa <- dk^T */
1188             for (k1=0; k1<6; k1++) *ap++ = dk[k + 6*k1];
1189           }
1190         }
1191       }
1192     }
1193     ierr = PetscFree(a2anew);CHKERRA(ierr);
1194   }
1195 
1196   /* for each row k */
1197   for (k = 0; k<mbs; k++){
1198 
1199     /*initialize k-th row with elements nonzero in row perm(k) of A */
1200     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
1201     if (jmin < jmax) {
1202       ap = aa + jmin*36;
1203       for (j = jmin; j < jmax; j++){
1204         vj = perm_ptr[aj[j]];         /* block col. index */
1205         wp = w + vj*36;
1206         for (i=0; i<36; i++) *wp++ = *ap++;
1207       }
1208     }
1209 
1210     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1211     ierr = PetscMemcpy(dk,w+k*36,36*sizeof(MatScalar));CHKERRQ(ierr);
1212     i = jl[k]; /* first row to be added to k_th row  */
1213 
1214     while (i < mbs){
1215       nexti = jl[i]; /* next row to be added to k_th row */
1216 
1217       /* compute multiplier */
1218       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1219 
1220       /* uik = -inv(Di)*U_bar(i,k) */
1221       d = ba + i*36;
1222       u    = ba + ili*36;
1223 
1224       uik[0] = -(d[0]*u[0] + d[6]*u[1] + d[12]*u[2] + d[18]*u[3] + d[24]*u[4] + d[30]*u[5]);
1225       uik[1] = -(d[1]*u[0] + d[7]*u[1] + d[13]*u[2] + d[19]*u[3] + d[25]*u[4] + d[31]*u[5]);
1226       uik[2] = -(d[2]*u[0] + d[8]*u[1] + d[14]*u[2] + d[20]*u[3] + d[26]*u[4] + d[32]*u[5]);
1227       uik[3] = -(d[3]*u[0] + d[9]*u[1] + d[15]*u[2] + d[21]*u[3] + d[27]*u[4] + d[33]*u[5]);
1228       uik[4] = -(d[4]*u[0]+ d[10]*u[1] + d[16]*u[2] + d[22]*u[3] + d[28]*u[4] + d[34]*u[5]);
1229       uik[5] = -(d[5]*u[0]+ d[11]*u[1] + d[17]*u[2] + d[23]*u[3] + d[29]*u[4] + d[35]*u[5]);
1230 
1231       uik[6] = -(d[0]*u[6] + d[6]*u[7] + d[12]*u[8] + d[18]*u[9] + d[24]*u[10] + d[30]*u[11]);
1232       uik[7] = -(d[1]*u[6] + d[7]*u[7] + d[13]*u[8] + d[19]*u[9] + d[25]*u[10] + d[31]*u[11]);
1233       uik[8] = -(d[2]*u[6] + d[8]*u[7] + d[14]*u[8] + d[20]*u[9] + d[26]*u[10] + d[32]*u[11]);
1234       uik[9] = -(d[3]*u[6] + d[9]*u[7] + d[15]*u[8] + d[21]*u[9] + d[27]*u[10] + d[33]*u[11]);
1235       uik[10]= -(d[4]*u[6]+ d[10]*u[7] + d[16]*u[8] + d[22]*u[9] + d[28]*u[10] + d[34]*u[11]);
1236       uik[11]= -(d[5]*u[6]+ d[11]*u[7] + d[17]*u[8] + d[23]*u[9] + d[29]*u[10] + d[35]*u[11]);
1237 
1238       uik[12] = -(d[0]*u[12] + d[6]*u[13] + d[12]*u[14] + d[18]*u[15] + d[24]*u[16] + d[30]*u[17]);
1239       uik[13] = -(d[1]*u[12] + d[7]*u[13] + d[13]*u[14] + d[19]*u[15] + d[25]*u[16] + d[31]*u[17]);
1240       uik[14] = -(d[2]*u[12] + d[8]*u[13] + d[14]*u[14] + d[20]*u[15] + d[26]*u[16] + d[32]*u[17]);
1241       uik[15] = -(d[3]*u[12] + d[9]*u[13] + d[15]*u[14] + d[21]*u[15] + d[27]*u[16] + d[33]*u[17]);
1242       uik[16] = -(d[4]*u[12]+ d[10]*u[13] + d[16]*u[14] + d[22]*u[15] + d[28]*u[16] + d[34]*u[17]);
1243       uik[17] = -(d[5]*u[12]+ d[11]*u[13] + d[17]*u[14] + d[23]*u[15] + d[29]*u[16] + d[35]*u[17]);
1244 
1245       uik[18] = -(d[0]*u[18] + d[6]*u[19] + d[12]*u[20] + d[18]*u[21] + d[24]*u[22] + d[30]*u[23]);
1246       uik[19] = -(d[1]*u[18] + d[7]*u[19] + d[13]*u[20] + d[19]*u[21] + d[25]*u[22] + d[31]*u[23]);
1247       uik[20] = -(d[2]*u[18] + d[8]*u[19] + d[14]*u[20] + d[20]*u[21] + d[26]*u[22] + d[32]*u[23]);
1248       uik[21] = -(d[3]*u[18] + d[9]*u[19] + d[15]*u[20] + d[21]*u[21] + d[27]*u[22] + d[33]*u[23]);
1249       uik[22] = -(d[4]*u[18]+ d[10]*u[19] + d[16]*u[20] + d[22]*u[21] + d[28]*u[22] + d[34]*u[23]);
1250       uik[23] = -(d[5]*u[18]+ d[11]*u[19] + d[17]*u[20] + d[23]*u[21] + d[29]*u[22] + d[35]*u[23]);
1251 
1252       uik[24] = -(d[0]*u[24] + d[6]*u[25] + d[12]*u[26] + d[18]*u[27] + d[24]*u[28] + d[30]*u[29]);
1253       uik[25] = -(d[1]*u[24] + d[7]*u[25] + d[13]*u[26] + d[19]*u[27] + d[25]*u[28] + d[31]*u[29]);
1254       uik[26] = -(d[2]*u[24] + d[8]*u[25] + d[14]*u[26] + d[20]*u[27] + d[26]*u[28] + d[32]*u[29]);
1255       uik[27] = -(d[3]*u[24] + d[9]*u[25] + d[15]*u[26] + d[21]*u[27] + d[27]*u[28] + d[33]*u[29]);
1256       uik[28] = -(d[4]*u[24]+ d[10]*u[25] + d[16]*u[26] + d[22]*u[27] + d[28]*u[28] + d[34]*u[29]);
1257       uik[29] = -(d[5]*u[24]+ d[11]*u[25] + d[17]*u[26] + d[23]*u[27] + d[29]*u[28] + d[35]*u[29]);
1258 
1259       uik[30] = -(d[0]*u[30] + d[6]*u[31] + d[12]*u[32] + d[18]*u[33] + d[24]*u[34] + d[30]*u[35]);
1260       uik[31] = -(d[1]*u[30] + d[7]*u[31] + d[13]*u[32] + d[19]*u[33] + d[25]*u[34] + d[31]*u[35]);
1261       uik[32] = -(d[2]*u[30] + d[8]*u[31] + d[14]*u[32] + d[20]*u[33] + d[26]*u[34] + d[32]*u[35]);
1262       uik[33] = -(d[3]*u[30] + d[9]*u[31] + d[15]*u[32] + d[21]*u[33] + d[27]*u[34] + d[33]*u[35]);
1263       uik[34] = -(d[4]*u[30]+ d[10]*u[31] + d[16]*u[32] + d[22]*u[33] + d[28]*u[34] + d[34]*u[35]);
1264       uik[35] = -(d[5]*u[30]+ d[11]*u[31] + d[17]*u[32] + d[23]*u[33] + d[29]*u[34] + d[35]*u[35]);
1265 
1266       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
1267       dk[0] +=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5];
1268       dk[1] +=  uik[6]*u[0] + uik[7]*u[1] + uik[8]*u[2] + uik[9]*u[3]+ uik[10]*u[4]+ uik[11]*u[5];
1269       dk[2] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3]+ uik[16]*u[4]+ uik[17]*u[5];
1270       dk[3] += uik[18]*u[0]+ uik[19]*u[1]+ uik[20]*u[2]+ uik[21]*u[3]+ uik[22]*u[4]+ uik[23]*u[5];
1271       dk[4] += uik[24]*u[0]+ uik[25]*u[1]+ uik[26]*u[2]+ uik[27]*u[3]+ uik[28]*u[4]+ uik[29]*u[5];
1272       dk[5] += uik[30]*u[0]+ uik[31]*u[1]+ uik[32]*u[2]+ uik[33]*u[3]+ uik[34]*u[4]+ uik[35]*u[5];
1273 
1274       dk[6] +=  uik[0]*u[6] + uik[1]*u[7] + uik[2]*u[8] + uik[3]*u[9] + uik[4]*u[10] + uik[5]*u[11];
1275       dk[7] +=  uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11];
1276       dk[8] += uik[12]*u[6]+ uik[13]*u[7]+ uik[14]*u[8]+ uik[15]*u[9]+ uik[16]*u[10]+ uik[17]*u[11];
1277       dk[9] += uik[18]*u[6]+ uik[19]*u[7]+ uik[20]*u[8]+ uik[21]*u[9]+ uik[22]*u[10]+ uik[23]*u[11];
1278       dk[10]+= uik[24]*u[6]+ uik[25]*u[7]+ uik[26]*u[8]+ uik[27]*u[9]+ uik[28]*u[10]+ uik[29]*u[11];
1279       dk[11]+= uik[30]*u[6]+ uik[31]*u[7]+ uik[32]*u[8]+ uik[33]*u[9]+ uik[34]*u[10]+ uik[35]*u[11];
1280 
1281       dk[12]+=  uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15] + uik[4]*u[16] + uik[5]*u[17];
1282       dk[13]+=  uik[6]*u[12] + uik[7]*u[13] + uik[8]*u[14] + uik[9]*u[15]+ uik[10]*u[16]+ uik[11]*u[17];
1283       dk[14]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17];
1284       dk[15]+= uik[18]*u[12]+ uik[19]*u[13]+ uik[20]*u[14]+ uik[21]*u[15]+ uik[22]*u[16]+ uik[23]*u[17];
1285       dk[16]+= uik[24]*u[12]+ uik[25]*u[13]+ uik[26]*u[14]+ uik[27]*u[15]+ uik[28]*u[16]+ uik[29]*u[17];
1286       dk[17]+= uik[30]*u[12]+ uik[31]*u[13]+ uik[32]*u[14]+ uik[33]*u[15]+ uik[34]*u[16]+ uik[35]*u[17];
1287 
1288       dk[18]+=  uik[0]*u[18] + uik[1]*u[19] + uik[2]*u[20] + uik[3]*u[21] + uik[4]*u[22] + uik[5]*u[23];
1289       dk[19]+=  uik[6]*u[18] + uik[7]*u[19] + uik[8]*u[20] + uik[9]*u[21]+ uik[10]*u[22]+ uik[11]*u[23];
1290       dk[20]+= uik[12]*u[18]+ uik[13]*u[19]+ uik[14]*u[20]+ uik[15]*u[21]+ uik[16]*u[22]+ uik[17]*u[23];
1291       dk[21]+= uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20]+ uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23];
1292       dk[22]+= uik[24]*u[18]+ uik[25]*u[19]+ uik[26]*u[20]+ uik[27]*u[21]+ uik[28]*u[22]+ uik[29]*u[23];
1293       dk[23]+= uik[30]*u[18]+ uik[31]*u[19]+ uik[32]*u[20]+ uik[33]*u[21]+ uik[34]*u[22]+ uik[35]*u[23];
1294 
1295       dk[24]+=  uik[0]*u[24] + uik[1]*u[25] + uik[2]*u[26] + uik[3]*u[27] + uik[4]*u[28] + uik[5]*u[29];
1296       dk[25]+=  uik[6]*u[24] + uik[7]*u[25] + uik[8]*u[26] + uik[9]*u[27]+ uik[10]*u[28]+ uik[11]*u[29];
1297       dk[26]+= uik[12]*u[24]+ uik[13]*u[25]+ uik[14]*u[26]+ uik[15]*u[27]+ uik[16]*u[28]+ uik[17]*u[29];
1298       dk[27]+= uik[18]*u[24]+ uik[19]*u[25]+ uik[20]*u[26]+ uik[21]*u[27]+ uik[22]*u[28]+ uik[23]*u[29];
1299       dk[28]+= uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27]+ uik[28]*u[28]+ uik[29]*u[29];
1300       dk[29]+= uik[30]*u[24]+ uik[31]*u[25]+ uik[32]*u[26]+ uik[33]*u[27]+ uik[34]*u[28]+ uik[35]*u[29];
1301 
1302       dk[30]+=  uik[0]*u[30] + uik[1]*u[31] + uik[2]*u[32] + uik[3]*u[33] + uik[4]*u[34] + uik[5]*u[35];
1303       dk[31]+=  uik[6]*u[30] + uik[7]*u[31] + uik[8]*u[32] + uik[9]*u[33]+ uik[10]*u[34]+ uik[11]*u[35];
1304       dk[32]+= uik[12]*u[30]+ uik[13]*u[31]+ uik[14]*u[32]+ uik[15]*u[33]+ uik[16]*u[34]+ uik[17]*u[35];
1305       dk[33]+= uik[18]*u[30]+ uik[19]*u[31]+ uik[20]*u[32]+ uik[21]*u[33]+ uik[22]*u[34]+ uik[23]*u[35];
1306       dk[34]+= uik[24]*u[30]+ uik[25]*u[31]+ uik[26]*u[32]+ uik[27]*u[33]+ uik[28]*u[34]+ uik[29]*u[35];
1307       dk[35]+= uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34]+ uik[35]*u[35];
1308 
1309       /* update -U(i,k) */
1310       ierr = PetscMemcpy(ba+ili*36,uik,36*sizeof(MatScalar));CHKERRQ(ierr);
1311 
1312       /* add multiple of row i to k-th row ... */
1313       jmin = ili + 1; jmax = bi[i+1];
1314       if (jmin < jmax){
1315         for (j=jmin; j<jmax; j++) {
1316           /* w += -U(i,k)^T * U_bar(i,j) */
1317           wp = w + bj[j]*36;
1318           u = ba + j*36;
1319           wp[0] +=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5];
1320           wp[1] +=  uik[6]*u[0] + uik[7]*u[1] + uik[8]*u[2] + uik[9]*u[3]+ uik[10]*u[4]+ uik[11]*u[5];
1321           wp[2] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3]+ uik[16]*u[4]+ uik[17]*u[5];
1322           wp[3] += uik[18]*u[0]+ uik[19]*u[1]+ uik[20]*u[2]+ uik[21]*u[3]+ uik[22]*u[4]+ uik[23]*u[5];
1323           wp[4] += uik[24]*u[0]+ uik[25]*u[1]+ uik[26]*u[2]+ uik[27]*u[3]+ uik[28]*u[4]+ uik[29]*u[5];
1324           wp[5] += uik[30]*u[0]+ uik[31]*u[1]+ uik[32]*u[2]+ uik[33]*u[3]+ uik[34]*u[4]+ uik[35]*u[5];
1325 
1326           wp[6] +=  uik[0]*u[6] + uik[1]*u[7] + uik[2]*u[8] + uik[3]*u[9] + uik[4]*u[10] + uik[5]*u[11];
1327           wp[7] +=  uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11];
1328           wp[8] += uik[12]*u[6]+ uik[13]*u[7]+ uik[14]*u[8]+ uik[15]*u[9]+ uik[16]*u[10]+ uik[17]*u[11];
1329           wp[9] += uik[18]*u[6]+ uik[19]*u[7]+ uik[20]*u[8]+ uik[21]*u[9]+ uik[22]*u[10]+ uik[23]*u[11];
1330           wp[10]+= uik[24]*u[6]+ uik[25]*u[7]+ uik[26]*u[8]+ uik[27]*u[9]+ uik[28]*u[10]+ uik[29]*u[11];
1331           wp[11]+= uik[30]*u[6]+ uik[31]*u[7]+ uik[32]*u[8]+ uik[33]*u[9]+ uik[34]*u[10]+ uik[35]*u[11];
1332 
1333           wp[12]+=  uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15] + uik[4]*u[16] + uik[5]*u[17];
1334           wp[13]+=  uik[6]*u[12] + uik[7]*u[13] + uik[8]*u[14] + uik[9]*u[15]+ uik[10]*u[16]+ uik[11]*u[17];
1335           wp[14]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17];
1336           wp[15]+= uik[18]*u[12]+ uik[19]*u[13]+ uik[20]*u[14]+ uik[21]*u[15]+ uik[22]*u[16]+ uik[23]*u[17];
1337           wp[16]+= uik[24]*u[12]+ uik[25]*u[13]+ uik[26]*u[14]+ uik[27]*u[15]+ uik[28]*u[16]+ uik[29]*u[17];
1338           wp[17]+= uik[30]*u[12]+ uik[31]*u[13]+ uik[32]*u[14]+ uik[33]*u[15]+ uik[34]*u[16]+ uik[35]*u[17];
1339 
1340           wp[18]+=  uik[0]*u[18] + uik[1]*u[19] + uik[2]*u[20] + uik[3]*u[21] + uik[4]*u[22] + uik[5]*u[23];
1341           wp[19]+=  uik[6]*u[18] + uik[7]*u[19] + uik[8]*u[20] + uik[9]*u[21]+ uik[10]*u[22]+ uik[11]*u[23];
1342           wp[20]+= uik[12]*u[18]+ uik[13]*u[19]+ uik[14]*u[20]+ uik[15]*u[21]+ uik[16]*u[22]+ uik[17]*u[23];
1343           wp[21]+= uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20]+ uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23];
1344           wp[22]+= uik[24]*u[18]+ uik[25]*u[19]+ uik[26]*u[20]+ uik[27]*u[21]+ uik[28]*u[22]+ uik[29]*u[23];
1345           wp[23]+= uik[30]*u[18]+ uik[31]*u[19]+ uik[32]*u[20]+ uik[33]*u[21]+ uik[34]*u[22]+ uik[35]*u[23];
1346 
1347           wp[24]+=  uik[0]*u[24] + uik[1]*u[25] + uik[2]*u[26] + uik[3]*u[27] + uik[4]*u[28] + uik[5]*u[29];
1348           wp[25]+=  uik[6]*u[24] + uik[7]*u[25] + uik[8]*u[26] + uik[9]*u[27]+ uik[10]*u[28]+ uik[11]*u[29];
1349           wp[26]+= uik[12]*u[24]+ uik[13]*u[25]+ uik[14]*u[26]+ uik[15]*u[27]+ uik[16]*u[28]+ uik[17]*u[29];
1350           wp[27]+= uik[18]*u[24]+ uik[19]*u[25]+ uik[20]*u[26]+ uik[21]*u[27]+ uik[22]*u[28]+ uik[23]*u[29];
1351           wp[28]+= uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27]+ uik[28]*u[28]+ uik[29]*u[29];
1352           wp[29]+= uik[30]*u[24]+ uik[31]*u[25]+ uik[32]*u[26]+ uik[33]*u[27]+ uik[34]*u[28]+ uik[35]*u[29];
1353 
1354           wp[30]+=  uik[0]*u[30] + uik[1]*u[31] + uik[2]*u[32] + uik[3]*u[33] + uik[4]*u[34] + uik[5]*u[35];
1355           wp[31]+=  uik[6]*u[30] + uik[7]*u[31] + uik[8]*u[32] + uik[9]*u[33]+ uik[10]*u[34]+ uik[11]*u[35];
1356           wp[32]+= uik[12]*u[30]+ uik[13]*u[31]+ uik[14]*u[32]+ uik[15]*u[33]+ uik[16]*u[34]+ uik[17]*u[35];
1357           wp[33]+= uik[18]*u[30]+ uik[19]*u[31]+ uik[20]*u[32]+ uik[21]*u[33]+ uik[22]*u[34]+ uik[23]*u[35];
1358           wp[34]+= uik[24]*u[30]+ uik[25]*u[31]+ uik[26]*u[32]+ uik[27]*u[33]+ uik[28]*u[34]+ uik[29]*u[35];
1359           wp[35]+= uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34]+ uik[35]*u[35];
1360         }
1361 
1362         /* ... add i to row list for next nonzero entry */
1363         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1364         j     = bj[jmin];
1365         jl[i] = jl[j]; jl[j] = i; /* update jl */
1366       }
1367       i = nexti;
1368     }
1369 
1370     /* save nonzero entries in k-th row of U ... */
1371 
1372     /* invert diagonal block */
1373     d = ba+k*36;
1374     ierr = PetscMemcpy(d,dk,36*sizeof(MatScalar));CHKERRQ(ierr);
1375     ierr = Kernel_A_gets_inverse_A_6(d);CHKERRQ(ierr);
1376 
1377     jmin = bi[k]; jmax = bi[k+1];
1378     if (jmin < jmax) {
1379       for (j=jmin; j<jmax; j++){
1380          vj = bj[j];           /* block col. index of U */
1381          u   = ba + j*36;
1382          wp = w + vj*36;
1383          for (k1=0; k1<36; k1++){
1384            *u++        = *wp;
1385            *wp++ = 0.0;
1386          }
1387       }
1388 
1389       /* ... add k to row list for first nonzero entry in k-th row */
1390       il[k] = jmin;
1391       i     = bj[jmin];
1392       jl[k] = jl[i]; jl[i] = k;
1393     }
1394   }
1395 
1396   ierr = PetscFree(w);CHKERRQ(ierr);
1397   ierr = PetscFree(il);CHKERRQ(ierr);
1398   ierr = PetscFree(jl);CHKERRQ(ierr);
1399   ierr = PetscFree(dk);CHKERRQ(ierr);
1400   ierr = PetscFree(uik);CHKERRQ(ierr);
1401   if (a->permute){
1402     ierr = PetscFree(aa);CHKERRQ(ierr);
1403   }
1404 
1405   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
1406   C->factor    = FACTOR_CHOLESKY;
1407   C->assembled = PETSC_TRUE;
1408   C->preallocated = PETSC_TRUE;
1409   PLogFlops(1.3333*216*b->mbs); /* from inverting diagonal blocks */
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 /*
1414       Version for when blocks are 6 by 6 Using natural ordering
1415 */
1416 #undef __FUNC__
1417 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering"
1418 int MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering(Mat A,Mat *B)
1419 {
1420   Mat                C = *B;
1421   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1422   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1423   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1424   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
1425   MatScalar          *u,*d,*w,*wp;
1426 
1427   PetscFunctionBegin;
1428 
1429   /* initialization */
1430   w  = (MatScalar*)PetscMalloc(36*mbs*sizeof(MatScalar));CHKPTRQ(w);
1431   ierr = PetscMemzero(w,36*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1432   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
1433   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
1434   for (i=0; i<mbs; i++) {
1435     jl[i] = mbs; il[0] = 0;
1436   }
1437   dk    = (MatScalar*)PetscMalloc(36*sizeof(MatScalar));CHKPTRQ(dk);
1438   uik   = (MatScalar*)PetscMalloc(36*sizeof(MatScalar));CHKPTRQ(uik);
1439 
1440   ai = a->i; aj = a->j; aa = a->a;
1441 
1442   /* for each row k */
1443   for (k = 0; k<mbs; k++){
1444 
1445     /*initialize k-th row with elements nonzero in row k of A */
1446     jmin = ai[k]; jmax = ai[k+1];
1447     if (jmin < jmax) {
1448       ap = aa + jmin*36;
1449       for (j = jmin; j < jmax; j++){
1450         vj = aj[j];         /* block col. index */
1451         wp = w + vj*36;
1452         for (i=0; i<36; i++) *wp++ = *ap++;
1453       }
1454     }
1455 
1456     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1457     ierr = PetscMemcpy(dk,w+k*36,36*sizeof(MatScalar));CHKERRQ(ierr);
1458     i = jl[k]; /* first row to be added to k_th row  */
1459 
1460     while (i < mbs){
1461       nexti = jl[i]; /* next row to be added to k_th row */
1462 
1463       /* compute multiplier */
1464       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1465 
1466       /* uik = -inv(Di)*U_bar(i,k) */
1467       d = ba + i*36;
1468       u    = ba + ili*36;
1469 
1470       uik[0] = -(d[0]*u[0] + d[6]*u[1] + d[12]*u[2] + d[18]*u[3] + d[24]*u[4] + d[30]*u[5]);
1471       uik[1] = -(d[1]*u[0] + d[7]*u[1] + d[13]*u[2] + d[19]*u[3] + d[25]*u[4] + d[31]*u[5]);
1472       uik[2] = -(d[2]*u[0] + d[8]*u[1] + d[14]*u[2] + d[20]*u[3] + d[26]*u[4] + d[32]*u[5]);
1473       uik[3] = -(d[3]*u[0] + d[9]*u[1] + d[15]*u[2] + d[21]*u[3] + d[27]*u[4] + d[33]*u[5]);
1474       uik[4] = -(d[4]*u[0]+ d[10]*u[1] + d[16]*u[2] + d[22]*u[3] + d[28]*u[4] + d[34]*u[5]);
1475       uik[5] = -(d[5]*u[0]+ d[11]*u[1] + d[17]*u[2] + d[23]*u[3] + d[29]*u[4] + d[35]*u[5]);
1476 
1477       uik[6] = -(d[0]*u[6] + d[6]*u[7] + d[12]*u[8] + d[18]*u[9] + d[24]*u[10] + d[30]*u[11]);
1478       uik[7] = -(d[1]*u[6] + d[7]*u[7] + d[13]*u[8] + d[19]*u[9] + d[25]*u[10] + d[31]*u[11]);
1479       uik[8] = -(d[2]*u[6] + d[8]*u[7] + d[14]*u[8] + d[20]*u[9] + d[26]*u[10] + d[32]*u[11]);
1480       uik[9] = -(d[3]*u[6] + d[9]*u[7] + d[15]*u[8] + d[21]*u[9] + d[27]*u[10] + d[33]*u[11]);
1481       uik[10]= -(d[4]*u[6]+ d[10]*u[7] + d[16]*u[8] + d[22]*u[9] + d[28]*u[10] + d[34]*u[11]);
1482       uik[11]= -(d[5]*u[6]+ d[11]*u[7] + d[17]*u[8] + d[23]*u[9] + d[29]*u[10] + d[35]*u[11]);
1483 
1484       uik[12] = -(d[0]*u[12] + d[6]*u[13] + d[12]*u[14] + d[18]*u[15] + d[24]*u[16] + d[30]*u[17]);
1485       uik[13] = -(d[1]*u[12] + d[7]*u[13] + d[13]*u[14] + d[19]*u[15] + d[25]*u[16] + d[31]*u[17]);
1486       uik[14] = -(d[2]*u[12] + d[8]*u[13] + d[14]*u[14] + d[20]*u[15] + d[26]*u[16] + d[32]*u[17]);
1487       uik[15] = -(d[3]*u[12] + d[9]*u[13] + d[15]*u[14] + d[21]*u[15] + d[27]*u[16] + d[33]*u[17]);
1488       uik[16] = -(d[4]*u[12]+ d[10]*u[13] + d[16]*u[14] + d[22]*u[15] + d[28]*u[16] + d[34]*u[17]);
1489       uik[17] = -(d[5]*u[12]+ d[11]*u[13] + d[17]*u[14] + d[23]*u[15] + d[29]*u[16] + d[35]*u[17]);
1490 
1491       uik[18] = -(d[0]*u[18] + d[6]*u[19] + d[12]*u[20] + d[18]*u[21] + d[24]*u[22] + d[30]*u[23]);
1492       uik[19] = -(d[1]*u[18] + d[7]*u[19] + d[13]*u[20] + d[19]*u[21] + d[25]*u[22] + d[31]*u[23]);
1493       uik[20] = -(d[2]*u[18] + d[8]*u[19] + d[14]*u[20] + d[20]*u[21] + d[26]*u[22] + d[32]*u[23]);
1494       uik[21] = -(d[3]*u[18] + d[9]*u[19] + d[15]*u[20] + d[21]*u[21] + d[27]*u[22] + d[33]*u[23]);
1495       uik[22] = -(d[4]*u[18]+ d[10]*u[19] + d[16]*u[20] + d[22]*u[21] + d[28]*u[22] + d[34]*u[23]);
1496       uik[23] = -(d[5]*u[18]+ d[11]*u[19] + d[17]*u[20] + d[23]*u[21] + d[29]*u[22] + d[35]*u[23]);
1497 
1498       uik[24] = -(d[0]*u[24] + d[6]*u[25] + d[12]*u[26] + d[18]*u[27] + d[24]*u[28] + d[30]*u[29]);
1499       uik[25] = -(d[1]*u[24] + d[7]*u[25] + d[13]*u[26] + d[19]*u[27] + d[25]*u[28] + d[31]*u[29]);
1500       uik[26] = -(d[2]*u[24] + d[8]*u[25] + d[14]*u[26] + d[20]*u[27] + d[26]*u[28] + d[32]*u[29]);
1501       uik[27] = -(d[3]*u[24] + d[9]*u[25] + d[15]*u[26] + d[21]*u[27] + d[27]*u[28] + d[33]*u[29]);
1502       uik[28] = -(d[4]*u[24]+ d[10]*u[25] + d[16]*u[26] + d[22]*u[27] + d[28]*u[28] + d[34]*u[29]);
1503       uik[29] = -(d[5]*u[24]+ d[11]*u[25] + d[17]*u[26] + d[23]*u[27] + d[29]*u[28] + d[35]*u[29]);
1504 
1505       uik[30] = -(d[0]*u[30] + d[6]*u[31] + d[12]*u[32] + d[18]*u[33] + d[24]*u[34] + d[30]*u[35]);
1506       uik[31] = -(d[1]*u[30] + d[7]*u[31] + d[13]*u[32] + d[19]*u[33] + d[25]*u[34] + d[31]*u[35]);
1507       uik[32] = -(d[2]*u[30] + d[8]*u[31] + d[14]*u[32] + d[20]*u[33] + d[26]*u[34] + d[32]*u[35]);
1508       uik[33] = -(d[3]*u[30] + d[9]*u[31] + d[15]*u[32] + d[21]*u[33] + d[27]*u[34] + d[33]*u[35]);
1509       uik[34] = -(d[4]*u[30]+ d[10]*u[31] + d[16]*u[32] + d[22]*u[33] + d[28]*u[34] + d[34]*u[35]);
1510       uik[35] = -(d[5]*u[30]+ d[11]*u[31] + d[17]*u[32] + d[23]*u[33] + d[29]*u[34] + d[35]*u[35]);
1511 
1512       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
1513       dk[0] +=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5];
1514       dk[1] +=  uik[6]*u[0] + uik[7]*u[1] + uik[8]*u[2] + uik[9]*u[3]+ uik[10]*u[4]+ uik[11]*u[5];
1515       dk[2] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3]+ uik[16]*u[4]+ uik[17]*u[5];
1516       dk[3] += uik[18]*u[0]+ uik[19]*u[1]+ uik[20]*u[2]+ uik[21]*u[3]+ uik[22]*u[4]+ uik[23]*u[5];
1517       dk[4] += uik[24]*u[0]+ uik[25]*u[1]+ uik[26]*u[2]+ uik[27]*u[3]+ uik[28]*u[4]+ uik[29]*u[5];
1518       dk[5] += uik[30]*u[0]+ uik[31]*u[1]+ uik[32]*u[2]+ uik[33]*u[3]+ uik[34]*u[4]+ uik[35]*u[5];
1519 
1520       dk[6] +=  uik[0]*u[6] + uik[1]*u[7] + uik[2]*u[8] + uik[3]*u[9] + uik[4]*u[10] + uik[5]*u[11];
1521       dk[7] +=  uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11];
1522       dk[8] += uik[12]*u[6]+ uik[13]*u[7]+ uik[14]*u[8]+ uik[15]*u[9]+ uik[16]*u[10]+ uik[17]*u[11];
1523       dk[9] += uik[18]*u[6]+ uik[19]*u[7]+ uik[20]*u[8]+ uik[21]*u[9]+ uik[22]*u[10]+ uik[23]*u[11];
1524       dk[10]+= uik[24]*u[6]+ uik[25]*u[7]+ uik[26]*u[8]+ uik[27]*u[9]+ uik[28]*u[10]+ uik[29]*u[11];
1525       dk[11]+= uik[30]*u[6]+ uik[31]*u[7]+ uik[32]*u[8]+ uik[33]*u[9]+ uik[34]*u[10]+ uik[35]*u[11];
1526 
1527       dk[12]+=  uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15] + uik[4]*u[16] + uik[5]*u[17];
1528       dk[13]+=  uik[6]*u[12] + uik[7]*u[13] + uik[8]*u[14] + uik[9]*u[15]+ uik[10]*u[16]+ uik[11]*u[17];
1529       dk[14]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17];
1530       dk[15]+= uik[18]*u[12]+ uik[19]*u[13]+ uik[20]*u[14]+ uik[21]*u[15]+ uik[22]*u[16]+ uik[23]*u[17];
1531       dk[16]+= uik[24]*u[12]+ uik[25]*u[13]+ uik[26]*u[14]+ uik[27]*u[15]+ uik[28]*u[16]+ uik[29]*u[17];
1532       dk[17]+= uik[30]*u[12]+ uik[31]*u[13]+ uik[32]*u[14]+ uik[33]*u[15]+ uik[34]*u[16]+ uik[35]*u[17];
1533 
1534       dk[18]+=  uik[0]*u[18] + uik[1]*u[19] + uik[2]*u[20] + uik[3]*u[21] + uik[4]*u[22] + uik[5]*u[23];
1535       dk[19]+=  uik[6]*u[18] + uik[7]*u[19] + uik[8]*u[20] + uik[9]*u[21]+ uik[10]*u[22]+ uik[11]*u[23];
1536       dk[20]+= uik[12]*u[18]+ uik[13]*u[19]+ uik[14]*u[20]+ uik[15]*u[21]+ uik[16]*u[22]+ uik[17]*u[23];
1537       dk[21]+= uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20]+ uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23];
1538       dk[22]+= uik[24]*u[18]+ uik[25]*u[19]+ uik[26]*u[20]+ uik[27]*u[21]+ uik[28]*u[22]+ uik[29]*u[23];
1539       dk[23]+= uik[30]*u[18]+ uik[31]*u[19]+ uik[32]*u[20]+ uik[33]*u[21]+ uik[34]*u[22]+ uik[35]*u[23];
1540 
1541       dk[24]+=  uik[0]*u[24] + uik[1]*u[25] + uik[2]*u[26] + uik[3]*u[27] + uik[4]*u[28] + uik[5]*u[29];
1542       dk[25]+=  uik[6]*u[24] + uik[7]*u[25] + uik[8]*u[26] + uik[9]*u[27]+ uik[10]*u[28]+ uik[11]*u[29];
1543       dk[26]+= uik[12]*u[24]+ uik[13]*u[25]+ uik[14]*u[26]+ uik[15]*u[27]+ uik[16]*u[28]+ uik[17]*u[29];
1544       dk[27]+= uik[18]*u[24]+ uik[19]*u[25]+ uik[20]*u[26]+ uik[21]*u[27]+ uik[22]*u[28]+ uik[23]*u[29];
1545       dk[28]+= uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27]+ uik[28]*u[28]+ uik[29]*u[29];
1546       dk[29]+= uik[30]*u[24]+ uik[31]*u[25]+ uik[32]*u[26]+ uik[33]*u[27]+ uik[34]*u[28]+ uik[35]*u[29];
1547 
1548       dk[30]+=  uik[0]*u[30] + uik[1]*u[31] + uik[2]*u[32] + uik[3]*u[33] + uik[4]*u[34] + uik[5]*u[35];
1549       dk[31]+=  uik[6]*u[30] + uik[7]*u[31] + uik[8]*u[32] + uik[9]*u[33]+ uik[10]*u[34]+ uik[11]*u[35];
1550       dk[32]+= uik[12]*u[30]+ uik[13]*u[31]+ uik[14]*u[32]+ uik[15]*u[33]+ uik[16]*u[34]+ uik[17]*u[35];
1551       dk[33]+= uik[18]*u[30]+ uik[19]*u[31]+ uik[20]*u[32]+ uik[21]*u[33]+ uik[22]*u[34]+ uik[23]*u[35];
1552       dk[34]+= uik[24]*u[30]+ uik[25]*u[31]+ uik[26]*u[32]+ uik[27]*u[33]+ uik[28]*u[34]+ uik[29]*u[35];
1553       dk[35]+= uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34]+ uik[35]*u[35];
1554 
1555       /* update -U(i,k) */
1556       ierr = PetscMemcpy(ba+ili*36,uik,36*sizeof(MatScalar));CHKERRQ(ierr);
1557 
1558       /* add multiple of row i to k-th row ... */
1559       jmin = ili + 1; jmax = bi[i+1];
1560       if (jmin < jmax){
1561         for (j=jmin; j<jmax; j++) {
1562           /* w += -U(i,k)^T * U_bar(i,j) */
1563           wp = w + bj[j]*36;
1564           u = ba + j*36;
1565           wp[0] +=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5];
1566           wp[1] +=  uik[6]*u[0] + uik[7]*u[1] + uik[8]*u[2] + uik[9]*u[3]+ uik[10]*u[4]+ uik[11]*u[5];
1567           wp[2] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3]+ uik[16]*u[4]+ uik[17]*u[5];
1568           wp[3] += uik[18]*u[0]+ uik[19]*u[1]+ uik[20]*u[2]+ uik[21]*u[3]+ uik[22]*u[4]+ uik[23]*u[5];
1569           wp[4] += uik[24]*u[0]+ uik[25]*u[1]+ uik[26]*u[2]+ uik[27]*u[3]+ uik[28]*u[4]+ uik[29]*u[5];
1570           wp[5] += uik[30]*u[0]+ uik[31]*u[1]+ uik[32]*u[2]+ uik[33]*u[3]+ uik[34]*u[4]+ uik[35]*u[5];
1571 
1572           wp[6] +=  uik[0]*u[6] + uik[1]*u[7] + uik[2]*u[8] + uik[3]*u[9] + uik[4]*u[10] + uik[5]*u[11];
1573           wp[7] +=  uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11];
1574           wp[8] += uik[12]*u[6]+ uik[13]*u[7]+ uik[14]*u[8]+ uik[15]*u[9]+ uik[16]*u[10]+ uik[17]*u[11];
1575           wp[9] += uik[18]*u[6]+ uik[19]*u[7]+ uik[20]*u[8]+ uik[21]*u[9]+ uik[22]*u[10]+ uik[23]*u[11];
1576           wp[10]+= uik[24]*u[6]+ uik[25]*u[7]+ uik[26]*u[8]+ uik[27]*u[9]+ uik[28]*u[10]+ uik[29]*u[11];
1577           wp[11]+= uik[30]*u[6]+ uik[31]*u[7]+ uik[32]*u[8]+ uik[33]*u[9]+ uik[34]*u[10]+ uik[35]*u[11];
1578 
1579           wp[12]+=  uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15] + uik[4]*u[16] + uik[5]*u[17];
1580           wp[13]+=  uik[6]*u[12] + uik[7]*u[13] + uik[8]*u[14] + uik[9]*u[15]+ uik[10]*u[16]+ uik[11]*u[17];
1581           wp[14]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17];
1582           wp[15]+= uik[18]*u[12]+ uik[19]*u[13]+ uik[20]*u[14]+ uik[21]*u[15]+ uik[22]*u[16]+ uik[23]*u[17];
1583           wp[16]+= uik[24]*u[12]+ uik[25]*u[13]+ uik[26]*u[14]+ uik[27]*u[15]+ uik[28]*u[16]+ uik[29]*u[17];
1584           wp[17]+= uik[30]*u[12]+ uik[31]*u[13]+ uik[32]*u[14]+ uik[33]*u[15]+ uik[34]*u[16]+ uik[35]*u[17];
1585 
1586           wp[18]+=  uik[0]*u[18] + uik[1]*u[19] + uik[2]*u[20] + uik[3]*u[21] + uik[4]*u[22] + uik[5]*u[23];
1587           wp[19]+=  uik[6]*u[18] + uik[7]*u[19] + uik[8]*u[20] + uik[9]*u[21]+ uik[10]*u[22]+ uik[11]*u[23];
1588           wp[20]+= uik[12]*u[18]+ uik[13]*u[19]+ uik[14]*u[20]+ uik[15]*u[21]+ uik[16]*u[22]+ uik[17]*u[23];
1589           wp[21]+= uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20]+ uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23];
1590           wp[22]+= uik[24]*u[18]+ uik[25]*u[19]+ uik[26]*u[20]+ uik[27]*u[21]+ uik[28]*u[22]+ uik[29]*u[23];
1591           wp[23]+= uik[30]*u[18]+ uik[31]*u[19]+ uik[32]*u[20]+ uik[33]*u[21]+ uik[34]*u[22]+ uik[35]*u[23];
1592 
1593           wp[24]+=  uik[0]*u[24] + uik[1]*u[25] + uik[2]*u[26] + uik[3]*u[27] + uik[4]*u[28] + uik[5]*u[29];
1594           wp[25]+=  uik[6]*u[24] + uik[7]*u[25] + uik[8]*u[26] + uik[9]*u[27]+ uik[10]*u[28]+ uik[11]*u[29];
1595           wp[26]+= uik[12]*u[24]+ uik[13]*u[25]+ uik[14]*u[26]+ uik[15]*u[27]+ uik[16]*u[28]+ uik[17]*u[29];
1596           wp[27]+= uik[18]*u[24]+ uik[19]*u[25]+ uik[20]*u[26]+ uik[21]*u[27]+ uik[22]*u[28]+ uik[23]*u[29];
1597           wp[28]+= uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27]+ uik[28]*u[28]+ uik[29]*u[29];
1598           wp[29]+= uik[30]*u[24]+ uik[31]*u[25]+ uik[32]*u[26]+ uik[33]*u[27]+ uik[34]*u[28]+ uik[35]*u[29];
1599 
1600           wp[30]+=  uik[0]*u[30] + uik[1]*u[31] + uik[2]*u[32] + uik[3]*u[33] + uik[4]*u[34] + uik[5]*u[35];
1601           wp[31]+=  uik[6]*u[30] + uik[7]*u[31] + uik[8]*u[32] + uik[9]*u[33]+ uik[10]*u[34]+ uik[11]*u[35];
1602           wp[32]+= uik[12]*u[30]+ uik[13]*u[31]+ uik[14]*u[32]+ uik[15]*u[33]+ uik[16]*u[34]+ uik[17]*u[35];
1603           wp[33]+= uik[18]*u[30]+ uik[19]*u[31]+ uik[20]*u[32]+ uik[21]*u[33]+ uik[22]*u[34]+ uik[23]*u[35];
1604           wp[34]+= uik[24]*u[30]+ uik[25]*u[31]+ uik[26]*u[32]+ uik[27]*u[33]+ uik[28]*u[34]+ uik[29]*u[35];
1605           wp[35]+= uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34]+ uik[35]*u[35];
1606         }
1607 
1608         /* ... add i to row list for next nonzero entry */
1609         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1610         j     = bj[jmin];
1611         jl[i] = jl[j]; jl[j] = i; /* update jl */
1612       }
1613       i = nexti;
1614     }
1615 
1616     /* save nonzero entries in k-th row of U ... */
1617 
1618     /* invert diagonal block */
1619     d = ba+k*36;
1620     ierr = PetscMemcpy(d,dk,36*sizeof(MatScalar));CHKERRQ(ierr);
1621     ierr = Kernel_A_gets_inverse_A_6(d);CHKERRQ(ierr);
1622 
1623     jmin = bi[k]; jmax = bi[k+1];
1624     if (jmin < jmax) {
1625       for (j=jmin; j<jmax; j++){
1626          vj = bj[j];           /* block col. index of U */
1627          u   = ba + j*36;
1628          wp = w + vj*36;
1629          for (k1=0; k1<36; k1++){
1630            *u++        = *wp;
1631            *wp++ = 0.0;
1632          }
1633       }
1634 
1635       /* ... add k to row list for first nonzero entry in k-th row */
1636       il[k] = jmin;
1637       i     = bj[jmin];
1638       jl[k] = jl[i]; jl[i] = k;
1639     }
1640   }
1641 
1642   ierr = PetscFree(w);CHKERRQ(ierr);
1643   ierr = PetscFree(il);CHKERRQ(ierr);
1644   ierr = PetscFree(jl);CHKERRQ(ierr);
1645   ierr = PetscFree(dk);CHKERRQ(ierr);
1646   ierr = PetscFree(uik);CHKERRQ(ierr);
1647 
1648   C->factor    = FACTOR_CHOLESKY;
1649   C->assembled = PETSC_TRUE;
1650   C->preallocated = PETSC_TRUE;
1651   PLogFlops(1.3333*216*b->mbs); /* from inverting diagonal blocks */
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 /* Version for when blocks are 5 by 5  */
1656 #undef __FUNC__
1657 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_5"
1658 int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat A,Mat *B)
1659 {
1660   Mat                C = *B;
1661   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1662   IS                 perm = b->row;
1663   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1664   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1665   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
1666   MatScalar          *u,*d,*rtmp,*rtmp_ptr;
1667 
1668   PetscFunctionBegin;
1669   /* initialization */
1670   rtmp  = (MatScalar*)PetscMalloc(25*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
1671   ierr = PetscMemzero(rtmp,25*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1672   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
1673   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
1674   for (i=0; i<mbs; i++) {
1675     jl[i] = mbs; il[0] = 0;
1676   }
1677   dk    = (MatScalar*)PetscMalloc(25*sizeof(MatScalar));CHKPTRQ(dk);
1678   uik   = (MatScalar*)PetscMalloc(25*sizeof(MatScalar));CHKPTRQ(uik);
1679   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
1680 
1681   /* check permutation */
1682   if (!a->permute){
1683     ai = a->i; aj = a->j; aa = a->a;
1684   } else {
1685     ai = a->inew; aj = a->jnew;
1686     aa = (MatScalar*)PetscMalloc(25*ai[mbs]*sizeof(MatScalar));CHKPTRQ(aa);
1687     ierr = PetscMemcpy(aa,a->a,25*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1688     a2anew  = (int*)PetscMalloc(ai[mbs]*sizeof(int));CHKPTRQ(a2anew);
1689     ierr= PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
1690 
1691     for (i=0; i<mbs; i++){
1692       jmin = ai[i]; jmax = ai[i+1];
1693       for (j=jmin; j<jmax; j++){
1694         while (a2anew[j] != j){
1695           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
1696           for (k1=0; k1<25; k1++){
1697             dk[k1]       = aa[k*25+k1];
1698             aa[k*25+k1] = aa[j*25+k1];
1699             aa[j*25+k1] = dk[k1];
1700           }
1701         }
1702         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
1703         if (i > aj[j]){
1704           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
1705           ap = aa + j*25;                     /* ptr to the beginning of j-th block of aa */
1706           for (k=0; k<25; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
1707           for (k=0; k<5; k++){               /* j-th block of aa <- dk^T */
1708             for (k1=0; k1<5; k1++) *ap++ = dk[k + 5*k1];
1709           }
1710         }
1711       }
1712     }
1713     ierr = PetscFree(a2anew);CHKERRA(ierr);
1714   }
1715 
1716   /* for each row k */
1717   for (k = 0; k<mbs; k++){
1718 
1719     /*initialize k-th row with elements nonzero in row perm(k) of A */
1720     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
1721     if (jmin < jmax) {
1722       ap = aa + jmin*25;
1723       for (j = jmin; j < jmax; j++){
1724         vj = perm_ptr[aj[j]];         /* block col. index */
1725         rtmp_ptr = rtmp + vj*25;
1726         for (i=0; i<25; i++) *rtmp_ptr++ = *ap++;
1727       }
1728     }
1729 
1730     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1731     ierr = PetscMemcpy(dk,rtmp+k*25,25*sizeof(MatScalar));CHKERRQ(ierr);
1732     i = jl[k]; /* first row to be added to k_th row  */
1733 
1734     while (i < mbs){
1735       nexti = jl[i]; /* next row to be added to k_th row */
1736 
1737       /* compute multiplier */
1738       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1739 
1740       /* uik = -inv(Di)*U_bar(i,k) */
1741       d = ba + i*25;
1742       u    = ba + ili*25;
1743 
1744       uik[0] = -(d[0]*u[0] + d[5]*u[1] + d[10]*u[2] + d[15]*u[3] + d[20]*u[4]);
1745       uik[1] = -(d[1]*u[0] + d[6]*u[1] + d[11]*u[2] + d[16]*u[3] + d[21]*u[4]);
1746       uik[2] = -(d[2]*u[0] + d[7]*u[1] + d[12]*u[2] + d[17]*u[3] + d[22]*u[4]);
1747       uik[3] = -(d[3]*u[0] + d[8]*u[1] + d[13]*u[2] + d[18]*u[3] + d[23]*u[4]);
1748       uik[4] = -(d[4]*u[0] + d[9]*u[1] + d[14]*u[2] + d[19]*u[3] + d[24]*u[4]);
1749 
1750       uik[5] = -(d[0]*u[5] + d[5]*u[6] + d[10]*u[7] + d[15]*u[8] + d[20]*u[9]);
1751       uik[6] = -(d[1]*u[5] + d[6]*u[6] + d[11]*u[7] + d[16]*u[8] + d[21]*u[9]);
1752       uik[7] = -(d[2]*u[5] + d[7]*u[6] + d[12]*u[7] + d[17]*u[8] + d[22]*u[9]);
1753       uik[8] = -(d[3]*u[5] + d[8]*u[6] + d[13]*u[7] + d[18]*u[8] + d[23]*u[9]);
1754       uik[9] = -(d[4]*u[5] + d[9]*u[6] + d[14]*u[7] + d[19]*u[8] + d[24]*u[9]);
1755 
1756       uik[10]= -(d[0]*u[10] + d[5]*u[11] + d[10]*u[12] + d[15]*u[13] + d[20]*u[14]);
1757       uik[11]= -(d[1]*u[10] + d[6]*u[11] + d[11]*u[12] + d[16]*u[13] + d[21]*u[14]);
1758       uik[12]= -(d[2]*u[10] + d[7]*u[11] + d[12]*u[12] + d[17]*u[13] + d[22]*u[14]);
1759       uik[13]= -(d[3]*u[10] + d[8]*u[11] + d[13]*u[12] + d[18]*u[13] + d[23]*u[14]);
1760       uik[14]= -(d[4]*u[10] + d[9]*u[11] + d[14]*u[12] + d[19]*u[13] + d[24]*u[14]);
1761 
1762       uik[15]= -(d[0]*u[15] + d[5]*u[16] + d[10]*u[17] + d[15]*u[18] + d[20]*u[19]);
1763       uik[16]= -(d[1]*u[15] + d[6]*u[16] + d[11]*u[17] + d[16]*u[18] + d[21]*u[19]);
1764       uik[17]= -(d[2]*u[15] + d[7]*u[16] + d[12]*u[17] + d[17]*u[18] + d[22]*u[19]);
1765       uik[18]= -(d[3]*u[15] + d[8]*u[16] + d[13]*u[17] + d[18]*u[18] + d[23]*u[19]);
1766       uik[19]= -(d[4]*u[15] + d[9]*u[16] + d[14]*u[17] + d[19]*u[18] + d[24]*u[19]);
1767 
1768       uik[20]= -(d[0]*u[20] + d[5]*u[21] + d[10]*u[22] + d[15]*u[23] + d[20]*u[24]);
1769       uik[21]= -(d[1]*u[20] + d[6]*u[21] + d[11]*u[22] + d[16]*u[23] + d[21]*u[24]);
1770       uik[22]= -(d[2]*u[20] + d[7]*u[21] + d[12]*u[22] + d[17]*u[23] + d[22]*u[24]);
1771       uik[23]= -(d[3]*u[20] + d[8]*u[21] + d[13]*u[22] + d[18]*u[23] + d[23]*u[24]);
1772       uik[24]= -(d[4]*u[20] + d[9]*u[21] + d[14]*u[22] + d[19]*u[23] + d[24]*u[24]);
1773 
1774 
1775       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
1776       dk[0] +=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4];
1777       dk[1] +=  uik[5]*u[0] + uik[6]*u[1] + uik[7]*u[2] + uik[8]*u[3] + uik[9]*u[4];
1778       dk[2] += uik[10]*u[0]+ uik[11]*u[1]+ uik[12]*u[2]+ uik[13]*u[3]+ uik[14]*u[4];
1779       dk[3] += uik[15]*u[0]+ uik[16]*u[1]+ uik[17]*u[2]+ uik[18]*u[3]+ uik[19]*u[4];
1780       dk[4] += uik[20]*u[0]+ uik[21]*u[1]+ uik[22]*u[2]+ uik[23]*u[3]+ uik[24]*u[4];
1781 
1782       dk[5] +=  uik[0]*u[5] + uik[1]*u[6] + uik[2]*u[7] + uik[3]*u[8] + uik[4]*u[9];
1783       dk[6] +=  uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9];
1784       dk[7] += uik[10]*u[5]+ uik[11]*u[6]+ uik[12]*u[7]+ uik[13]*u[8]+ uik[14]*u[9];
1785       dk[8] += uik[15]*u[5]+ uik[16]*u[6]+ uik[17]*u[7]+ uik[18]*u[8]+ uik[19]*u[9];
1786       dk[9] += uik[20]*u[5]+ uik[21]*u[6]+ uik[22]*u[7]+ uik[23]*u[8]+ uik[24]*u[9];
1787 
1788       dk[10] +=  uik[0]*u[10] + uik[1]*u[11] + uik[2]*u[12] + uik[3]*u[13] + uik[4]*u[14];
1789       dk[11] +=  uik[5]*u[10] + uik[6]*u[11] + uik[7]*u[12] + uik[8]*u[13] + uik[9]*u[14];
1790       dk[12] += uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14];
1791       dk[13] += uik[15]*u[10]+ uik[16]*u[11]+ uik[17]*u[12]+ uik[18]*u[13]+ uik[19]*u[14];
1792       dk[14] += uik[20]*u[10]+ uik[21]*u[11]+ uik[22]*u[12]+ uik[23]*u[13]+ uik[24]*u[14];
1793 
1794       dk[15] +=  uik[0]*u[15] + uik[1]*u[16] + uik[2]*u[17] + uik[3]*u[18] + uik[4]*u[19];
1795       dk[16] +=  uik[5]*u[15] + uik[6]*u[16] + uik[7]*u[17] + uik[8]*u[18] + uik[9]*u[19];
1796       dk[17] += uik[10]*u[15]+ uik[11]*u[16]+ uik[12]*u[17]+ uik[13]*u[18]+ uik[14]*u[19];
1797       dk[18] += uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19];
1798       dk[19] += uik[20]*u[15]+ uik[21]*u[16]+ uik[22]*u[17]+ uik[23]*u[18]+ uik[24]*u[19];
1799 
1800       dk[20] +=  uik[0]*u[20] + uik[1]*u[21] + uik[2]*u[22] + uik[3]*u[23] + uik[4]*u[24];
1801       dk[21] +=  uik[5]*u[20] + uik[6]*u[21] + uik[7]*u[22] + uik[8]*u[23] + uik[9]*u[24];
1802       dk[22] += uik[10]*u[20]+ uik[11]*u[21]+ uik[12]*u[22]+ uik[13]*u[23]+ uik[14]*u[24];
1803       dk[23] += uik[15]*u[20]+ uik[16]*u[21]+ uik[17]*u[22]+ uik[18]*u[23]+ uik[19]*u[24];
1804       dk[24] += uik[20]*u[20]+ uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24];
1805 
1806       /* update -U(i,k) */
1807       ierr = PetscMemcpy(ba+ili*25,uik,25*sizeof(MatScalar));CHKERRQ(ierr);
1808 
1809       /* add multiple of row i to k-th row ... */
1810       jmin = ili + 1; jmax = bi[i+1];
1811       if (jmin < jmax){
1812         for (j=jmin; j<jmax; j++) {
1813           /* rtmp += -U(i,k)^T * U_bar(i,j) */
1814           rtmp_ptr = rtmp + bj[j]*25;
1815           u = ba + j*25;
1816           rtmp_ptr[0] +=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4];
1817           rtmp_ptr[1] +=  uik[5]*u[0] + uik[6]*u[1] + uik[7]*u[2] + uik[8]*u[3] + uik[9]*u[4];
1818           rtmp_ptr[2] += uik[10]*u[0]+ uik[11]*u[1]+ uik[12]*u[2]+ uik[13]*u[3]+ uik[14]*u[4];
1819           rtmp_ptr[3] += uik[15]*u[0]+ uik[16]*u[1]+ uik[17]*u[2]+ uik[18]*u[3]+ uik[19]*u[4];
1820           rtmp_ptr[4] += uik[20]*u[0]+ uik[21]*u[1]+ uik[22]*u[2]+ uik[23]*u[3]+ uik[24]*u[4];
1821 
1822           rtmp_ptr[5] +=  uik[0]*u[5] + uik[1]*u[6] + uik[2]*u[7] + uik[3]*u[8] + uik[4]*u[9];
1823           rtmp_ptr[6] +=  uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9];
1824           rtmp_ptr[7] += uik[10]*u[5]+ uik[11]*u[6]+ uik[12]*u[7]+ uik[13]*u[8]+ uik[14]*u[9];
1825           rtmp_ptr[8] += uik[15]*u[5]+ uik[16]*u[6]+ uik[17]*u[7]+ uik[18]*u[8]+ uik[19]*u[9];
1826           rtmp_ptr[9] += uik[20]*u[5]+ uik[21]*u[6]+ uik[22]*u[7]+ uik[23]*u[8]+ uik[24]*u[9];
1827 
1828           rtmp_ptr[10] +=  uik[0]*u[10] + uik[1]*u[11] + uik[2]*u[12] + uik[3]*u[13] + uik[4]*u[14];
1829           rtmp_ptr[11] +=  uik[5]*u[10] + uik[6]*u[11] + uik[7]*u[12] + uik[8]*u[13] + uik[9]*u[14];
1830           rtmp_ptr[12] += uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14];
1831           rtmp_ptr[13] += uik[15]*u[10]+ uik[16]*u[11]+ uik[17]*u[12]+ uik[18]*u[13]+ uik[19]*u[14];
1832           rtmp_ptr[14] += uik[20]*u[10]+ uik[21]*u[11]+ uik[22]*u[12]+ uik[23]*u[13]+ uik[24]*u[14];
1833 
1834           rtmp_ptr[15] +=  uik[0]*u[15] + uik[1]*u[16] + uik[2]*u[17] + uik[3]*u[18] + uik[4]*u[19];
1835           rtmp_ptr[16] +=  uik[5]*u[15] + uik[6]*u[16] + uik[7]*u[17] + uik[8]*u[18] + uik[9]*u[19];
1836           rtmp_ptr[17] += uik[10]*u[15]+ uik[11]*u[16]+ uik[12]*u[17]+ uik[13]*u[18]+ uik[14]*u[19];
1837           rtmp_ptr[18] += uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19];
1838           rtmp_ptr[19] += uik[20]*u[15]+ uik[21]*u[16]+ uik[22]*u[17]+ uik[23]*u[18]+ uik[24]*u[19];
1839 
1840           rtmp_ptr[20] +=  uik[0]*u[20] + uik[1]*u[21] + uik[2]*u[22] + uik[3]*u[23] + uik[4]*u[24];
1841           rtmp_ptr[21] +=  uik[5]*u[20] + uik[6]*u[21] + uik[7]*u[22] + uik[8]*u[23] + uik[9]*u[24];
1842           rtmp_ptr[22] += uik[10]*u[20]+ uik[11]*u[21]+ uik[12]*u[22]+ uik[13]*u[23]+ uik[14]*u[24];
1843           rtmp_ptr[23] += uik[15]*u[20]+ uik[16]*u[21]+ uik[17]*u[22]+ uik[18]*u[23]+ uik[19]*u[24];
1844           rtmp_ptr[24] += uik[20]*u[20]+ uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24];
1845         }
1846 
1847         /* ... add i to row list for next nonzero entry */
1848         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1849         j     = bj[jmin];
1850         jl[i] = jl[j]; jl[j] = i; /* update jl */
1851       }
1852       i = nexti;
1853     }
1854 
1855     /* save nonzero entries in k-th row of U ... */
1856 
1857     /* invert diagonal block */
1858     d = ba+k*25;
1859     ierr = PetscMemcpy(d,dk,25*sizeof(MatScalar));CHKERRQ(ierr);
1860     ierr = Kernel_A_gets_inverse_A_5(d);CHKERRQ(ierr);
1861 
1862     jmin = bi[k]; jmax = bi[k+1];
1863     if (jmin < jmax) {
1864       for (j=jmin; j<jmax; j++){
1865          vj = bj[j];           /* block col. index of U */
1866          u   = ba + j*25;
1867          rtmp_ptr = rtmp + vj*25;
1868          for (k1=0; k1<25; k1++){
1869            *u++        = *rtmp_ptr;
1870            *rtmp_ptr++ = 0.0;
1871          }
1872       }
1873 
1874       /* ... add k to row list for first nonzero entry in k-th row */
1875       il[k] = jmin;
1876       i     = bj[jmin];
1877       jl[k] = jl[i]; jl[i] = k;
1878     }
1879   }
1880 
1881   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1882   ierr = PetscFree(il);CHKERRQ(ierr);
1883   ierr = PetscFree(jl);CHKERRQ(ierr);
1884   ierr = PetscFree(dk);CHKERRQ(ierr);
1885   ierr = PetscFree(uik);CHKERRQ(ierr);
1886   if (a->permute){
1887     ierr = PetscFree(aa);CHKERRQ(ierr);
1888   }
1889 
1890   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
1891   C->factor    = FACTOR_CHOLESKY;
1892   C->assembled = PETSC_TRUE;
1893   C->preallocated = PETSC_TRUE;
1894   PLogFlops(1.3333*125*b->mbs); /* from inverting diagonal blocks */
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 /*
1899       Version for when blocks are 5 by 5 Using natural ordering
1900 */
1901 #undef __FUNC__
1902 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering"
1903 int MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering(Mat A,Mat *B)
1904 {
1905   Mat                C = *B;
1906   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1907   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1908   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1909   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
1910   MatScalar          *u,*d,*rtmp,*rtmp_ptr;
1911 
1912   PetscFunctionBegin;
1913 
1914   /* initialization */
1915   rtmp  = (MatScalar*)PetscMalloc(25*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
1916   ierr = PetscMemzero(rtmp,25*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1917   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
1918   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
1919   for (i=0; i<mbs; i++) {
1920     jl[i] = mbs; il[0] = 0;
1921   }
1922   dk    = (MatScalar*)PetscMalloc(25*sizeof(MatScalar));CHKPTRQ(dk);
1923   uik   = (MatScalar*)PetscMalloc(25*sizeof(MatScalar));CHKPTRQ(uik);
1924 
1925   ai = a->i; aj = a->j; aa = a->a;
1926 
1927   /* for each row k */
1928   for (k = 0; k<mbs; k++){
1929 
1930     /*initialize k-th row with elements nonzero in row k of A */
1931     jmin = ai[k]; jmax = ai[k+1];
1932     if (jmin < jmax) {
1933       ap = aa + jmin*25;
1934       for (j = jmin; j < jmax; j++){
1935         vj = aj[j];         /* block col. index */
1936         rtmp_ptr = rtmp + vj*25;
1937         for (i=0; i<25; i++) *rtmp_ptr++ = *ap++;
1938       }
1939     }
1940 
1941     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1942     ierr = PetscMemcpy(dk,rtmp+k*25,25*sizeof(MatScalar));CHKERRQ(ierr);
1943     i = jl[k]; /* first row to be added to k_th row  */
1944 
1945     while (i < mbs){
1946       nexti = jl[i]; /* next row to be added to k_th row */
1947 
1948       /* compute multiplier */
1949       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1950 
1951       /* uik = -inv(Di)*U_bar(i,k) */
1952       d = ba + i*25;
1953       u    = ba + ili*25;
1954 
1955       uik[0] = -(d[0]*u[0] + d[5]*u[1] + d[10]*u[2] + d[15]*u[3] + d[20]*u[4]);
1956       uik[1] = -(d[1]*u[0] + d[6]*u[1] + d[11]*u[2] + d[16]*u[3] + d[21]*u[4]);
1957       uik[2] = -(d[2]*u[0] + d[7]*u[1] + d[12]*u[2] + d[17]*u[3] + d[22]*u[4]);
1958       uik[3] = -(d[3]*u[0] + d[8]*u[1] + d[13]*u[2] + d[18]*u[3] + d[23]*u[4]);
1959       uik[4] = -(d[4]*u[0] + d[9]*u[1] + d[14]*u[2] + d[19]*u[3] + d[24]*u[4]);
1960 
1961       uik[5] = -(d[0]*u[5] + d[5]*u[6] + d[10]*u[7] + d[15]*u[8] + d[20]*u[9]);
1962       uik[6] = -(d[1]*u[5] + d[6]*u[6] + d[11]*u[7] + d[16]*u[8] + d[21]*u[9]);
1963       uik[7] = -(d[2]*u[5] + d[7]*u[6] + d[12]*u[7] + d[17]*u[8] + d[22]*u[9]);
1964       uik[8] = -(d[3]*u[5] + d[8]*u[6] + d[13]*u[7] + d[18]*u[8] + d[23]*u[9]);
1965       uik[9] = -(d[4]*u[5] + d[9]*u[6] + d[14]*u[7] + d[19]*u[8] + d[24]*u[9]);
1966 
1967       uik[10]= -(d[0]*u[10] + d[5]*u[11] + d[10]*u[12] + d[15]*u[13] + d[20]*u[14]);
1968       uik[11]= -(d[1]*u[10] + d[6]*u[11] + d[11]*u[12] + d[16]*u[13] + d[21]*u[14]);
1969       uik[12]= -(d[2]*u[10] + d[7]*u[11] + d[12]*u[12] + d[17]*u[13] + d[22]*u[14]);
1970       uik[13]= -(d[3]*u[10] + d[8]*u[11] + d[13]*u[12] + d[18]*u[13] + d[23]*u[14]);
1971       uik[14]= -(d[4]*u[10] + d[9]*u[11] + d[14]*u[12] + d[19]*u[13] + d[24]*u[14]);
1972 
1973       uik[15]= -(d[0]*u[15] + d[5]*u[16] + d[10]*u[17] + d[15]*u[18] + d[20]*u[19]);
1974       uik[16]= -(d[1]*u[15] + d[6]*u[16] + d[11]*u[17] + d[16]*u[18] + d[21]*u[19]);
1975       uik[17]= -(d[2]*u[15] + d[7]*u[16] + d[12]*u[17] + d[17]*u[18] + d[22]*u[19]);
1976       uik[18]= -(d[3]*u[15] + d[8]*u[16] + d[13]*u[17] + d[18]*u[18] + d[23]*u[19]);
1977       uik[19]= -(d[4]*u[15] + d[9]*u[16] + d[14]*u[17] + d[19]*u[18] + d[24]*u[19]);
1978 
1979       uik[20]= -(d[0]*u[20] + d[5]*u[21] + d[10]*u[22] + d[15]*u[23] + d[20]*u[24]);
1980       uik[21]= -(d[1]*u[20] + d[6]*u[21] + d[11]*u[22] + d[16]*u[23] + d[21]*u[24]);
1981       uik[22]= -(d[2]*u[20] + d[7]*u[21] + d[12]*u[22] + d[17]*u[23] + d[22]*u[24]);
1982       uik[23]= -(d[3]*u[20] + d[8]*u[21] + d[13]*u[22] + d[18]*u[23] + d[23]*u[24]);
1983       uik[24]= -(d[4]*u[20] + d[9]*u[21] + d[14]*u[22] + d[19]*u[23] + d[24]*u[24]);
1984 
1985 
1986       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
1987       dk[0] +=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4];
1988       dk[1] +=  uik[5]*u[0] + uik[6]*u[1] + uik[7]*u[2] + uik[8]*u[3] + uik[9]*u[4];
1989       dk[2] += uik[10]*u[0]+ uik[11]*u[1]+ uik[12]*u[2]+ uik[13]*u[3]+ uik[14]*u[4];
1990       dk[3] += uik[15]*u[0]+ uik[16]*u[1]+ uik[17]*u[2]+ uik[18]*u[3]+ uik[19]*u[4];
1991       dk[4] += uik[20]*u[0]+ uik[21]*u[1]+ uik[22]*u[2]+ uik[23]*u[3]+ uik[24]*u[4];
1992 
1993       dk[5] +=  uik[0]*u[5] + uik[1]*u[6] + uik[2]*u[7] + uik[3]*u[8] + uik[4]*u[9];
1994       dk[6] +=  uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9];
1995       dk[7] += uik[10]*u[5]+ uik[11]*u[6]+ uik[12]*u[7]+ uik[13]*u[8]+ uik[14]*u[9];
1996       dk[8] += uik[15]*u[5]+ uik[16]*u[6]+ uik[17]*u[7]+ uik[18]*u[8]+ uik[19]*u[9];
1997       dk[9] += uik[20]*u[5]+ uik[21]*u[6]+ uik[22]*u[7]+ uik[23]*u[8]+ uik[24]*u[9];
1998 
1999       dk[10] +=  uik[0]*u[10] + uik[1]*u[11] + uik[2]*u[12] + uik[3]*u[13] + uik[4]*u[14];
2000       dk[11] +=  uik[5]*u[10] + uik[6]*u[11] + uik[7]*u[12] + uik[8]*u[13] + uik[9]*u[14];
2001       dk[12] += uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14];
2002       dk[13] += uik[15]*u[10]+ uik[16]*u[11]+ uik[17]*u[12]+ uik[18]*u[13]+ uik[19]*u[14];
2003       dk[14] += uik[20]*u[10]+ uik[21]*u[11]+ uik[22]*u[12]+ uik[23]*u[13]+ uik[24]*u[14];
2004 
2005       dk[15] +=  uik[0]*u[15] + uik[1]*u[16] + uik[2]*u[17] + uik[3]*u[18] + uik[4]*u[19];
2006       dk[16] +=  uik[5]*u[15] + uik[6]*u[16] + uik[7]*u[17] + uik[8]*u[18] + uik[9]*u[19];
2007       dk[17] += uik[10]*u[15]+ uik[11]*u[16]+ uik[12]*u[17]+ uik[13]*u[18]+ uik[14]*u[19];
2008       dk[18] += uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19];
2009       dk[19] += uik[20]*u[15]+ uik[21]*u[16]+ uik[22]*u[17]+ uik[23]*u[18]+ uik[24]*u[19];
2010 
2011       dk[20] +=  uik[0]*u[20] + uik[1]*u[21] + uik[2]*u[22] + uik[3]*u[23] + uik[4]*u[24];
2012       dk[21] +=  uik[5]*u[20] + uik[6]*u[21] + uik[7]*u[22] + uik[8]*u[23] + uik[9]*u[24];
2013       dk[22] += uik[10]*u[20]+ uik[11]*u[21]+ uik[12]*u[22]+ uik[13]*u[23]+ uik[14]*u[24];
2014       dk[23] += uik[15]*u[20]+ uik[16]*u[21]+ uik[17]*u[22]+ uik[18]*u[23]+ uik[19]*u[24];
2015       dk[24] += uik[20]*u[20]+ uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24];
2016 
2017       /* update -U(i,k) */
2018       ierr = PetscMemcpy(ba+ili*25,uik,25*sizeof(MatScalar));CHKERRQ(ierr);
2019 
2020       /* add multiple of row i to k-th row ... */
2021       jmin = ili + 1; jmax = bi[i+1];
2022       if (jmin < jmax){
2023         for (j=jmin; j<jmax; j++) {
2024           /* rtmp += -U(i,k)^T * U_bar(i,j) */
2025           rtmp_ptr = rtmp + bj[j]*25;
2026           u = ba + j*25;
2027           rtmp_ptr[0] +=  uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4];
2028           rtmp_ptr[1] +=  uik[5]*u[0] + uik[6]*u[1] + uik[7]*u[2] + uik[8]*u[3] + uik[9]*u[4];
2029           rtmp_ptr[2] += uik[10]*u[0]+ uik[11]*u[1]+ uik[12]*u[2]+ uik[13]*u[3]+ uik[14]*u[4];
2030           rtmp_ptr[3] += uik[15]*u[0]+ uik[16]*u[1]+ uik[17]*u[2]+ uik[18]*u[3]+ uik[19]*u[4];
2031           rtmp_ptr[4] += uik[20]*u[0]+ uik[21]*u[1]+ uik[22]*u[2]+ uik[23]*u[3]+ uik[24]*u[4];
2032 
2033           rtmp_ptr[5] +=  uik[0]*u[5] + uik[1]*u[6] + uik[2]*u[7] + uik[3]*u[8] + uik[4]*u[9];
2034           rtmp_ptr[6] +=  uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9];
2035           rtmp_ptr[7] += uik[10]*u[5]+ uik[11]*u[6]+ uik[12]*u[7]+ uik[13]*u[8]+ uik[14]*u[9];
2036           rtmp_ptr[8] += uik[15]*u[5]+ uik[16]*u[6]+ uik[17]*u[7]+ uik[18]*u[8]+ uik[19]*u[9];
2037           rtmp_ptr[9] += uik[20]*u[5]+ uik[21]*u[6]+ uik[22]*u[7]+ uik[23]*u[8]+ uik[24]*u[9];
2038 
2039           rtmp_ptr[10] +=  uik[0]*u[10] + uik[1]*u[11] + uik[2]*u[12] + uik[3]*u[13] + uik[4]*u[14];
2040           rtmp_ptr[11] +=  uik[5]*u[10] + uik[6]*u[11] + uik[7]*u[12] + uik[8]*u[13] + uik[9]*u[14];
2041           rtmp_ptr[12] += uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14];
2042           rtmp_ptr[13] += uik[15]*u[10]+ uik[16]*u[11]+ uik[17]*u[12]+ uik[18]*u[13]+ uik[19]*u[14];
2043           rtmp_ptr[14] += uik[20]*u[10]+ uik[21]*u[11]+ uik[22]*u[12]+ uik[23]*u[13]+ uik[24]*u[14];
2044 
2045           rtmp_ptr[15] +=  uik[0]*u[15] + uik[1]*u[16] + uik[2]*u[17] + uik[3]*u[18] + uik[4]*u[19];
2046           rtmp_ptr[16] +=  uik[5]*u[15] + uik[6]*u[16] + uik[7]*u[17] + uik[8]*u[18] + uik[9]*u[19];
2047           rtmp_ptr[17] += uik[10]*u[15]+ uik[11]*u[16]+ uik[12]*u[17]+ uik[13]*u[18]+ uik[14]*u[19];
2048           rtmp_ptr[18] += uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19];
2049           rtmp_ptr[19] += uik[20]*u[15]+ uik[21]*u[16]+ uik[22]*u[17]+ uik[23]*u[18]+ uik[24]*u[19];
2050 
2051           rtmp_ptr[20] +=  uik[0]*u[20] + uik[1]*u[21] + uik[2]*u[22] + uik[3]*u[23] + uik[4]*u[24];
2052           rtmp_ptr[21] +=  uik[5]*u[20] + uik[6]*u[21] + uik[7]*u[22] + uik[8]*u[23] + uik[9]*u[24];
2053           rtmp_ptr[22] += uik[10]*u[20]+ uik[11]*u[21]+ uik[12]*u[22]+ uik[13]*u[23]+ uik[14]*u[24];
2054           rtmp_ptr[23] += uik[15]*u[20]+ uik[16]*u[21]+ uik[17]*u[22]+ uik[18]*u[23]+ uik[19]*u[24];
2055           rtmp_ptr[24] += uik[20]*u[20]+ uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24];
2056         }
2057 
2058         /* ... add i to row list for next nonzero entry */
2059         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
2060         j     = bj[jmin];
2061         jl[i] = jl[j]; jl[j] = i; /* update jl */
2062       }
2063       i = nexti;
2064     }
2065 
2066     /* save nonzero entries in k-th row of U ... */
2067 
2068     /* invert diagonal block */
2069     d = ba+k*25;
2070     ierr = PetscMemcpy(d,dk,25*sizeof(MatScalar));CHKERRQ(ierr);
2071     ierr = Kernel_A_gets_inverse_A_5(d);CHKERRQ(ierr);
2072 
2073     jmin = bi[k]; jmax = bi[k+1];
2074     if (jmin < jmax) {
2075       for (j=jmin; j<jmax; j++){
2076          vj = bj[j];           /* block col. index of U */
2077          u   = ba + j*25;
2078          rtmp_ptr = rtmp + vj*25;
2079          for (k1=0; k1<25; k1++){
2080            *u++        = *rtmp_ptr;
2081            *rtmp_ptr++ = 0.0;
2082          }
2083       }
2084 
2085       /* ... add k to row list for first nonzero entry in k-th row */
2086       il[k] = jmin;
2087       i     = bj[jmin];
2088       jl[k] = jl[i]; jl[i] = k;
2089     }
2090   }
2091 
2092   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2093   ierr = PetscFree(il);CHKERRQ(ierr);
2094   ierr = PetscFree(jl);CHKERRQ(ierr);
2095   ierr = PetscFree(dk);CHKERRQ(ierr);
2096   ierr = PetscFree(uik);CHKERRQ(ierr);
2097 
2098   C->factor    = FACTOR_CHOLESKY;
2099   C->assembled = PETSC_TRUE;
2100   C->preallocated = PETSC_TRUE;
2101   PLogFlops(1.3333*125*b->mbs); /* from inverting diagonal blocks */
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 /*
2106       Version for when blocks are 4 by 4 Using natural ordering
2107 */
2108 #undef __FUNC__
2109 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering"
2110 int MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering(Mat A,Mat *B)
2111 {
2112   Mat                C = *B;
2113   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
2114   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
2115   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
2116   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
2117   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
2118 
2119   PetscFunctionBegin;
2120 
2121   /* initialization */
2122   rtmp  = (MatScalar*)PetscMalloc(16*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
2123   ierr = PetscMemzero(rtmp,16*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2124   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
2125   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
2126   for (i=0; i<mbs; i++) {
2127     jl[i] = mbs; il[0] = 0;
2128   }
2129   dk    = (MatScalar*)PetscMalloc(16*sizeof(MatScalar));CHKPTRQ(dk);
2130   uik   = (MatScalar*)PetscMalloc(16*sizeof(MatScalar));CHKPTRQ(uik);
2131 
2132   ai = a->i; aj = a->j; aa = a->a;
2133 
2134   /* for each row k */
2135   for (k = 0; k<mbs; k++){
2136 
2137     /*initialize k-th row with elements nonzero in row k of A */
2138     jmin = ai[k]; jmax = ai[k+1];
2139     if (jmin < jmax) {
2140       ap = aa + jmin*16;
2141       for (j = jmin; j < jmax; j++){
2142         vj = aj[j];         /* block col. index */
2143         rtmp_ptr = rtmp + vj*16;
2144         for (i=0; i<16; i++) *rtmp_ptr++ = *ap++;
2145       }
2146     }
2147 
2148     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
2149     ierr = PetscMemcpy(dk,rtmp+k*16,16*sizeof(MatScalar));CHKERRQ(ierr);
2150     i = jl[k]; /* first row to be added to k_th row  */
2151 
2152     while (i < mbs){
2153       nexti = jl[i]; /* next row to be added to k_th row */
2154 
2155       /* compute multiplier */
2156       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2157 
2158       /* uik = -inv(Di)*U_bar(i,k) */
2159       diag = ba + i*16;
2160       u    = ba + ili*16;
2161 
2162       uik[0] = -(diag[0]*u[0] + diag[4]*u[1] + diag[8]*u[2] + diag[12]*u[3]);
2163       uik[1] = -(diag[1]*u[0] + diag[5]*u[1] + diag[9]*u[2] + diag[13]*u[3]);
2164       uik[2] = -(diag[2]*u[0] + diag[6]*u[1] + diag[10]*u[2]+ diag[14]*u[3]);
2165       uik[3] = -(diag[3]*u[0] + diag[7]*u[1] + diag[11]*u[2]+ diag[15]*u[3]);
2166 
2167       uik[4] = -(diag[0]*u[4] + diag[4]*u[5] + diag[8]*u[6] + diag[12]*u[7]);
2168       uik[5] = -(diag[1]*u[4] + diag[5]*u[5] + diag[9]*u[6] + diag[13]*u[7]);
2169       uik[6] = -(diag[2]*u[4] + diag[6]*u[5] + diag[10]*u[6]+ diag[14]*u[7]);
2170       uik[7] = -(diag[3]*u[4] + diag[7]*u[5] + diag[11]*u[6]+ diag[15]*u[7]);
2171 
2172       uik[8] = -(diag[0]*u[8] + diag[4]*u[9] + diag[8]*u[10] + diag[12]*u[11]);
2173       uik[9] = -(diag[1]*u[8] + diag[5]*u[9] + diag[9]*u[10] + diag[13]*u[11]);
2174       uik[10]= -(diag[2]*u[8] + diag[6]*u[9] + diag[10]*u[10]+ diag[14]*u[11]);
2175       uik[11]= -(diag[3]*u[8] + diag[7]*u[9] + diag[11]*u[10]+ diag[15]*u[11]);
2176 
2177       uik[12]= -(diag[0]*u[12] + diag[4]*u[13] + diag[8]*u[14] + diag[12]*u[15]);
2178       uik[13]= -(diag[1]*u[12] + diag[5]*u[13] + diag[9]*u[14] + diag[13]*u[15]);
2179       uik[14]= -(diag[2]*u[12] + diag[6]*u[13] + diag[10]*u[14]+ diag[14]*u[15]);
2180       uik[15]= -(diag[3]*u[12] + diag[7]*u[13] + diag[11]*u[14]+ diag[15]*u[15]);
2181 
2182       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
2183       dk[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3];
2184       dk[1] += uik[4]*u[0] + uik[5]*u[1] + uik[6]*u[2] + uik[7]*u[3];
2185       dk[2] += uik[8]*u[0] + uik[9]*u[1] + uik[10]*u[2]+ uik[11]*u[3];
2186       dk[3] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3];
2187 
2188       dk[4] += uik[0]*u[4] + uik[1]*u[5] + uik[2]*u[6] + uik[3]*u[7];
2189       dk[5] += uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7];
2190       dk[6] += uik[8]*u[4] + uik[9]*u[5] + uik[10]*u[6]+ uik[11]*u[7];
2191       dk[7] += uik[12]*u[4]+ uik[13]*u[5]+ uik[14]*u[6]+ uik[15]*u[7];
2192 
2193       dk[8] += uik[0]*u[8] + uik[1]*u[9] + uik[2]*u[10] + uik[3]*u[11];
2194       dk[9] += uik[4]*u[8] + uik[5]*u[9] + uik[6]*u[10] + uik[7]*u[11];
2195       dk[10]+= uik[8]*u[8] + uik[9]*u[9] + uik[10]*u[10]+ uik[11]*u[11];
2196       dk[11]+= uik[12]*u[8]+ uik[13]*u[9]+ uik[14]*u[10]+ uik[15]*u[11];
2197 
2198       dk[12]+= uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15];
2199       dk[13]+= uik[4]*u[12] + uik[5]*u[13] + uik[6]*u[14] + uik[7]*u[15];
2200       dk[14]+= uik[8]*u[12] + uik[9]*u[13] + uik[10]*u[14]+ uik[11]*u[15];
2201       dk[15]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15];
2202 
2203       /* update -U(i,k) */
2204       ierr = PetscMemcpy(ba+ili*16,uik,16*sizeof(MatScalar));CHKERRQ(ierr);
2205 
2206       /* add multiple of row i to k-th row ... */
2207       jmin = ili + 1; jmax = bi[i+1];
2208       if (jmin < jmax){
2209         for (j=jmin; j<jmax; j++) {
2210           /* rtmp += -U(i,k)^T * U_bar(i,j) */
2211           rtmp_ptr = rtmp + bj[j]*16;
2212           u = ba + j*16;
2213           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3];
2214           rtmp_ptr[1] += uik[4]*u[0] + uik[5]*u[1] + uik[6]*u[2] + uik[7]*u[3];
2215           rtmp_ptr[2] += uik[8]*u[0] + uik[9]*u[1] + uik[10]*u[2]+ uik[11]*u[3];
2216           rtmp_ptr[3] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3];
2217 
2218           rtmp_ptr[4] += uik[0]*u[4] + uik[1]*u[5] + uik[2]*u[6] + uik[3]*u[7];
2219           rtmp_ptr[5] += uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7];
2220           rtmp_ptr[6] += uik[8]*u[4] + uik[9]*u[5] + uik[10]*u[6]+ uik[11]*u[7];
2221           rtmp_ptr[7] += uik[12]*u[4]+ uik[13]*u[5]+ uik[14]*u[6]+ uik[15]*u[7];
2222 
2223           rtmp_ptr[8] += uik[0]*u[8] + uik[1]*u[9] + uik[2]*u[10] + uik[3]*u[11];
2224           rtmp_ptr[9] += uik[4]*u[8] + uik[5]*u[9] + uik[6]*u[10] + uik[7]*u[11];
2225           rtmp_ptr[10]+= uik[8]*u[8] + uik[9]*u[9] + uik[10]*u[10]+ uik[11]*u[11];
2226           rtmp_ptr[11]+= uik[12]*u[8]+ uik[13]*u[9]+ uik[14]*u[10]+ uik[15]*u[11];
2227 
2228           rtmp_ptr[12]+= uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15];
2229           rtmp_ptr[13]+= uik[4]*u[12] + uik[5]*u[13] + uik[6]*u[14] + uik[7]*u[15];
2230           rtmp_ptr[14]+= uik[8]*u[12] + uik[9]*u[13] + uik[10]*u[14]+ uik[11]*u[15];
2231           rtmp_ptr[15]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15];
2232         }
2233 
2234         /* ... add i to row list for next nonzero entry */
2235         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
2236         j     = bj[jmin];
2237         jl[i] = jl[j]; jl[j] = i; /* update jl */
2238       }
2239       i = nexti;
2240     }
2241 
2242     /* save nonzero entries in k-th row of U ... */
2243 
2244     /* invert diagonal block */
2245     diag = ba+k*16;
2246     ierr = PetscMemcpy(diag,dk,16*sizeof(MatScalar));CHKERRQ(ierr);
2247     ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
2248 
2249     jmin = bi[k]; jmax = bi[k+1];
2250     if (jmin < jmax) {
2251       for (j=jmin; j<jmax; j++){
2252          vj = bj[j];           /* block col. index of U */
2253          u   = ba + j*16;
2254          rtmp_ptr = rtmp + vj*16;
2255          for (k1=0; k1<16; k1++){
2256            *u++        = *rtmp_ptr;
2257            *rtmp_ptr++ = 0.0;
2258          }
2259       }
2260 
2261       /* ... add k to row list for first nonzero entry in k-th row */
2262       il[k] = jmin;
2263       i     = bj[jmin];
2264       jl[k] = jl[i]; jl[i] = k;
2265     }
2266   }
2267 
2268   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2269   ierr = PetscFree(il);CHKERRQ(ierr);
2270   ierr = PetscFree(jl);CHKERRQ(ierr);
2271   ierr = PetscFree(dk);CHKERRQ(ierr);
2272   ierr = PetscFree(uik);CHKERRQ(ierr);
2273 
2274   C->factor    = FACTOR_CHOLESKY;
2275   C->assembled = PETSC_TRUE;
2276   C->preallocated = PETSC_TRUE;
2277   PLogFlops(1.3333*64*b->mbs); /* from inverting diagonal blocks */
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 /* Version for when blocks are 4 by 4  */
2282 #undef __FUNC__
2283 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_4"
2284 int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat A,Mat *B)
2285 {
2286   Mat                C = *B;
2287   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
2288   IS                 perm = b->row;
2289   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
2290   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
2291   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
2292   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
2293 
2294   PetscFunctionBegin;
2295   /* initialization */
2296   rtmp  = (MatScalar*)PetscMalloc(16*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
2297   ierr = PetscMemzero(rtmp,16*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2298   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
2299   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
2300   for (i=0; i<mbs; i++) {
2301     jl[i] = mbs; il[0] = 0;
2302   }
2303   dk    = (MatScalar*)PetscMalloc(16*sizeof(MatScalar));CHKPTRQ(dk);
2304   uik   = (MatScalar*)PetscMalloc(16*sizeof(MatScalar));CHKPTRQ(uik);
2305   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
2306 
2307   /* check permutation */
2308   if (!a->permute){
2309     ai = a->i; aj = a->j; aa = a->a;
2310   } else {
2311     ai = a->inew; aj = a->jnew;
2312     aa = (MatScalar*)PetscMalloc(16*ai[mbs]*sizeof(MatScalar));CHKPTRQ(aa);
2313     ierr = PetscMemcpy(aa,a->a,16*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
2314     a2anew  = (int*)PetscMalloc(ai[mbs]*sizeof(int));CHKPTRQ(a2anew);
2315     ierr= PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
2316 
2317     for (i=0; i<mbs; i++){
2318       jmin = ai[i]; jmax = ai[i+1];
2319       for (j=jmin; j<jmax; j++){
2320         while (a2anew[j] != j){
2321           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
2322           for (k1=0; k1<16; k1++){
2323             dk[k1]       = aa[k*16+k1];
2324             aa[k*16+k1] = aa[j*16+k1];
2325             aa[j*16+k1] = dk[k1];
2326           }
2327         }
2328         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
2329         if (i > aj[j]){
2330           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
2331           ap = aa + j*16;                     /* ptr to the beginning of j-th block of aa */
2332           for (k=0; k<16; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
2333           for (k=0; k<4; k++){               /* j-th block of aa <- dk^T */
2334             for (k1=0; k1<4; k1++) *ap++ = dk[k + 4*k1];
2335           }
2336         }
2337       }
2338     }
2339     ierr = PetscFree(a2anew);CHKERRA(ierr);
2340   }
2341 
2342   /* for each row k */
2343   for (k = 0; k<mbs; k++){
2344 
2345     /*initialize k-th row with elements nonzero in row perm(k) of A */
2346     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
2347     if (jmin < jmax) {
2348       ap = aa + jmin*16;
2349       for (j = jmin; j < jmax; j++){
2350         vj = perm_ptr[aj[j]];         /* block col. index */
2351         rtmp_ptr = rtmp + vj*16;
2352         for (i=0; i<16; i++) *rtmp_ptr++ = *ap++;
2353       }
2354     }
2355 
2356     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
2357     ierr = PetscMemcpy(dk,rtmp+k*16,16*sizeof(MatScalar));CHKERRQ(ierr);
2358     i = jl[k]; /* first row to be added to k_th row  */
2359 
2360     while (i < mbs){
2361       nexti = jl[i]; /* next row to be added to k_th row */
2362 
2363       /* compute multiplier */
2364       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2365 
2366       /* uik = -inv(Di)*U_bar(i,k) */
2367       diag = ba + i*16;
2368       u    = ba + ili*16;
2369 
2370       uik[0] = -(diag[0]*u[0] + diag[4]*u[1] + diag[8]*u[2] + diag[12]*u[3]);
2371       uik[1] = -(diag[1]*u[0] + diag[5]*u[1] + diag[9]*u[2] + diag[13]*u[3]);
2372       uik[2] = -(diag[2]*u[0] + diag[6]*u[1] + diag[10]*u[2]+ diag[14]*u[3]);
2373       uik[3] = -(diag[3]*u[0] + diag[7]*u[1] + diag[11]*u[2]+ diag[15]*u[3]);
2374 
2375       uik[4] = -(diag[0]*u[4] + diag[4]*u[5] + diag[8]*u[6] + diag[12]*u[7]);
2376       uik[5] = -(diag[1]*u[4] + diag[5]*u[5] + diag[9]*u[6] + diag[13]*u[7]);
2377       uik[6] = -(diag[2]*u[4] + diag[6]*u[5] + diag[10]*u[6]+ diag[14]*u[7]);
2378       uik[7] = -(diag[3]*u[4] + diag[7]*u[5] + diag[11]*u[6]+ diag[15]*u[7]);
2379 
2380       uik[8] = -(diag[0]*u[8] + diag[4]*u[9] + diag[8]*u[10] + diag[12]*u[11]);
2381       uik[9] = -(diag[1]*u[8] + diag[5]*u[9] + diag[9]*u[10] + diag[13]*u[11]);
2382       uik[10]= -(diag[2]*u[8] + diag[6]*u[9] + diag[10]*u[10]+ diag[14]*u[11]);
2383       uik[11]= -(diag[3]*u[8] + diag[7]*u[9] + diag[11]*u[10]+ diag[15]*u[11]);
2384 
2385       uik[12]= -(diag[0]*u[12] + diag[4]*u[13] + diag[8]*u[14] + diag[12]*u[15]);
2386       uik[13]= -(diag[1]*u[12] + diag[5]*u[13] + diag[9]*u[14] + diag[13]*u[15]);
2387       uik[14]= -(diag[2]*u[12] + diag[6]*u[13] + diag[10]*u[14]+ diag[14]*u[15]);
2388       uik[15]= -(diag[3]*u[12] + diag[7]*u[13] + diag[11]*u[14]+ diag[15]*u[15]);
2389 
2390       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
2391       dk[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3];
2392       dk[1] += uik[4]*u[0] + uik[5]*u[1] + uik[6]*u[2] + uik[7]*u[3];
2393       dk[2] += uik[8]*u[0] + uik[9]*u[1] + uik[10]*u[2]+ uik[11]*u[3];
2394       dk[3] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3];
2395 
2396       dk[4] += uik[0]*u[4] + uik[1]*u[5] + uik[2]*u[6] + uik[3]*u[7];
2397       dk[5] += uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7];
2398       dk[6] += uik[8]*u[4] + uik[9]*u[5] + uik[10]*u[6]+ uik[11]*u[7];
2399       dk[7] += uik[12]*u[4]+ uik[13]*u[5]+ uik[14]*u[6]+ uik[15]*u[7];
2400 
2401       dk[8] += uik[0]*u[8] + uik[1]*u[9] + uik[2]*u[10] + uik[3]*u[11];
2402       dk[9] += uik[4]*u[8] + uik[5]*u[9] + uik[6]*u[10] + uik[7]*u[11];
2403       dk[10]+= uik[8]*u[8] + uik[9]*u[9] + uik[10]*u[10]+ uik[11]*u[11];
2404       dk[11]+= uik[12]*u[8]+ uik[13]*u[9]+ uik[14]*u[10]+ uik[15]*u[11];
2405 
2406       dk[12]+= uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15];
2407       dk[13]+= uik[4]*u[12] + uik[5]*u[13] + uik[6]*u[14] + uik[7]*u[15];
2408       dk[14]+= uik[8]*u[12] + uik[9]*u[13] + uik[10]*u[14]+ uik[11]*u[15];
2409       dk[15]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15];
2410 
2411       /* update -U(i,k) */
2412       ierr = PetscMemcpy(ba+ili*16,uik,16*sizeof(MatScalar));CHKERRQ(ierr);
2413 
2414       /* add multiple of row i to k-th row ... */
2415       jmin = ili + 1; jmax = bi[i+1];
2416       if (jmin < jmax){
2417         for (j=jmin; j<jmax; j++) {
2418           /* rtmp += -U(i,k)^T * U_bar(i,j) */
2419           rtmp_ptr = rtmp + bj[j]*16;
2420           u = ba + j*16;
2421           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3];
2422           rtmp_ptr[1] += uik[4]*u[0] + uik[5]*u[1] + uik[6]*u[2] + uik[7]*u[3];
2423           rtmp_ptr[2] += uik[8]*u[0] + uik[9]*u[1] + uik[10]*u[2]+ uik[11]*u[3];
2424           rtmp_ptr[3] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3];
2425 
2426           rtmp_ptr[4] += uik[0]*u[4] + uik[1]*u[5] + uik[2]*u[6] + uik[3]*u[7];
2427           rtmp_ptr[5] += uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7];
2428           rtmp_ptr[6] += uik[8]*u[4] + uik[9]*u[5] + uik[10]*u[6]+ uik[11]*u[7];
2429           rtmp_ptr[7] += uik[12]*u[4]+ uik[13]*u[5]+ uik[14]*u[6]+ uik[15]*u[7];
2430 
2431           rtmp_ptr[8] += uik[0]*u[8] + uik[1]*u[9] + uik[2]*u[10] + uik[3]*u[11];
2432           rtmp_ptr[9] += uik[4]*u[8] + uik[5]*u[9] + uik[6]*u[10] + uik[7]*u[11];
2433           rtmp_ptr[10]+= uik[8]*u[8] + uik[9]*u[9] + uik[10]*u[10]+ uik[11]*u[11];
2434           rtmp_ptr[11]+= uik[12]*u[8]+ uik[13]*u[9]+ uik[14]*u[10]+ uik[15]*u[11];
2435 
2436           rtmp_ptr[12]+= uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15];
2437           rtmp_ptr[13]+= uik[4]*u[12] + uik[5]*u[13] + uik[6]*u[14] + uik[7]*u[15];
2438           rtmp_ptr[14]+= uik[8]*u[12] + uik[9]*u[13] + uik[10]*u[14]+ uik[11]*u[15];
2439           rtmp_ptr[15]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15];
2440         }
2441 
2442         /* ... add i to row list for next nonzero entry */
2443         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
2444         j     = bj[jmin];
2445         jl[i] = jl[j]; jl[j] = i; /* update jl */
2446       }
2447       i = nexti;
2448     }
2449 
2450     /* save nonzero entries in k-th row of U ... */
2451 
2452     /* invert diagonal block */
2453     diag = ba+k*16;
2454     ierr = PetscMemcpy(diag,dk,16*sizeof(MatScalar));CHKERRQ(ierr);
2455     ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
2456 
2457     jmin = bi[k]; jmax = bi[k+1];
2458     if (jmin < jmax) {
2459       for (j=jmin; j<jmax; j++){
2460          vj = bj[j];           /* block col. index of U */
2461          u   = ba + j*16;
2462          rtmp_ptr = rtmp + vj*16;
2463          for (k1=0; k1<16; k1++){
2464            *u++        = *rtmp_ptr;
2465            *rtmp_ptr++ = 0.0;
2466          }
2467       }
2468 
2469       /* ... add k to row list for first nonzero entry in k-th row */
2470       il[k] = jmin;
2471       i     = bj[jmin];
2472       jl[k] = jl[i]; jl[i] = k;
2473     }
2474   }
2475 
2476   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2477   ierr = PetscFree(il);CHKERRQ(ierr);
2478   ierr = PetscFree(jl);CHKERRQ(ierr);
2479   ierr = PetscFree(dk);CHKERRQ(ierr);
2480   ierr = PetscFree(uik);CHKERRQ(ierr);
2481   if (a->permute){
2482     ierr = PetscFree(aa);CHKERRQ(ierr);
2483   }
2484 
2485   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
2486   C->factor    = FACTOR_CHOLESKY;
2487   C->assembled = PETSC_TRUE;
2488   C->preallocated = PETSC_TRUE;
2489   PLogFlops(1.3333*64*b->mbs); /* from inverting diagonal blocks */
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 /* Version for when blocks are 3 by 3  */
2494 #undef __FUNC__
2495 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_3"
2496 int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat A,Mat *B)
2497 {
2498   Mat                C = *B;
2499   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
2500   IS                 perm = b->row;
2501   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
2502   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
2503   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
2504   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
2505 
2506   PetscFunctionBegin;
2507 
2508   /* initialization */
2509   rtmp  = (MatScalar*)PetscMalloc(9*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
2510   ierr = PetscMemzero(rtmp,9*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2511   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
2512   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
2513   for (i=0; i<mbs; i++) {
2514     jl[i] = mbs; il[0] = 0;
2515   }
2516   dk  = (MatScalar*)PetscMalloc(9*sizeof(MatScalar));CHKPTRQ(dk);
2517   uik = (MatScalar*)PetscMalloc(9*sizeof(MatScalar));CHKPTRQ(uik);
2518   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
2519 
2520   /* check permutation */
2521   if (!a->permute){
2522     ai = a->i; aj = a->j; aa = a->a;
2523   } else {
2524     ai = a->inew; aj = a->jnew;
2525     aa = (MatScalar*)PetscMalloc(9*ai[mbs]*sizeof(MatScalar));CHKPTRQ(aa);
2526     ierr = PetscMemcpy(aa,a->a,9*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
2527     a2anew  = (int*)PetscMalloc(ai[mbs]*sizeof(int));CHKPTRQ(a2anew);
2528     ierr= PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
2529 
2530     for (i=0; i<mbs; i++){
2531       jmin = ai[i]; jmax = ai[i+1];
2532       for (j=jmin; j<jmax; j++){
2533         while (a2anew[j] != j){
2534           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
2535           for (k1=0; k1<9; k1++){
2536             dk[k1]       = aa[k*9+k1];
2537             aa[k*9+k1] = aa[j*9+k1];
2538             aa[j*9+k1] = dk[k1];
2539           }
2540         }
2541         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
2542         if (i > aj[j]){
2543           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
2544           ap = aa + j*9;                     /* ptr to the beginning of j-th block of aa */
2545           for (k=0; k<9; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
2546           for (k=0; k<3; k++){               /* j-th block of aa <- dk^T */
2547             for (k1=0; k1<3; k1++) *ap++ = dk[k + 3*k1];
2548           }
2549         }
2550       }
2551     }
2552     ierr = PetscFree(a2anew);CHKERRA(ierr);
2553   }
2554 
2555   /* for each row k */
2556   for (k = 0; k<mbs; k++){
2557 
2558     /*initialize k-th row with elements nonzero in row perm(k) of A */
2559     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
2560     if (jmin < jmax) {
2561       ap = aa + jmin*9;
2562       for (j = jmin; j < jmax; j++){
2563         vj = perm_ptr[aj[j]];         /* block col. index */
2564         rtmp_ptr = rtmp + vj*9;
2565         for (i=0; i<9; i++) *rtmp_ptr++ = *ap++;
2566       }
2567     }
2568 
2569     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
2570     ierr = PetscMemcpy(dk,rtmp+k*9,9*sizeof(MatScalar));CHKERRQ(ierr);
2571     i = jl[k]; /* first row to be added to k_th row  */
2572 
2573     while (i < mbs){
2574       nexti = jl[i]; /* next row to be added to k_th row */
2575 
2576       /* compute multiplier */
2577       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2578 
2579       /* uik = -inv(Di)*U_bar(i,k) */
2580       diag = ba + i*9;
2581       u    = ba + ili*9;
2582 
2583       uik[0] = -(diag[0]*u[0] + diag[3]*u[1] + diag[6]*u[2]);
2584       uik[1] = -(diag[1]*u[0] + diag[4]*u[1] + diag[7]*u[2]);
2585       uik[2] = -(diag[2]*u[0] + diag[5]*u[1] + diag[8]*u[2]);
2586 
2587       uik[3] = -(diag[0]*u[3] + diag[3]*u[4] + diag[6]*u[5]);
2588       uik[4] = -(diag[1]*u[3] + diag[4]*u[4] + diag[7]*u[5]);
2589       uik[5] = -(diag[2]*u[3] + diag[5]*u[4] + diag[8]*u[5]);
2590 
2591       uik[6] = -(diag[0]*u[6] + diag[3]*u[7] + diag[6]*u[8]);
2592       uik[7] = -(diag[1]*u[6] + diag[4]*u[7] + diag[7]*u[8]);
2593       uik[8] = -(diag[2]*u[6] + diag[5]*u[7] + diag[8]*u[8]);
2594 
2595       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
2596       dk[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2];
2597       dk[1] += uik[3]*u[0] + uik[4]*u[1] + uik[5]*u[2];
2598       dk[2] += uik[6]*u[0] + uik[7]*u[1] + uik[8]*u[2];
2599 
2600       dk[3] += uik[0]*u[3] + uik[1]*u[4] + uik[2]*u[5];
2601       dk[4] += uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5];
2602       dk[5] += uik[6]*u[3] + uik[7]*u[4] + uik[8]*u[5];
2603 
2604       dk[6] += uik[0]*u[6] + uik[1]*u[7] + uik[2]*u[8];
2605       dk[7] += uik[3]*u[6] + uik[4]*u[7] + uik[5]*u[8];
2606       dk[8] += uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8];
2607 
2608       /* update -U(i,k) */
2609       ierr = PetscMemcpy(ba+ili*9,uik,9*sizeof(MatScalar));CHKERRQ(ierr);
2610 
2611       /* add multiple of row i to k-th row ... */
2612       jmin = ili + 1; jmax = bi[i+1];
2613       if (jmin < jmax){
2614         for (j=jmin; j<jmax; j++) {
2615           /* rtmp += -U(i,k)^T * U_bar(i,j) */
2616           rtmp_ptr = rtmp + bj[j]*9;
2617           u = ba + j*9;
2618           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2];
2619           rtmp_ptr[1] += uik[3]*u[0] + uik[4]*u[1] + uik[5]*u[2];
2620           rtmp_ptr[2] += uik[6]*u[0] + uik[7]*u[1] + uik[8]*u[2];
2621 
2622           rtmp_ptr[3] += uik[0]*u[3] + uik[1]*u[4] + uik[2]*u[5];
2623           rtmp_ptr[4] += uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5];
2624           rtmp_ptr[5] += uik[6]*u[3] + uik[7]*u[4] + uik[8]*u[5];
2625 
2626           rtmp_ptr[6] += uik[0]*u[6] + uik[1]*u[7] + uik[2]*u[8];
2627           rtmp_ptr[7] += uik[3]*u[6] + uik[4]*u[7] + uik[5]*u[8];
2628           rtmp_ptr[8] += uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8];
2629         }
2630 
2631         /* ... add i to row list for next nonzero entry */
2632         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
2633         j     = bj[jmin];
2634         jl[i] = jl[j]; jl[j] = i; /* update jl */
2635       }
2636       i = nexti;
2637     }
2638 
2639     /* save nonzero entries in k-th row of U ... */
2640 
2641     /* invert diagonal block */
2642     diag = ba+k*9;
2643     ierr = PetscMemcpy(diag,dk,9*sizeof(MatScalar));CHKERRQ(ierr);
2644     ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
2645 
2646     jmin = bi[k]; jmax = bi[k+1];
2647     if (jmin < jmax) {
2648       for (j=jmin; j<jmax; j++){
2649          vj = bj[j];           /* block col. index of U */
2650          u   = ba + j*9;
2651          rtmp_ptr = rtmp + vj*9;
2652          for (k1=0; k1<9; k1++){
2653            *u++        = *rtmp_ptr;
2654            *rtmp_ptr++ = 0.0;
2655          }
2656       }
2657 
2658       /* ... add k to row list for first nonzero entry in k-th row */
2659       il[k] = jmin;
2660       i     = bj[jmin];
2661       jl[k] = jl[i]; jl[i] = k;
2662     }
2663   }
2664 
2665   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2666   ierr = PetscFree(il);CHKERRQ(ierr);
2667   ierr = PetscFree(jl);CHKERRQ(ierr);
2668   ierr = PetscFree(dk);CHKERRQ(ierr);
2669   ierr = PetscFree(uik);CHKERRQ(ierr);
2670   if (a->permute){
2671     ierr = PetscFree(aa);CHKERRQ(ierr);
2672   }
2673 
2674   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
2675   C->factor    = FACTOR_CHOLESKY;
2676   C->assembled = PETSC_TRUE;
2677   C->preallocated = PETSC_TRUE;
2678   PLogFlops(1.3333*27*b->mbs); /* from inverting diagonal blocks */
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 /*
2683       Version for when blocks are 3 by 3 Using natural ordering
2684 */
2685 #undef __FUNC__
2686 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering"
2687 int MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering(Mat A,Mat *B)
2688 {
2689   Mat                C = *B;
2690   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
2691   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
2692   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
2693   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
2694   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
2695 
2696   PetscFunctionBegin;
2697 
2698   /* initialization */
2699   rtmp  = (MatScalar*)PetscMalloc(9*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
2700   ierr = PetscMemzero(rtmp,9*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2701   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
2702   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
2703   for (i=0; i<mbs; i++) {
2704     jl[i] = mbs; il[0] = 0;
2705   }
2706   dk  = (MatScalar*)PetscMalloc(9*sizeof(MatScalar));CHKPTRQ(dk);
2707   uik = (MatScalar*)PetscMalloc(9*sizeof(MatScalar));CHKPTRQ(uik);
2708 
2709   ai = a->i; aj = a->j; aa = a->a;
2710 
2711   /* for each row k */
2712   for (k = 0; k<mbs; k++){
2713 
2714     /*initialize k-th row with elements nonzero in row k of A */
2715     jmin = ai[k]; jmax = ai[k+1];
2716     if (jmin < jmax) {
2717       ap = aa + jmin*9;
2718       for (j = jmin; j < jmax; j++){
2719         vj = aj[j];         /* block col. index */
2720         rtmp_ptr = rtmp + vj*9;
2721         for (i=0; i<9; i++) *rtmp_ptr++ = *ap++;
2722       }
2723     }
2724 
2725     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
2726     ierr = PetscMemcpy(dk,rtmp+k*9,9*sizeof(MatScalar));CHKERRQ(ierr);
2727     i = jl[k]; /* first row to be added to k_th row  */
2728 
2729     while (i < mbs){
2730       nexti = jl[i]; /* next row to be added to k_th row */
2731 
2732       /* compute multiplier */
2733       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2734 
2735       /* uik = -inv(Di)*U_bar(i,k) */
2736       diag = ba + i*9;
2737       u    = ba + ili*9;
2738 
2739       uik[0] = -(diag[0]*u[0] + diag[3]*u[1] + diag[6]*u[2]);
2740       uik[1] = -(diag[1]*u[0] + diag[4]*u[1] + diag[7]*u[2]);
2741       uik[2] = -(diag[2]*u[0] + diag[5]*u[1] + diag[8]*u[2]);
2742 
2743       uik[3] = -(diag[0]*u[3] + diag[3]*u[4] + diag[6]*u[5]);
2744       uik[4] = -(diag[1]*u[3] + diag[4]*u[4] + diag[7]*u[5]);
2745       uik[5] = -(diag[2]*u[3] + diag[5]*u[4] + diag[8]*u[5]);
2746 
2747       uik[6] = -(diag[0]*u[6] + diag[3]*u[7] + diag[6]*u[8]);
2748       uik[7] = -(diag[1]*u[6] + diag[4]*u[7] + diag[7]*u[8]);
2749       uik[8] = -(diag[2]*u[6] + diag[5]*u[7] + diag[8]*u[8]);
2750 
2751       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
2752       dk[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2];
2753       dk[1] += uik[3]*u[0] + uik[4]*u[1] + uik[5]*u[2];
2754       dk[2] += uik[6]*u[0] + uik[7]*u[1] + uik[8]*u[2];
2755 
2756       dk[3] += uik[0]*u[3] + uik[1]*u[4] + uik[2]*u[5];
2757       dk[4] += uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5];
2758       dk[5] += uik[6]*u[3] + uik[7]*u[4] + uik[8]*u[5];
2759 
2760       dk[6] += uik[0]*u[6] + uik[1]*u[7] + uik[2]*u[8];
2761       dk[7] += uik[3]*u[6] + uik[4]*u[7] + uik[5]*u[8];
2762       dk[8] += uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8];
2763 
2764       /* update -U(i,k) */
2765       ierr = PetscMemcpy(ba+ili*9,uik,9*sizeof(MatScalar));CHKERRQ(ierr);
2766 
2767       /* add multiple of row i to k-th row ... */
2768       jmin = ili + 1; jmax = bi[i+1];
2769       if (jmin < jmax){
2770         for (j=jmin; j<jmax; j++) {
2771           /* rtmp += -U(i,k)^T * U_bar(i,j) */
2772           rtmp_ptr = rtmp + bj[j]*9;
2773           u = ba + j*9;
2774           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2];
2775           rtmp_ptr[1] += uik[3]*u[0] + uik[4]*u[1] + uik[5]*u[2];
2776           rtmp_ptr[2] += uik[6]*u[0] + uik[7]*u[1] + uik[8]*u[2];
2777 
2778           rtmp_ptr[3] += uik[0]*u[3] + uik[1]*u[4] + uik[2]*u[5];
2779           rtmp_ptr[4] += uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5];
2780           rtmp_ptr[5] += uik[6]*u[3] + uik[7]*u[4] + uik[8]*u[5];
2781 
2782           rtmp_ptr[6] += uik[0]*u[6] + uik[1]*u[7] + uik[2]*u[8];
2783           rtmp_ptr[7] += uik[3]*u[6] + uik[4]*u[7] + uik[5]*u[8];
2784           rtmp_ptr[8] += uik[6]*u[6] + uik[7]*u[7] + uik[8]*u[8];
2785         }
2786 
2787         /* ... add i to row list for next nonzero entry */
2788         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
2789         j     = bj[jmin];
2790         jl[i] = jl[j]; jl[j] = i; /* update jl */
2791       }
2792       i = nexti;
2793     }
2794 
2795     /* save nonzero entries in k-th row of U ... */
2796 
2797     /* invert diagonal block */
2798     diag = ba+k*9;
2799     ierr = PetscMemcpy(diag,dk,9*sizeof(MatScalar));CHKERRQ(ierr);
2800     ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
2801 
2802     jmin = bi[k]; jmax = bi[k+1];
2803     if (jmin < jmax) {
2804       for (j=jmin; j<jmax; j++){
2805          vj = bj[j];           /* block col. index of U */
2806          u   = ba + j*9;
2807          rtmp_ptr = rtmp + vj*9;
2808          for (k1=0; k1<9; k1++){
2809            *u++        = *rtmp_ptr;
2810            *rtmp_ptr++ = 0.0;
2811          }
2812       }
2813 
2814       /* ... add k to row list for first nonzero entry in k-th row */
2815       il[k] = jmin;
2816       i     = bj[jmin];
2817       jl[k] = jl[i]; jl[i] = k;
2818     }
2819   }
2820 
2821   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2822   ierr = PetscFree(il);CHKERRQ(ierr);
2823   ierr = PetscFree(jl);CHKERRQ(ierr);
2824   ierr = PetscFree(dk);CHKERRQ(ierr);
2825   ierr = PetscFree(uik);CHKERRQ(ierr);
2826 
2827   C->factor    = FACTOR_CHOLESKY;
2828   C->assembled = PETSC_TRUE;
2829   C->preallocated = PETSC_TRUE;
2830   PLogFlops(1.3333*27*b->mbs); /* from inverting diagonal blocks */
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 /*
2835     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
2836     Version for blocks 2 by 2.
2837 */
2838 #undef __FUNC__
2839 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_2"
2840 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
2841 {
2842   Mat                C = *B;
2843   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
2844   IS                 perm = b->row;
2845   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
2846   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
2847   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
2848   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
2849 
2850   PetscFunctionBegin;
2851 
2852   /* initialization */
2853   /* il and jl record the first nonzero element in each row of the accessing
2854      window U(0:k, k:mbs-1).
2855      jl:    list of rows to be added to uneliminated rows
2856             i>= k: jl(i) is the first row to be added to row i
2857             i<  k: jl(i) is the row following row i in some list of rows
2858             jl(i) = mbs indicates the end of a list
2859      il(i): points to the first nonzero element in columns k,...,mbs-1 of
2860             row i of U */
2861   rtmp  = (MatScalar*)PetscMalloc(4*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
2862   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2863   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
2864   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
2865   for (i=0; i<mbs; i++) {
2866     jl[i] = mbs; il[0] = 0;
2867   }
2868   dk  = (MatScalar*)PetscMalloc(4*sizeof(MatScalar));CHKPTRQ(dk);
2869   uik = (MatScalar*)PetscMalloc(4*sizeof(MatScalar));CHKPTRQ(uik);
2870   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
2871 
2872   /* check permutation */
2873   if (!a->permute){
2874     ai = a->i; aj = a->j; aa = a->a;
2875   } else {
2876     ai = a->inew; aj = a->jnew;
2877     aa = (MatScalar*)PetscMalloc(4*ai[mbs]*sizeof(MatScalar));CHKPTRQ(aa);
2878     ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
2879     a2anew  = (int*)PetscMalloc(ai[mbs]*sizeof(int));CHKPTRQ(a2anew);
2880     ierr= PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
2881 
2882     for (i=0; i<mbs; i++){
2883       jmin = ai[i]; jmax = ai[i+1];
2884       for (j=jmin; j<jmax; j++){
2885         while (a2anew[j] != j){
2886           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
2887           for (k1=0; k1<4; k1++){
2888             dk[k1]       = aa[k*4+k1];
2889             aa[k*4+k1] = aa[j*4+k1];
2890             aa[j*4+k1] = dk[k1];
2891           }
2892         }
2893         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
2894         if (i > aj[j]){
2895           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
2896           ap = aa + j*4;     /* ptr to the beginning of the block */
2897           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
2898           ap[1] = ap[2];
2899           ap[2] = dk[1];
2900         }
2901       }
2902     }
2903     ierr = PetscFree(a2anew);CHKERRA(ierr);
2904   }
2905 
2906   /* for each row k */
2907   for (k = 0; k<mbs; k++){
2908 
2909     /*initialize k-th row with elements nonzero in row perm(k) of A */
2910     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
2911     if (jmin < jmax) {
2912       ap = aa + jmin*4;
2913       for (j = jmin; j < jmax; j++){
2914         vj = perm_ptr[aj[j]];         /* block col. index */
2915         rtmp_ptr = rtmp + vj*4;
2916         for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
2917       }
2918     }
2919 
2920     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
2921     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
2922     i = jl[k]; /* first row to be added to k_th row  */
2923 
2924     while (i < mbs){
2925       nexti = jl[i]; /* next row to be added to k_th row */
2926 
2927       /* compute multiplier */
2928       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
2929 
2930       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
2931       diag = ba + i*4;
2932       u    = ba + ili*4;
2933       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
2934       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
2935       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
2936       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
2937 
2938       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
2939       dk[0] += uik[0]*u[0] + uik[1]*u[1];
2940       dk[1] += uik[2]*u[0] + uik[3]*u[1];
2941       dk[2] += uik[0]*u[2] + uik[1]*u[3];
2942       dk[3] += uik[2]*u[2] + uik[3]*u[3];
2943 
2944       /* update -U(i,k): ba[ili] = uik */
2945       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
2946 
2947       /* add multiple of row i to k-th row ... */
2948       jmin = ili + 1; jmax = bi[i+1];
2949       if (jmin < jmax){
2950         for (j=jmin; j<jmax; j++) {
2951           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
2952           rtmp_ptr = rtmp + bj[j]*4;
2953           u = ba + j*4;
2954           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
2955           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
2956           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
2957           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
2958         }
2959 
2960         /* ... add i to row list for next nonzero entry */
2961         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
2962         j     = bj[jmin];
2963         jl[i] = jl[j]; jl[j] = i; /* update jl */
2964       }
2965       i = nexti;
2966     }
2967 
2968     /* save nonzero entries in k-th row of U ... */
2969 
2970     /* invert diagonal block */
2971     diag = ba+k*4;
2972     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
2973     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
2974 
2975     jmin = bi[k]; jmax = bi[k+1];
2976     if (jmin < jmax) {
2977       for (j=jmin; j<jmax; j++){
2978          vj = bj[j];           /* block col. index of U */
2979          u   = ba + j*4;
2980          rtmp_ptr = rtmp + vj*4;
2981          for (k1=0; k1<4; k1++){
2982            *u++        = *rtmp_ptr;
2983            *rtmp_ptr++ = 0.0;
2984          }
2985       }
2986 
2987       /* ... add k to row list for first nonzero entry in k-th row */
2988       il[k] = jmin;
2989       i     = bj[jmin];
2990       jl[k] = jl[i]; jl[i] = k;
2991     }
2992   }
2993 
2994   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2995   ierr = PetscFree(il);CHKERRQ(ierr);
2996   ierr = PetscFree(jl);CHKERRQ(ierr);
2997   ierr = PetscFree(dk);CHKERRQ(ierr);
2998   ierr = PetscFree(uik);CHKERRQ(ierr);
2999   if (a->permute) {
3000     ierr = PetscFree(aa);CHKERRQ(ierr);
3001   }
3002   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
3003   C->factor    = FACTOR_CHOLESKY;
3004   C->assembled = PETSC_TRUE;
3005   C->preallocated = PETSC_TRUE;
3006   PLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 /*
3011       Version for when blocks are 2 by 2 Using natural ordering
3012 */
3013 #undef __FUNC__
3014 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering"
3015 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
3016 {
3017   Mat                C = *B;
3018   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
3019   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
3020   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
3021   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
3022   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
3023 
3024   PetscFunctionBegin;
3025 
3026   /* initialization */
3027   /* il and jl record the first nonzero element in each row of the accessing
3028      window U(0:k, k:mbs-1).
3029      jl:    list of rows to be added to uneliminated rows
3030             i>= k: jl(i) is the first row to be added to row i
3031             i<  k: jl(i) is the row following row i in some list of rows
3032             jl(i) = mbs indicates the end of a list
3033      il(i): points to the first nonzero element in columns k,...,mbs-1 of
3034             row i of U */
3035   rtmp  = (MatScalar*)PetscMalloc(4*mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
3036   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3037   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
3038   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
3039   for (i=0; i<mbs; i++) {
3040     jl[i] = mbs; il[0] = 0;
3041   }
3042   dk  = (MatScalar*)PetscMalloc(4*sizeof(MatScalar));CHKPTRQ(dk);
3043   uik = (MatScalar*)PetscMalloc(4*sizeof(MatScalar));CHKPTRQ(uik);
3044 
3045   ai = a->i; aj = a->j; aa = a->a;
3046 
3047   /* for each row k */
3048   for (k = 0; k<mbs; k++){
3049 
3050     /*initialize k-th row with elements nonzero in row k of A */
3051     jmin = ai[k]; jmax = ai[k+1];
3052     if (jmin < jmax) {
3053       ap = aa + jmin*4;
3054       for (j = jmin; j < jmax; j++){
3055         vj = aj[j];         /* block col. index */
3056         rtmp_ptr = rtmp + vj*4;
3057         for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
3058       }
3059     }
3060 
3061     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
3062     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
3063     i = jl[k]; /* first row to be added to k_th row  */
3064 
3065     while (i < mbs){
3066       nexti = jl[i]; /* next row to be added to k_th row */
3067 
3068       /* compute multiplier */
3069       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
3070 
3071       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
3072       diag = ba + i*4;
3073       u    = ba + ili*4;
3074       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
3075       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
3076       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
3077       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
3078 
3079       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
3080       dk[0] += uik[0]*u[0] + uik[1]*u[1];
3081       dk[1] += uik[2]*u[0] + uik[3]*u[1];
3082       dk[2] += uik[0]*u[2] + uik[1]*u[3];
3083       dk[3] += uik[2]*u[2] + uik[3]*u[3];
3084 
3085       /* update -U(i,k): ba[ili] = uik */
3086       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
3087 
3088       /* add multiple of row i to k-th row ... */
3089       jmin = ili + 1; jmax = bi[i+1];
3090       if (jmin < jmax){
3091         for (j=jmin; j<jmax; j++) {
3092           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
3093           rtmp_ptr = rtmp + bj[j]*4;
3094           u = ba + j*4;
3095           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
3096           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
3097           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
3098           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
3099         }
3100 
3101         /* ... add i to row list for next nonzero entry */
3102         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
3103         j     = bj[jmin];
3104         jl[i] = jl[j]; jl[j] = i; /* update jl */
3105       }
3106       i = nexti;
3107     }
3108 
3109     /* save nonzero entries in k-th row of U ... */
3110 
3111     /* invert diagonal block */
3112     diag = ba+k*4;
3113     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
3114     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
3115 
3116     jmin = bi[k]; jmax = bi[k+1];
3117     if (jmin < jmax) {
3118       for (j=jmin; j<jmax; j++){
3119          vj = bj[j];           /* block col. index of U */
3120          u   = ba + j*4;
3121          rtmp_ptr = rtmp + vj*4;
3122          for (k1=0; k1<4; k1++){
3123            *u++        = *rtmp_ptr;
3124            *rtmp_ptr++ = 0.0;
3125          }
3126       }
3127 
3128       /* ... add k to row list for first nonzero entry in k-th row */
3129       il[k] = jmin;
3130       i     = bj[jmin];
3131       jl[k] = jl[i]; jl[i] = k;
3132     }
3133   }
3134 
3135   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3136   ierr = PetscFree(il);CHKERRQ(ierr);
3137   ierr = PetscFree(jl);CHKERRQ(ierr);
3138   ierr = PetscFree(dk);CHKERRQ(ierr);
3139   ierr = PetscFree(uik);CHKERRQ(ierr);
3140 
3141   C->factor    = FACTOR_CHOLESKY;
3142   C->assembled = PETSC_TRUE;
3143   C->preallocated = PETSC_TRUE;
3144   PLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
3145   PetscFunctionReturn(0);
3146 }
3147 
3148 /*
3149     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
3150     Version for blocks are 1 by 1.
3151 */
3152 #undef __FUNC__
3153 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_1"
3154 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
3155 {
3156   Mat                C = *B;
3157   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
3158   IS                 ip = b->row;
3159   int                *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
3160   int                *ai,*aj,*r;
3161   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
3162   MatScalar          *rtmp;
3163   MatScalar          *ba = b->a,*aa,ak;
3164   MatScalar          dk,uikdi;
3165 
3166   PetscFunctionBegin;
3167   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
3168   if (!a->permute){
3169     ai = a->i; aj = a->j; aa = a->a;
3170   } else {
3171     ai = a->inew; aj = a->jnew;
3172     aa = (MatScalar*)PetscMalloc(ai[mbs]*sizeof(MatScalar));CHKPTRQ(aa);
3173     ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
3174     r   = (int*)PetscMalloc(ai[mbs]*sizeof(int));CHKPTRQ(r);
3175     ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr);
3176 
3177     jmin = ai[0]; jmax = ai[mbs];
3178     for (j=jmin; j<jmax; j++){
3179       while (r[j] != j){
3180         k = r[j]; r[j] = r[k]; r[k] = k;
3181         ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
3182       }
3183     }
3184     ierr = PetscFree(r);CHKERRA(ierr);
3185   }
3186 
3187   /* initialization */
3188   /* il and jl record the first nonzero element in each row of the accessing
3189      window U(0:k, k:mbs-1).
3190      jl:    list of rows to be added to uneliminated rows
3191             i>= k: jl(i) is the first row to be added to row i
3192             i<  k: jl(i) is the row following row i in some list of rows
3193             jl(i) = mbs indicates the end of a list
3194      il(i): points to the first nonzero element in columns k,...,mbs-1 of
3195             row i of U */
3196   rtmp  = (MatScalar*)PetscMalloc(mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
3197   il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
3198   jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
3199   for (i=0; i<mbs; i++) {
3200     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
3201   }
3202 
3203   /* for each row k */
3204   for (k = 0; k<mbs; k++){
3205 
3206     /*initialize k-th row with elements nonzero in row perm(k) of A */
3207     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
3208     if (jmin < jmax) {
3209       for (j = jmin; j < jmax; j++){
3210         vj = rip[aj[j]];
3211         rtmp[vj] = aa[j];
3212       }
3213     }
3214 
3215     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
3216     dk = rtmp[k];
3217     i = jl[k]; /* first row to be added to k_th row  */
3218 
3219     while (i < mbs){
3220       nexti = jl[i]; /* next row to be added to k_th row */
3221 
3222       /* compute multiplier, update D(k) and U(i,k) */
3223       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
3224       uikdi = - ba[ili]*ba[i];
3225       dk += uikdi*ba[ili];
3226       ba[ili] = uikdi; /* -U(i,k) */
3227 
3228       /* add multiple of row i to k-th row ... */
3229       jmin = ili + 1; jmax = bi[i+1];
3230       if (jmin < jmax){
3231         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
3232         /* ... add i to row list for next nonzero entry */
3233         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
3234         j     = bj[jmin];
3235         jl[i] = jl[j]; jl[j] = i; /* update jl */
3236       }
3237       i = nexti;
3238     }
3239 
3240     /* check for zero pivot and save diagoanl element */
3241     if (dk == 0.0){
3242       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
3243       /*
3244     }else if (PetscRealPart(dk) < 0){
3245       ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk);
3246       */
3247     }
3248 
3249     /* save nonzero entries in k-th row of U ... */
3250     ba[k] = 1.0/dk;
3251     jmin = bi[k]; jmax = bi[k+1];
3252     if (jmin < jmax) {
3253       for (j=jmin; j<jmax; j++){
3254          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
3255       }
3256       /* ... add k to row list for first nonzero entry in k-th row */
3257       il[k] = jmin;
3258       i     = bj[jmin];
3259       jl[k] = jl[i]; jl[i] = k;
3260     }
3261   }
3262 
3263   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3264   ierr = PetscFree(il);CHKERRQ(ierr);
3265   ierr = PetscFree(jl);CHKERRQ(ierr);
3266   if (a->permute){
3267     ierr = PetscFree(aa);CHKERRQ(ierr);
3268   }
3269 
3270   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
3271   C->factor    = FACTOR_CHOLESKY;
3272   C->assembled = PETSC_TRUE;
3273   C->preallocated = PETSC_TRUE;
3274   PLogFlops(b->mbs);
3275   PetscFunctionReturn(0);
3276 }
3277 
3278 /*
3279   Version for when blocks are 1 by 1 Using natural ordering
3280 */
3281 #undef __FUNC__
3282 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering"
3283 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
3284 {
3285   Mat                C = *B;
3286   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
3287   int                ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
3288   int                *ai,*aj,*r;
3289   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
3290   MatScalar          *rtmp;
3291   MatScalar          *ba = b->a,*aa,ak;
3292   MatScalar          dk,uikdi;
3293 
3294   PetscFunctionBegin;
3295 
3296   /* initialization */
3297   /* il and jl record the first nonzero element in each row of the accessing
3298      window U(0:k, k:mbs-1).
3299      jl:    list of rows to be added to uneliminated rows
3300             i>= k: jl(i) is the first row to be added to row i
3301             i<  k: jl(i) is the row following row i in some list of rows
3302             jl(i) = mbs indicates the end of a list
3303      il(i): points to the first nonzero element in columns k,...,mbs-1 of
3304             row i of U */
3305   rtmp  = (MatScalar*)PetscMalloc(mbs*sizeof(MatScalar));CHKPTRQ(rtmp);
3306   il    = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il);
3307   jl    = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl);
3308   for (i=0; i<mbs; i++) {
3309     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
3310   }
3311 
3312   ai = a->i; aj = a->j; aa = a->a;
3313 
3314   /* for each row k */
3315   for (k = 0; k<mbs; k++){
3316 
3317     /*initialize k-th row with elements nonzero in row perm(k) of A */
3318     jmin = ai[k]; jmax = ai[k+1];
3319     if (jmin < jmax) {
3320       for (j = jmin; j < jmax; j++){
3321         vj = aj[j];
3322         rtmp[vj] = aa[j];
3323       }
3324     }
3325 
3326     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
3327     dk = rtmp[k];
3328     i = jl[k]; /* first row to be added to k_th row  */
3329 
3330     while (i < mbs){
3331       nexti = jl[i]; /* next row to be added to k_th row */
3332 
3333       /* compute multiplier, update D(k) and U(i,k) */
3334       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
3335       uikdi = - ba[ili]*ba[i];
3336       dk += uikdi*ba[ili];
3337       ba[ili] = uikdi; /* -U(i,k) */
3338 
3339       /* add multiple of row i to k-th row ... */
3340       jmin = ili + 1; jmax = bi[i+1];
3341       if (jmin < jmax){
3342         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
3343         /* ... add i to row list for next nonzero entry */
3344         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
3345         j     = bj[jmin];
3346         jl[i] = jl[j]; jl[j] = i; /* update jl */
3347       }
3348       i = nexti;
3349     }
3350 
3351     /* check for zero pivot and save diagoanl element */
3352     if (dk == 0.0){
3353       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
3354       /*
3355     }else if (PetscRealPart(dk) < 0){
3356       ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk);
3357       */
3358     }
3359 
3360     /* save nonzero entries in k-th row of U ... */
3361     ba[k] = 1.0/dk;
3362     jmin = bi[k]; jmax = bi[k+1];
3363     if (jmin < jmax) {
3364       for (j=jmin; j<jmax; j++){
3365          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
3366       }
3367       /* ... add k to row list for first nonzero entry in k-th row */
3368       il[k] = jmin;
3369       i     = bj[jmin];
3370       jl[k] = jl[i]; jl[i] = k;
3371     }
3372   }
3373 
3374   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3375   ierr = PetscFree(il);CHKERRQ(ierr);
3376   ierr = PetscFree(jl);CHKERRQ(ierr);
3377 
3378   C->factor    = FACTOR_CHOLESKY;
3379   C->assembled = PETSC_TRUE;
3380   C->preallocated = PETSC_TRUE;
3381   PLogFlops(b->mbs);
3382   PetscFunctionReturn(0);
3383 }
3384 
3385 #undef __FUNC__
3386 #define __FUNC__ "MatCholeskyFactor_SeqSBAIJ"
3387 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f)
3388 {
3389   int ierr;
3390   Mat C;
3391 
3392   PetscFunctionBegin;
3393   ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr);
3394   ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr);
3395   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
3396   PetscFunctionReturn(0);
3397 }
3398 
3399 
3400