xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision fff829cf019b5079f7f709086c75661a07a2e190)
1 
2 #include "src/mat/impls/baij/seq/baij.h"
3 #include "src/mat/impls/sbaij/seq/sbaij.h"
4 #include "src/inline/ilu.h"
5 #include "include/petscis.h"
6 
7 #if !defined(PETSC_USE_COMPLEX)
8 /*
9   input:
10    F -- numeric factor
11   output:
12    nneg, nzero, npos: matrix inertia
13 */
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetInertia_SeqSBAIJ"
17 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos)
18 {
19   Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
20   PetscScalar  *dd = fact_ptr->a;
21   PetscInt     mbs=fact_ptr->mbs,bs=F->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i;
22 
23   PetscFunctionBegin;
24   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);
25   nneig_tmp = 0; npos_tmp = 0;
26   for (i=0; i<mbs; i++){
27     if (PetscRealPart(dd[*fi]) > 0.0){
28       npos_tmp++;
29     } else if (PetscRealPart(dd[*fi]) < 0.0){
30       nneig_tmp++;
31     }
32     fi++;
33   }
34   if (nneig) *nneig = nneig_tmp;
35   if (npos)  *npos  = npos_tmp;
36   if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;
37 
38   PetscFunctionReturn(0);
39 }
40 #endif /* !defined(PETSC_USE_COMPLEX) */
41 
42 /*
43   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
44   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
45 */
46 #undef __FUNCT__
47 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_MSR"
48 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
49 {
50   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
51   PetscErrorCode ierr;
52   PetscInt       *rip,i,mbs = a->mbs,*ai,*aj;
53   PetscInt       *jutmp,bs = A->bs,bs2=a->bs2;
54   PetscInt       m,reallocs = 0,prow;
55   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
56   PetscReal      f = info->fill;
57   PetscTruth     perm_identity;
58 
59   PetscFunctionBegin;
60   /* check whether perm is the identity mapping */
61   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
62   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
63 
64   if (perm_identity){ /* without permutation */
65     a->permute = PETSC_FALSE;
66     ai = a->i; aj = a->j;
67   } else {            /* non-trivial permutation */
68     a->permute = PETSC_TRUE;
69     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
70     ai = a->inew; aj = a->jnew;
71   }
72 
73   /* initialization */
74   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
75   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
76   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
77   iu[0] = mbs+1;
78   juidx = mbs + 1; /* index for ju */
79   ierr  = PetscMalloc(2*mbs*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for pivot row */
80   q     = jl + mbs;   /* linked list for col index */
81   for (i=0; i<mbs; i++){
82     jl[i] = mbs;
83     q[i] = 0;
84   }
85 
86   /* for each row k */
87   for (k=0; k<mbs; k++){
88     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
89     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
90     q[k] = mbs;
91     /* initialize nonzero structure of k-th row to row rip[k] of A */
92     jmin = ai[rip[k]] +1; /* exclude diag[k] */
93     jmax = ai[rip[k]+1];
94     for (j=jmin; j<jmax; j++){
95       vj = rip[aj[j]]; /* col. value */
96       if(vj > k){
97         qm = k;
98         do {
99           m  = qm; qm = q[m];
100         } while(qm < vj);
101         if (qm == vj) {
102           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
103         }
104         nzk++;
105         q[m]  = vj;
106         q[vj] = qm;
107       } /* if(vj > k) */
108     } /* for (j=jmin; j<jmax; j++) */
109 
110     /* modify nonzero structure of k-th row by computing fill-in
111        for each row i to be merged in */
112     prow = k;
113     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
114 
115     while (prow < k){
116       /* merge row prow into k-th row */
117       jmin = iu[prow] + 1; jmax = iu[prow+1];
118       qm = k;
119       for (j=jmin; j<jmax; j++){
120         vj = ju[j];
121         do {
122           m = qm; qm = q[m];
123         } while (qm < vj);
124         if (qm != vj){
125          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
126         }
127       }
128       prow = jl[prow]; /* next pivot row */
129     }
130 
131     /* add k to row list for first nonzero element in k-th row */
132     if (nzk > 0){
133       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
134       jl[k] = jl[i]; jl[i] = k;
135     }
136     iu[k+1] = iu[k] + nzk;
137 
138     /* allocate more space to ju if needed */
139     if (iu[k+1] > umax) {
140       /* estimate how much additional space we will need */
141       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
142       /* just double the memory each time */
143       maxadd = umax;
144       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
145       umax += maxadd;
146 
147       /* allocate a longer ju */
148       ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
149       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
150       ierr = PetscFree(ju);CHKERRQ(ierr);
151       ju   = jutmp;
152       reallocs++; /* count how many times we realloc */
153     }
154 
155     /* save nonzero structure of k-th row in ju */
156     i=k;
157     while (nzk --) {
158       i           = q[i];
159       ju[juidx++] = i;
160     }
161   }
162 
163   if (ai[mbs] != 0) {
164     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
165     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
166     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
167     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
168     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
169   } else {
170      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
171   }
172 
173   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
174   ierr = PetscFree(jl);CHKERRQ(ierr);
175 
176   /* put together the new matrix */
177   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
178   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
179   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
180 
181   /* PetscLogObjectParent(*B,iperm); */
182   b = (Mat_SeqSBAIJ*)(*B)->data;
183   ierr = PetscFree(b->imax);CHKERRQ(ierr);
184   b->singlemalloc = PETSC_FALSE;
185   /* the next line frees the default space generated by the Create() */
186   ierr = PetscFree(b->a);CHKERRQ(ierr);
187   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
188   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
189   b->j    = ju;
190   b->i    = iu;
191   b->diag = 0;
192   b->ilen = 0;
193   b->imax = 0;
194   b->row  = perm;
195   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
196   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
197   b->icol = perm;
198   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
199   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
200   /* In b structure:  Free imax, ilen, old a, old j.
201      Allocate idnew, solve_work, new a, new j */
202   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
203   b->maxnz = b->nz = iu[mbs];
204 
205   (*B)->factor                 = FACTOR_CHOLESKY;
206   (*B)->info.factor_mallocs    = reallocs;
207   (*B)->info.fill_ratio_given  = f;
208   if (ai[mbs] != 0) {
209     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
210   } else {
211     (*B)->info.fill_ratio_needed = 0.0;
212   }
213 
214   if (perm_identity){
215     switch (bs) {
216       case 1:
217         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
218         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
219         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
220         break;
221       case 2:
222         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
223         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
224         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
225         break;
226       case 3:
227         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
228         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
229         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
230         break;
231       case 4:
232         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
233         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
234         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
235         break;
236       case 5:
237         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
238         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
239         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
240         break;
241       case 6:
242         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
243         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
244         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
245         break;
246       case 7:
247         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
248         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
249         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
250       break;
251       default:
252         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
253         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
254         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
255       break;
256     }
257   }
258   PetscFunctionReturn(0);
259 }
260 /*
261     Symbolic U^T*D*U factorization for SBAIJ format.
262 */
263 #undef __FUNCT__
264 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ"
265 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
266 {
267   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
268   PetscErrorCode ierr;
269   PetscInt       *rip,i,mbs = a->mbs,*ai,*aj;
270   PetscInt       *jutmp,bs = A->bs,bs2=a->bs2;
271   PetscInt       m,reallocs = 0,prow;
272   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
273   PetscInt       *il,ili,nextprow;
274   PetscReal      f = info->fill;
275   PetscTruth     perm_identity;
276 
277   PetscFunctionBegin;
278   /*
279    This code originally uses Modified Sparse Row (MSR) storage
280    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
281    Then it is rewritten so the factor B takes seqsbaij format. However the associated
282    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
283    thus the original code in MSR format is still used for these cases.
284    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
285    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
286   */
287   /* check whether perm is the identity mapping */
288   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
289   if (bs>1){
290     a->permute = PETSC_TRUE;
291     ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,B);CHKERRQ(ierr);
292     PetscFunctionReturn(0);
293   }
294 
295   /* At present, the code below works only for perm_identity=PETSC_TRUE */
296   if (perm_identity){
297     a->permute = PETSC_FALSE;
298     ai = a->i; aj = a->j;
299   } else {
300     a->permute = PETSC_TRUE;
301     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
302     ai = a->inew; aj = a->jnew;
303   }
304 
305   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
306 
307   /* initialization */
308   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
309   umax  = (PetscInt)(f*ai[mbs] + 1);
310   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
311   iu[0] = 0;
312   juidx = 0; /* index for ju */
313   ierr  = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */
314   q     = jl + mbs;   /* linked list for col index of active row */
315   il    = q  + mbs;
316   for (i=0; i<mbs; i++){
317     jl[i] = mbs;
318     q[i]  = 0;
319     il[i] = 0;
320   }
321 
322   /* for each row k */
323   for (k=0; k<mbs; k++){
324     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
325     q[k] = mbs;
326     /* initialize nonzero structure of k-th row to row rip[k] of A */
327     jmin = ai[rip[k]] +1; /* exclude diag[k] */
328     jmax = ai[rip[k]+1];
329     for (j=jmin; j<jmax; j++){
330       vj = rip[aj[j]]; /* col. value */
331       if(vj > k){
332         qm = k;
333         do {
334           m  = qm; qm = q[m];
335         } while(qm < vj);
336         if (qm == vj) {
337           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
338         }
339         nzk++;
340         q[m]  = vj;
341         q[vj] = qm;
342       } /* if(vj > k) */
343     } /* for (j=jmin; j<jmax; j++) */
344 
345     /* modify nonzero structure of k-th row by computing fill-in
346        for each row i to be merged in */
347     prow = k;
348     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
349 
350     while (prow < k){
351       nextprow = jl[prow];
352 
353       /* merge row prow into k-th row */
354       ili = il[prow];
355       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
356       jmax = iu[prow+1];
357       qm = k;
358       for (j=jmin; j<jmax; j++){
359         vj = ju[j];
360         do {
361           m = qm; qm = q[m];
362         } while (qm < vj);
363         if (qm != vj){  /* a fill */
364           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
365         }
366       } /* end of for (j=jmin; j<jmax; j++) */
367       if (jmin < jmax){
368         il[prow] = jmin;
369         j = ju[jmin];
370         jl[prow] = jl[j]; jl[j] = prow;  /* update jl */
371       }
372       prow = nextprow;
373     }
374 
375     /* update il and jl */
376     if (nzk > 0){
377       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
378       jl[k] = jl[i]; jl[i] = k;
379       il[k] = iu[k] + 1;
380     }
381     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */
382 
383     /* allocate more space to ju if needed */
384     if (iu[k+1] > umax) {
385       /* estimate how much additional space we will need */
386       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
387       /* just double the memory each time */
388       maxadd = umax;
389       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
390       umax += maxadd;
391 
392       /* allocate a longer ju */
393       ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
394       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
395       ierr = PetscFree(ju);CHKERRQ(ierr);
396       ju   = jutmp;
397       reallocs++; /* count how many times we realloc */
398     }
399 
400     /* save nonzero structure of k-th row in ju */
401     ju[juidx++] = k; /* diag[k] */
402     i = k;
403     while (nzk --) {
404       i           = q[i];
405       ju[juidx++] = i;
406     }
407   }
408 
409   if (ai[mbs] != 0) {
410     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
411     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
412     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
413     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
414     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
415   } else {
416      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
417   }
418 
419   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
420   /* ierr = PetscFree(q);CHKERRQ(ierr); */
421   ierr = PetscFree(jl);CHKERRQ(ierr);
422 
423   /* put together the new matrix */
424   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
425   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
426   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
427 
428   /* PetscLogObjectParent(*B,iperm); */
429   b = (Mat_SeqSBAIJ*)(*B)->data;
430   ierr = PetscFree(b->imax);CHKERRQ(ierr);
431   b->singlemalloc = PETSC_FALSE;
432   /* the next line frees the default space generated by the Create() */
433   ierr = PetscFree(b->a);CHKERRQ(ierr);
434   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
435   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
436   b->j    = ju;
437   b->i    = iu;
438   b->diag = 0;
439   b->ilen = 0;
440   b->imax = 0;
441   b->row  = perm;
442   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
443   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
444   b->icol = perm;
445   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
446   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
447   /* In b structure:  Free imax, ilen, old a, old j.
448      Allocate idnew, solve_work, new a, new j */
449   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
450   b->maxnz = b->nz = iu[mbs];
451 
452   (*B)->factor                 = FACTOR_CHOLESKY;
453   (*B)->info.factor_mallocs    = reallocs;
454   (*B)->info.fill_ratio_given  = f;
455   if (ai[mbs] != 0) {
456     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
457   } else {
458     (*B)->info.fill_ratio_needed = 0.0;
459   }
460 
461   if (perm_identity){
462     (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
463     (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
464     (*B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
465     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special natural ordering factor and solve BS=1\n");
466   }
467 #ifdef OLD
468   /* check whether perm is the identity mapping */
469   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
470   if (bs>1 || !perm_identity){
471     a->permute = PETSC_TRUE;
472     ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,B);CHKERRQ(ierr);
473     PetscFunctionReturn(0);
474   }
475 
476   /* At present, the code below works only for perm_identity=PETSC_TRUE */
477   a->permute = PETSC_FALSE;
478 
479   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
480   ai = a->i; aj = a->j;
481 
482   /* initialization */
483   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
484   umax  = (PetscInt)(f*ai[mbs] + 1);
485   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
486   iu[0] = 0;
487   juidx = 0; /* index for ju */
488   ierr  = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */
489   q     = jl + mbs;   /* linked list for col index of active row */
490   il    = q  + mbs;
491   for (i=0; i<mbs; i++){
492     jl[i] = mbs;
493     q[i]  = 0;
494     il[i] = 0;
495   }
496 
497   /* for each row k */
498   for (k=0; k<mbs; k++){
499     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
500     q[k] = mbs;
501     /* initialize nonzero structure of k-th row to row rip[k] of A */
502     jmin = ai[rip[k]] +1; /* exclude diag[k] */
503     jmax = ai[rip[k]+1];
504     for (j=jmin; j<jmax; j++){
505       vj = rip[aj[j]]; /* col. value */
506       if(vj > k){
507         qm = k;
508         do {
509           m  = qm; qm = q[m];
510         } while(qm < vj);
511         if (qm == vj) {
512           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
513         }
514         nzk++;
515         q[m]  = vj;
516         q[vj] = qm;
517       } /* if(vj > k) */
518     } /* for (j=jmin; j<jmax; j++) */
519 
520     /* modify nonzero structure of k-th row by computing fill-in
521        for each row i to be merged in */
522     prow = k;
523     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
524 
525     while (prow < k){
526       nextprow = jl[prow];
527 
528       /* merge row prow into k-th row */
529       ili = il[prow];
530       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
531       jmax = iu[prow+1];
532       qm = k;
533       for (j=jmin; j<jmax; j++){
534         vj = ju[j];
535         do {
536           m = qm; qm = q[m];
537         } while (qm < vj);
538         if (qm != vj){  /* a fill */
539           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
540         }
541       } /* end of for (j=jmin; j<jmax; j++) */
542       if (jmin < jmax){
543         il[prow] = jmin;
544         j = ju[jmin];
545         jl[prow] = jl[j]; jl[j] = prow;  /* update jl */
546       }
547       prow = nextprow;
548     }
549 
550     /* update il and jl */
551     if (nzk > 0){
552       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
553       jl[k] = jl[i]; jl[i] = k;
554       il[k] = iu[k] + 1;
555     }
556     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */
557 
558     /* allocate more space to ju if needed */
559     if (iu[k+1] > umax) {
560       /* estimate how much additional space we will need */
561       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
562       /* just double the memory each time */
563       maxadd = umax;
564       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
565       umax += maxadd;
566 
567       /* allocate a longer ju */
568       ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
569       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
570       ierr = PetscFree(ju);CHKERRQ(ierr);
571       ju   = jutmp;
572       reallocs++; /* count how many times we realloc */
573     }
574 
575     /* save nonzero structure of k-th row in ju */
576     ju[juidx++] = k; /* diag[k] */
577     i = k;
578     while (nzk --) {
579       i           = q[i];
580       ju[juidx++] = i;
581     }
582   }
583 
584   if (ai[mbs] != 0) {
585     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
586     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
587     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
588     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
589     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
590   } else {
591      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
592   }
593 
594   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
595   /* ierr = PetscFree(q);CHKERRQ(ierr); */
596   ierr = PetscFree(jl);CHKERRQ(ierr);
597 
598   /* put together the new matrix */
599   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
600   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
601   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
602 
603   /* PetscLogObjectParent(*B,iperm); */
604   b = (Mat_SeqSBAIJ*)(*B)->data;
605   ierr = PetscFree(b->imax);CHKERRQ(ierr);
606   b->singlemalloc = PETSC_FALSE;
607   /* the next line frees the default space generated by the Create() */
608   ierr = PetscFree(b->a);CHKERRQ(ierr);
609   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
610   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
611   b->j    = ju;
612   b->i    = iu;
613   b->diag = 0;
614   b->ilen = 0;
615   b->imax = 0;
616   b->row  = perm;
617   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
618   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
619   b->icol = perm;
620   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
621   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
622   /* In b structure:  Free imax, ilen, old a, old j.
623      Allocate idnew, solve_work, new a, new j */
624   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
625   b->maxnz = b->nz = iu[mbs];
626 
627   (*B)->factor                 = FACTOR_CHOLESKY;
628   (*B)->info.factor_mallocs    = reallocs;
629   (*B)->info.fill_ratio_given  = f;
630   if (ai[mbs] != 0) {
631     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
632   } else {
633     (*B)->info.fill_ratio_needed = 0.0;
634   }
635 
636   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
637   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
638   (*B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
639   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
640 #endif /* OLD */
641   PetscFunctionReturn(0);
642 }
643 #undef __FUNCT__
644 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N"
645 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
646 {
647   Mat            C = *B;
648   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
649   IS             perm = b->row;
650   PetscErrorCode ierr;
651   PetscInt       *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
652   PetscInt       *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
653   PetscInt       bs=A->bs,bs2 = a->bs2;
654   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
655   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
656   MatScalar      *work;
657   PetscInt       *pivots;
658 
659   PetscFunctionBegin;
660   /* initialization */
661   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
662   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
663   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
664   jl   = il + mbs;
665   for (i=0; i<mbs; i++) {
666     jl[i] = mbs; il[0] = 0;
667   }
668   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
669   uik  = dk + bs2;
670   work = uik + bs2;
671   ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr);
672 
673   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
674 
675   /* check permutation */
676   if (!a->permute){
677     ai = a->i; aj = a->j; aa = a->a;
678   } else {
679     ai   = a->inew; aj = a->jnew;
680     ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
681     ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
682     ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr);
683     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr);
684 
685     for (i=0; i<mbs; i++){
686       jmin = ai[i]; jmax = ai[i+1];
687       for (j=jmin; j<jmax; j++){
688         while (a2anew[j] != j){
689           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
690           for (k1=0; k1<bs2; k1++){
691             dk[k1]       = aa[k*bs2+k1];
692             aa[k*bs2+k1] = aa[j*bs2+k1];
693             aa[j*bs2+k1] = dk[k1];
694           }
695         }
696         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
697         if (i > aj[j]){
698           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
699           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
700           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
701           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
702             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
703           }
704         }
705       }
706     }
707     ierr = PetscFree(a2anew);CHKERRQ(ierr);
708   }
709 
710   /* for each row k */
711   for (k = 0; k<mbs; k++){
712 
713     /*initialize k-th row with elements nonzero in row perm(k) of A */
714     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
715 
716     ap = aa + jmin*bs2;
717     for (j = jmin; j < jmax; j++){
718       vj = perm_ptr[aj[j]];         /* block col. index */
719       rtmp_ptr = rtmp + vj*bs2;
720       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
721     }
722 
723     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
724     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
725     i = jl[k]; /* first row to be added to k_th row  */
726 
727     while (i < k){
728       nexti = jl[i]; /* next row to be added to k_th row */
729 
730       /* compute multiplier */
731       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
732 
733       /* uik = -inv(Di)*U_bar(i,k) */
734       diag = ba + i*bs2;
735       u    = ba + ili*bs2;
736       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
737       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
738 
739       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
740       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
741 
742       /* update -U(i,k) */
743       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
744 
745       /* add multiple of row i to k-th row ... */
746       jmin = ili + 1; jmax = bi[i+1];
747       if (jmin < jmax){
748         for (j=jmin; j<jmax; j++) {
749           /* rtmp += -U(i,k)^T * U_bar(i,j) */
750           rtmp_ptr = rtmp + bj[j]*bs2;
751           u = ba + j*bs2;
752           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
753         }
754 
755         /* ... add i to row list for next nonzero entry */
756         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
757         j     = bj[jmin];
758         jl[i] = jl[j]; jl[j] = i; /* update jl */
759       }
760       i = nexti;
761     }
762 
763     /* save nonzero entries in k-th row of U ... */
764 
765     /* invert diagonal block */
766     diag = ba+k*bs2;
767     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
768     ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr);
769 
770     jmin = bi[k]; jmax = bi[k+1];
771     if (jmin < jmax) {
772       for (j=jmin; j<jmax; j++){
773          vj = bj[j];           /* block col. index of U */
774          u   = ba + j*bs2;
775          rtmp_ptr = rtmp + vj*bs2;
776          for (k1=0; k1<bs2; k1++){
777            *u++        = *rtmp_ptr;
778            *rtmp_ptr++ = 0.0;
779          }
780       }
781 
782       /* ... add k to row list for first nonzero entry in k-th row */
783       il[k] = jmin;
784       i     = bj[jmin];
785       jl[k] = jl[i]; jl[i] = k;
786     }
787   }
788 
789   ierr = PetscFree(rtmp);CHKERRQ(ierr);
790   ierr = PetscFree(il);CHKERRQ(ierr);
791   ierr = PetscFree(dk);CHKERRQ(ierr);
792   ierr = PetscFree(pivots);CHKERRQ(ierr);
793   if (a->permute){
794     ierr = PetscFree(aa);CHKERRQ(ierr);
795   }
796 
797   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
798   C->factor       = FACTOR_CHOLESKY;
799   C->assembled    = PETSC_TRUE;
800   C->preallocated = PETSC_TRUE;
801   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering"
807 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
808 {
809   Mat            C = *B;
810   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
811   PetscErrorCode ierr;
812   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
813   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
814   PetscInt       bs=A->bs,bs2 = a->bs2;
815   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
816   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
817   MatScalar      *work;
818   PetscInt       *pivots;
819 
820   PetscFunctionBegin;
821   /* initialization */
822 
823   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
824   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
825   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
826   jl   = il + mbs;
827   for (i=0; i<mbs; i++) {
828     jl[i] = mbs; il[0] = 0;
829   }
830   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
831   uik  = dk + bs2;
832   work = uik + bs2;
833   ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr);
834 
835   ai = a->i; aj = a->j; aa = a->a;
836 
837   /* for each row k */
838   for (k = 0; k<mbs; k++){
839 
840     /*initialize k-th row with elements nonzero in row k of A */
841     jmin = ai[k]; jmax = ai[k+1];
842     ap = aa + jmin*bs2;
843     for (j = jmin; j < jmax; j++){
844       vj = aj[j];         /* block col. index */
845       rtmp_ptr = rtmp + vj*bs2;
846       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
847     }
848 
849     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
850     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
851     i = jl[k]; /* first row to be added to k_th row  */
852 
853     while (i < k){
854       nexti = jl[i]; /* next row to be added to k_th row */
855 
856       /* compute multiplier */
857       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
858 
859       /* uik = -inv(Di)*U_bar(i,k) */
860       diag = ba + i*bs2;
861       u    = ba + ili*bs2;
862       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
863       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
864 
865       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
866       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
867 
868       /* update -U(i,k) */
869       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
870 
871       /* add multiple of row i to k-th row ... */
872       jmin = ili + 1; jmax = bi[i+1];
873       if (jmin < jmax){
874         for (j=jmin; j<jmax; j++) {
875           /* rtmp += -U(i,k)^T * U_bar(i,j) */
876           rtmp_ptr = rtmp + bj[j]*bs2;
877           u = ba + j*bs2;
878           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
879         }
880 
881         /* ... add i to row list for next nonzero entry */
882         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
883         j     = bj[jmin];
884         jl[i] = jl[j]; jl[j] = i; /* update jl */
885       }
886       i = nexti;
887     }
888 
889     /* save nonzero entries in k-th row of U ... */
890 
891     /* invert diagonal block */
892     diag = ba+k*bs2;
893     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
894     ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr);
895 
896     jmin = bi[k]; jmax = bi[k+1];
897     if (jmin < jmax) {
898       for (j=jmin; j<jmax; j++){
899          vj = bj[j];           /* block col. index of U */
900          u   = ba + j*bs2;
901          rtmp_ptr = rtmp + vj*bs2;
902          for (k1=0; k1<bs2; k1++){
903            *u++        = *rtmp_ptr;
904            *rtmp_ptr++ = 0.0;
905          }
906       }
907 
908       /* ... add k to row list for first nonzero entry in k-th row */
909       il[k] = jmin;
910       i     = bj[jmin];
911       jl[k] = jl[i]; jl[i] = k;
912     }
913   }
914 
915   ierr = PetscFree(rtmp);CHKERRQ(ierr);
916   ierr = PetscFree(il);CHKERRQ(ierr);
917   ierr = PetscFree(dk);CHKERRQ(ierr);
918   ierr = PetscFree(pivots);CHKERRQ(ierr);
919 
920   C->factor    = FACTOR_CHOLESKY;
921   C->assembled = PETSC_TRUE;
922   C->preallocated = PETSC_TRUE;
923   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
924   PetscFunctionReturn(0);
925 }
926 
927 /*
928     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
929     Version for blocks 2 by 2.
930 */
931 #undef __FUNCT__
932 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2"
933 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
934 {
935   Mat            C = *B;
936   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
937   IS             perm = b->row;
938   PetscErrorCode ierr;
939   PetscInt       *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
940   PetscInt       *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
941   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
942   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
943 
944   PetscFunctionBegin;
945   /* initialization */
946   /* il and jl record the first nonzero element in each row of the accessing
947      window U(0:k, k:mbs-1).
948      jl:    list of rows to be added to uneliminated rows
949             i>= k: jl(i) is the first row to be added to row i
950             i<  k: jl(i) is the row following row i in some list of rows
951             jl(i) = mbs indicates the end of a list
952      il(i): points to the first nonzero element in columns k,...,mbs-1 of
953             row i of U */
954   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
955   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
956   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
957   jl   = il + mbs;
958   for (i=0; i<mbs; i++) {
959     jl[i] = mbs; il[0] = 0;
960   }
961   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
962   uik  = dk + 4;
963   ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
964 
965   /* check permutation */
966   if (!a->permute){
967     ai = a->i; aj = a->j; aa = a->a;
968   } else {
969     ai   = a->inew; aj = a->jnew;
970     ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
971     ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
972     ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr);
973     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr);
974 
975     for (i=0; i<mbs; i++){
976       jmin = ai[i]; jmax = ai[i+1];
977       for (j=jmin; j<jmax; j++){
978         while (a2anew[j] != j){
979           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
980           for (k1=0; k1<4; k1++){
981             dk[k1]       = aa[k*4+k1];
982             aa[k*4+k1] = aa[j*4+k1];
983             aa[j*4+k1] = dk[k1];
984           }
985         }
986         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
987         if (i > aj[j]){
988           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
989           ap = aa + j*4;     /* ptr to the beginning of the block */
990           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
991           ap[1] = ap[2];
992           ap[2] = dk[1];
993         }
994       }
995     }
996     ierr = PetscFree(a2anew);CHKERRQ(ierr);
997   }
998 
999   /* for each row k */
1000   for (k = 0; k<mbs; k++){
1001 
1002     /*initialize k-th row with elements nonzero in row perm(k) of A */
1003     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
1004     ap = aa + jmin*4;
1005     for (j = jmin; j < jmax; j++){
1006       vj = perm_ptr[aj[j]];         /* block col. index */
1007       rtmp_ptr = rtmp + vj*4;
1008       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1009     }
1010 
1011     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1012     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
1013     i = jl[k]; /* first row to be added to k_th row  */
1014 
1015     while (i < k){
1016       nexti = jl[i]; /* next row to be added to k_th row */
1017 
1018       /* compute multiplier */
1019       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1020 
1021       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1022       diag = ba + i*4;
1023       u    = ba + ili*4;
1024       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1025       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1026       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1027       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1028 
1029       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1030       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1031       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1032       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1033       dk[3] += uik[2]*u[2] + uik[3]*u[3];
1034 
1035       /* update -U(i,k): ba[ili] = uik */
1036       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
1037 
1038       /* add multiple of row i to k-th row ... */
1039       jmin = ili + 1; jmax = bi[i+1];
1040       if (jmin < jmax){
1041         for (j=jmin; j<jmax; j++) {
1042           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1043           rtmp_ptr = rtmp + bj[j]*4;
1044           u = ba + j*4;
1045           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1046           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1047           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1048           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1049         }
1050 
1051         /* ... add i to row list for next nonzero entry */
1052         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1053         j     = bj[jmin];
1054         jl[i] = jl[j]; jl[j] = i; /* update jl */
1055       }
1056       i = nexti;
1057     }
1058 
1059     /* save nonzero entries in k-th row of U ... */
1060 
1061     /* invert diagonal block */
1062     diag = ba+k*4;
1063     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
1064     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
1065 
1066     jmin = bi[k]; jmax = bi[k+1];
1067     if (jmin < jmax) {
1068       for (j=jmin; j<jmax; j++){
1069          vj = bj[j];           /* block col. index of U */
1070          u   = ba + j*4;
1071          rtmp_ptr = rtmp + vj*4;
1072          for (k1=0; k1<4; k1++){
1073            *u++        = *rtmp_ptr;
1074            *rtmp_ptr++ = 0.0;
1075          }
1076       }
1077 
1078       /* ... add k to row list for first nonzero entry in k-th row */
1079       il[k] = jmin;
1080       i     = bj[jmin];
1081       jl[k] = jl[i]; jl[i] = k;
1082     }
1083   }
1084 
1085   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1086   ierr = PetscFree(il);CHKERRQ(ierr);
1087   ierr = PetscFree(dk);CHKERRQ(ierr);
1088   if (a->permute) {
1089     ierr = PetscFree(aa);CHKERRQ(ierr);
1090   }
1091   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
1092   C->factor    = FACTOR_CHOLESKY;
1093   C->assembled = PETSC_TRUE;
1094   C->preallocated = PETSC_TRUE;
1095   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /*
1100       Version for when blocks are 2 by 2 Using natural ordering
1101 */
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering"
1104 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
1105 {
1106   Mat            C = *B;
1107   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1108   PetscErrorCode ierr;
1109   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1110   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1111   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
1112   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
1113 
1114   PetscFunctionBegin;
1115   /* initialization */
1116   /* il and jl record the first nonzero element in each row of the accessing
1117      window U(0:k, k:mbs-1).
1118      jl:    list of rows to be added to uneliminated rows
1119             i>= k: jl(i) is the first row to be added to row i
1120             i<  k: jl(i) is the row following row i in some list of rows
1121             jl(i) = mbs indicates the end of a list
1122      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1123             row i of U */
1124   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1125   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1126   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
1127   jl   = il + mbs;
1128   for (i=0; i<mbs; i++) {
1129     jl[i] = mbs; il[0] = 0;
1130   }
1131   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
1132   uik  = dk + 4;
1133 
1134   ai = a->i; aj = a->j; aa = a->a;
1135 
1136   /* for each row k */
1137   for (k = 0; k<mbs; k++){
1138 
1139     /*initialize k-th row with elements nonzero in row k of A */
1140     jmin = ai[k]; jmax = ai[k+1];
1141     ap = aa + jmin*4;
1142     for (j = jmin; j < jmax; j++){
1143       vj = aj[j];         /* block col. index */
1144       rtmp_ptr = rtmp + vj*4;
1145       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1146     }
1147 
1148     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1149     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
1150     i = jl[k]; /* first row to be added to k_th row  */
1151 
1152     while (i < k){
1153       nexti = jl[i]; /* next row to be added to k_th row */
1154 
1155       /* compute multiplier */
1156       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1157 
1158       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1159       diag = ba + i*4;
1160       u    = ba + ili*4;
1161       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1162       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1163       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1164       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1165 
1166       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1167       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1168       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1169       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1170       dk[3] += uik[2]*u[2] + uik[3]*u[3];
1171 
1172       /* update -U(i,k): ba[ili] = uik */
1173       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
1174 
1175       /* add multiple of row i to k-th row ... */
1176       jmin = ili + 1; jmax = bi[i+1];
1177       if (jmin < jmax){
1178         for (j=jmin; j<jmax; j++) {
1179           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1180           rtmp_ptr = rtmp + bj[j]*4;
1181           u = ba + j*4;
1182           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1183           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1184           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1185           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1186         }
1187 
1188         /* ... add i to row list for next nonzero entry */
1189         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1190         j     = bj[jmin];
1191         jl[i] = jl[j]; jl[j] = i; /* update jl */
1192       }
1193       i = nexti;
1194     }
1195 
1196     /* save nonzero entries in k-th row of U ... */
1197 
1198     /* invert diagonal block */
1199     diag = ba+k*4;
1200     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
1201     ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
1202 
1203     jmin = bi[k]; jmax = bi[k+1];
1204     if (jmin < jmax) {
1205       for (j=jmin; j<jmax; j++){
1206          vj = bj[j];           /* block col. index of U */
1207          u   = ba + j*4;
1208          rtmp_ptr = rtmp + vj*4;
1209          for (k1=0; k1<4; k1++){
1210            *u++        = *rtmp_ptr;
1211            *rtmp_ptr++ = 0.0;
1212          }
1213       }
1214 
1215       /* ... add k to row list for first nonzero entry in k-th row */
1216       il[k] = jmin;
1217       i     = bj[jmin];
1218       jl[k] = jl[i]; jl[i] = k;
1219     }
1220   }
1221 
1222   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1223   ierr = PetscFree(il);CHKERRQ(ierr);
1224   ierr = PetscFree(dk);CHKERRQ(ierr);
1225 
1226   C->factor    = FACTOR_CHOLESKY;
1227   C->assembled = PETSC_TRUE;
1228   C->preallocated = PETSC_TRUE;
1229   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 /*
1234     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1235     Version for blocks are 1 by 1.
1236 */
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1"
1239 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1240 {
1241   Mat            C = *B;
1242   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1243   IS             ip=b->row;
1244   PetscErrorCode ierr;
1245   PetscInt       *rip,i,j,mbs = a->mbs,*bi=b->i,*bj=b->j;
1246   PetscInt       *ai,*aj,*a2anew;
1247   PetscInt       k,jmin,jmax,*jl,*il,vj,nexti,ili,nz;
1248   MatScalar      *rtmp,*ba,*aa,dk,uikdi;
1249 
1250   PetscFunctionBegin;
1251   /* printf(" MatCholeskyFactorNumeric_SeqSBAIJ_1 is called\n"); */
1252   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1253   if (!a->permute){
1254     ai = a->i; aj = a->j; aa = a->a;
1255   } else {
1256     ai = a->inew; aj = a->jnew;
1257     nz = ai[mbs];
1258     ierr = PetscMalloc(nz*sizeof(MatScalar),&aa);CHKERRQ(ierr);
1259     a2anew = a->a2anew;
1260     ba     = a->a;
1261     for (j=0; j<nz; j++){
1262       aa[a2anew[j]] = *(ba++);
1263     }
1264   }
1265 
1266   /* initialization */
1267   /* il and jl record the first nonzero element in each row of the accessing
1268      window U(0:k, k:mbs-1).
1269      jl:    list of rows to be added to uneliminated rows
1270             i>= k: jl(i) is the first row to be added to row i
1271             i<  k: jl(i) is the row following row i in some list of rows
1272             jl(i) = mbs indicates the end of a list
1273      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1274             row i of U */
1275   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1276   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
1277   jl   = il + mbs;
1278   for (i=0; i<mbs; i++) {
1279     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1280   }
1281   ba = b->a;
1282   for (k = 0; k<mbs; k++){
1283     /*initialize k-th row with elements nonzero in row perm(k) of A */
1284     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1285     for (j = jmin; j < jmax; j++){
1286       vj = rip[aj[j]];
1287       rtmp[vj] = aa[j];
1288     }
1289 
1290     /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1291     dk = rtmp[k];
1292     i = jl[k]; /* first row to be added to k_th row  */
1293 
1294     while (i < k){
1295       nexti = jl[i]; /* next row to be added to k_th row */
1296 
1297       /* compute multiplier, update diag(k) and U(i,k) */
1298       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1299       uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */ /* uikdi = - ba[ili]*ba[i]; */
1300       dk += uikdi*ba[ili];
1301       ba[ili] = uikdi; /* -U(i,k) */
1302 
1303       /* add multiple of row i to k-th row ... */
1304       jmin = ili + 1; jmax = bi[i+1];
1305       if (jmin < jmax){
1306         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1307         /* update il and jl for row i */
1308         il[i] = jmin;
1309         j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1310       }
1311       i = nexti;
1312     }
1313 
1314     /* check for zero pivot and save diagoanl element */
1315     if (dk == 0.0){
1316       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1317     }
1318 
1319     /* copy data ino U(k,:) */
1320     ba[bi[k]] = 1.0/dk; /* ba[k] = 1.0/dk; */ /* U(k,k) */
1321     jmin = bi[k]+1; jmax = bi[k+1];
1322     if (jmin < jmax) {
1323       for (j=jmin; j<jmax; j++){
1324          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1325       }
1326       /* ... add k to row list for first nonzero entry in k-th row */
1327       il[k] = jmin;
1328       i     = bj[jmin];
1329       jl[k] = jl[i]; jl[i] = k;
1330     }
1331   }
1332 
1333   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1334   ierr = PetscFree(il);CHKERRQ(ierr);
1335   if (a->permute){
1336     ierr = PetscFree(aa);CHKERRQ(ierr);
1337   }
1338 
1339   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1340   C->factor    = FACTOR_CHOLESKY;
1341   C->assembled = PETSC_TRUE;
1342   C->preallocated = PETSC_TRUE;
1343   PetscLogFlops(b->mbs);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 /*
1348   Version for when blocks are 1 by 1 Using natural ordering
1349 */
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering"
1352 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1353 {
1354   Mat            C = *B;
1355   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1356   PetscErrorCode ierr;
1357   PetscInt       i,j,mbs = a->mbs;
1358   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1359   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1360   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1361   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1362   PetscTruth     damp,chshift;
1363   PetscInt       nshift=0;
1364 
1365   PetscFunctionBegin;
1366   /* initialization */
1367   /* il and jl record the first nonzero element in each row of the accessing
1368      window U(0:k, k:mbs-1).
1369      jl:    list of rows to be added to uneliminated rows
1370             i>= k: jl(i) is the first row to be added to row i
1371             i<  k: jl(i) is the row following row i in some list of rows
1372             jl(i) = mbs indicates the end of a list
1373      il(i): points to the first nonzero element in U(i,k:mbs-1)
1374   */
1375   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1376   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
1377   jl   = il + mbs;
1378 
1379   shift_amount = 0;
1380   do {
1381     damp = PETSC_FALSE;
1382     chshift = PETSC_FALSE;
1383     for (i=0; i<mbs; i++) {
1384       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1385     }
1386 
1387     for (k = 0; k<mbs; k++){ /* row k */
1388     /*initialize k-th row with elements nonzero in row perm(k) of A */
1389       nz   = ai[k+1] - ai[k];
1390       acol = aj + ai[k];
1391       aval = aa + ai[k];
1392       bval = ba + bi[k];
1393       while (nz -- ){
1394         rtmp[*acol++] = *aval++;
1395         *bval++       = 0.0; /* for in-place factorization */
1396       }
1397       /* damp the diagonal of the matrix */
1398       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1399 
1400       /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1401       dk = rtmp[k];
1402       i  = jl[k]; /* first row to be added to k_th row  */
1403 
1404       while (i < k){
1405         nexti = jl[i]; /* next row to be added to k_th row */
1406 
1407         /* compute multiplier, update D(k) and U(i,k) */
1408         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1409         uikdi = - ba[ili]*ba[bi[i]];
1410         dk   += uikdi*ba[ili];
1411         ba[ili] = uikdi; /* -U(i,k) */
1412 
1413         /* add multiple of row i to k-th row ... */
1414         jmin = ili + 1;
1415         nz   = bi[i+1] - jmin;
1416         if (nz > 0){
1417           bcol = bj + jmin;
1418           bval = ba + jmin;
1419           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1420           /* ... add i to row list for next nonzero entry */
1421           il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1422           j     = bj[jmin];
1423           jl[i] = jl[j]; jl[j] = i; /* update jl */
1424         }
1425         i = nexti;
1426       }
1427 
1428       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1429 	/* calculate a shift that would make this row diagonally dominant */
1430 	PetscReal rs = PetscAbs(PetscRealPart(dk));
1431 	jmin      = bi[k]+1;
1432 	nz        = bi[k+1] - jmin;
1433 	if (nz){
1434 	  bcol = bj + jmin;
1435 	  bval = ba + jmin;
1436 	  while (nz--){
1437 	    rs += PetscAbsScalar(rtmp[*bcol++]);
1438 	  }
1439 	}
1440 	/* if this shift is less than the previous, just up the previous
1441 	   one by a bit */
1442 	shift_amount = PetscMax(rs,1.1*shift_amount);
1443 	chshift  = PETSC_TRUE;
1444 	/* Unlike in the ILU case there is no exit condition on nshift:
1445 	   we increase the shift until it converges. There is no guarantee that
1446 	   this algorithm converges faster or slower, or is better or worse
1447 	   than the ILU algorithm. */
1448 	nshift++;
1449 	break;
1450       }
1451       if (PetscRealPart(dk) < zeropivot){
1452         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1453         if (damping > 0.0) {
1454           if (ndamp) damping *= 2.0;
1455           damp = PETSC_TRUE;
1456           ndamp++;
1457           break;
1458         } else if (PetscAbsScalar(dk) < zeropivot){
1459           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1460         } else {
1461           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1462         }
1463       }
1464 
1465       /* save nonzero entries in k-th row of U ... */
1466       /* printf("%D, dk: %g, 1/dk: %g\n",k,dk,1/dk); */
1467       ba[bi[k]] = 1.0/dk;
1468       jmin      = bi[k]+1;
1469       nz        = bi[k+1] - jmin;
1470       if (nz){
1471         bcol = bj + jmin;
1472         bval = ba + jmin;
1473         while (nz--){
1474           *bval++       = rtmp[*bcol];
1475           rtmp[*bcol++] = 0.0;
1476         }
1477         /* ... add k to row list for first nonzero entry in k-th row */
1478         il[k] = jmin;
1479         i     = bj[jmin];
1480         jl[k] = jl[i]; jl[i] = k;
1481       }
1482     } /* end of for (k = 0; k<mbs; k++) */
1483   } while (damp||chshift);
1484   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1485   ierr = PetscFree(il);CHKERRQ(ierr);
1486 
1487   C->factor       = FACTOR_CHOLESKY;
1488   C->assembled    = PETSC_TRUE;
1489   C->preallocated = PETSC_TRUE;
1490   PetscLogFlops(b->mbs);
1491   if (ndamp) {
1492     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
1493   }
1494  if (nshift) {
1495     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift);
1496   }
1497 
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ"
1503 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1504 {
1505   PetscErrorCode ierr;
1506   Mat            C;
1507 
1508   PetscFunctionBegin;
1509   ierr = MatCholeskyFactorSymbolic(A,perm,info,&C);CHKERRQ(ierr);
1510   ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr);
1511   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 
1516