xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact.c (revision fd5f9c01ffc3be76f9ceed6e73f05a0f273fc855)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/seq/baij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 #include "src/inline/ilu.h"
6 #include "include/petscis.h"
7 
8 #if !defined(PETSC_USE_COMPLEX)
9 /*
10   input:
11    F -- numeric factor
12   output:
13    nneg, nzero, npos: matrix inertia
14 */
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetInertia_SeqSBAIJ"
18 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos)
19 {
20   Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
21   MatScalar    *dd = fact_ptr->a;
22   PetscInt     mbs=fact_ptr->mbs,bs=F->rmap.bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i;
23 
24   PetscFunctionBegin;
25   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);
26   nneig_tmp = 0; npos_tmp = 0;
27   for (i=0; i<mbs; i++){
28     if (PetscRealPart(dd[*fi]) > 0.0){
29       npos_tmp++;
30     } else if (PetscRealPart(dd[*fi]) < 0.0){
31       nneig_tmp++;
32     }
33     fi++;
34   }
35   if (nneig) *nneig = nneig_tmp;
36   if (npos)  *npos  = npos_tmp;
37   if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;
38 
39   PetscFunctionReturn(0);
40 }
41 #endif /* !defined(PETSC_USE_COMPLEX) */
42 
43 /*
44   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
45   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
46 */
47 #undef __FUNCT__
48 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_MSR"
49 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
50 {
51   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
52   PetscErrorCode ierr;
53   PetscInt       *rip,i,mbs = a->mbs,*ai,*aj;
54   PetscInt       *jutmp,bs = A->rmap.bs,bs2=a->bs2;
55   PetscInt       m,reallocs = 0,prow;
56   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
57   PetscReal      f = info->fill;
58   PetscTruth     perm_identity;
59 
60   PetscFunctionBegin;
61   /* check whether perm is the identity mapping */
62   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
63   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
64 
65   if (perm_identity){ /* without permutation */
66     a->permute = PETSC_FALSE;
67     ai = a->i; aj = a->j;
68   } else {            /* non-trivial permutation */
69     a->permute = PETSC_TRUE;
70     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
71     ai = a->inew; aj = a->jnew;
72   }
73 
74   /* initialization */
75   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
76   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
77   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
78   iu[0] = mbs+1;
79   juidx = mbs + 1; /* index for ju */
80   ierr  = PetscMalloc(2*mbs*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for pivot row */
81   q     = jl + mbs;   /* linked list for col index */
82   for (i=0; i<mbs; i++){
83     jl[i] = mbs;
84     q[i] = 0;
85   }
86 
87   /* for each row k */
88   for (k=0; k<mbs; k++){
89     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
90     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
91     q[k] = mbs;
92     /* initialize nonzero structure of k-th row to row rip[k] of A */
93     jmin = ai[rip[k]] +1; /* exclude diag[k] */
94     jmax = ai[rip[k]+1];
95     for (j=jmin; j<jmax; j++){
96       vj = rip[aj[j]]; /* col. value */
97       if(vj > k){
98         qm = k;
99         do {
100           m  = qm; qm = q[m];
101         } while(qm < vj);
102         if (qm == vj) {
103           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
104         }
105         nzk++;
106         q[m]  = vj;
107         q[vj] = qm;
108       } /* if(vj > k) */
109     } /* for (j=jmin; j<jmax; j++) */
110 
111     /* modify nonzero structure of k-th row by computing fill-in
112        for each row i to be merged in */
113     prow = k;
114     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
115 
116     while (prow < k){
117       /* merge row prow into k-th row */
118       jmin = iu[prow] + 1; jmax = iu[prow+1];
119       qm = k;
120       for (j=jmin; j<jmax; j++){
121         vj = ju[j];
122         do {
123           m = qm; qm = q[m];
124         } while (qm < vj);
125         if (qm != vj){
126          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
127         }
128       }
129       prow = jl[prow]; /* next pivot row */
130     }
131 
132     /* add k to row list for first nonzero element in k-th row */
133     if (nzk > 0){
134       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
135       jl[k] = jl[i]; jl[i] = k;
136     }
137     iu[k+1] = iu[k] + nzk;
138 
139     /* allocate more space to ju if needed */
140     if (iu[k+1] > umax) {
141       /* estimate how much additional space we will need */
142       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
143       /* just double the memory each time */
144       maxadd = umax;
145       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
146       umax += maxadd;
147 
148       /* allocate a longer ju */
149       ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
150       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
151       ierr = PetscFree(ju);CHKERRQ(ierr);
152       ju   = jutmp;
153       reallocs++; /* count how many times we realloc */
154     }
155 
156     /* save nonzero structure of k-th row in ju */
157     i=k;
158     while (nzk --) {
159       i           = q[i];
160       ju[juidx++] = i;
161     }
162   }
163 
164 #if defined(PETSC_USE_INFO)
165   if (ai[mbs] != 0) {
166     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
167     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
168     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
169     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
170     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
171   } else {
172     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
173   }
174 #endif
175 
176   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
177   ierr = PetscFree(jl);CHKERRQ(ierr);
178 
179   /* put together the new matrix */
180   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
181   ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr);
182   ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr);
183   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
184 
185   /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */
186   b = (Mat_SeqSBAIJ*)(*B)->data;
187   b->singlemalloc = PETSC_FALSE;
188   b->free_a       = PETSC_TRUE;
189   b->free_ij       = PETSC_TRUE;
190   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
191   b->j    = ju;
192   b->i    = iu;
193   b->diag = 0;
194   b->ilen = 0;
195   b->imax = 0;
196   b->row  = perm;
197   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
198   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
199   b->icol = perm;
200   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
201   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
202   /* In b structure:  Free imax, ilen, old a, old j.
203      Allocate idnew, solve_work, new a, new j */
204   ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
205   b->maxnz = b->nz = iu[mbs];
206 
207   (*B)->factor                 = MAT_FACTOR_CHOLESKY;
208   (*B)->info.factor_mallocs    = reallocs;
209   (*B)->info.fill_ratio_given  = f;
210   if (ai[mbs] != 0) {
211     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
212   } else {
213     (*B)->info.fill_ratio_needed = 0.0;
214   }
215 
216   if (perm_identity){
217     switch (bs) {
218       case 1:
219         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
220         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
221         (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
222         (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
223         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr);
224         break;
225       case 2:
226         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
227         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
228         (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering;
229         (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering;
230         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
231         break;
232       case 3:
233         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
234         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
235         (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering;
236         (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering;
237         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
238         break;
239       case 4:
240         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
241         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
242         (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering;
243         (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering;
244         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
245         break;
246       case 5:
247         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
248         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
249         (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering;
250         (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering;
251         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
252         break;
253       case 6:
254         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
255         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
256         (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering;
257         (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering;
258         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
259         break;
260       case 7:
261         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
262         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
263         (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering;
264         (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering;
265         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
266       break;
267       default:
268         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
269         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
270         (*B)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering;
271         (*B)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering;
272         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr);
273       break;
274     }
275   }
276   PetscFunctionReturn(0);
277 }
278 /*
279     Symbolic U^T*D*U factorization for SBAIJ format.
280 */
281 #include "petscbt.h"
282 #include "src/mat/utils/freespace.h"
283 #undef __FUNCT__
284 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ"
285 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
286 {
287   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
288   Mat_SeqSBAIJ       *b;
289   Mat                B;
290   PetscErrorCode     ierr;
291   PetscTruth         perm_identity,missing;
292   PetscReal          fill = info->fill;
293   PetscInt           *rip,i,mbs=a->mbs,bs=A->rmap.bs,*ai,*aj,reallocs=0,prow,d;
294   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
295   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
296   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
297   PetscBT            lnkbt;
298 
299   PetscFunctionBegin;
300   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
301   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
302 
303   /*
304    This code originally uses Modified Sparse Row (MSR) storage
305    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
306    Then it is rewritten so the factor B takes seqsbaij format. However the associated
307    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
308    thus the original code in MSR format is still used for these cases.
309    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
310    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
311   */
312   if (bs > 1){
313     ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr);
314     PetscFunctionReturn(0);
315   }
316 
317   /* check whether perm is the identity mapping */
318   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
319 
320   if (perm_identity){
321     a->permute = PETSC_FALSE;
322     ai = a->i; aj = a->j;
323   } else {
324     SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
325     /* There are bugs for reordeing. Needs further work.
326        MatReordering for sbaij cannot be efficient. User should use aij formt! */
327     a->permute = PETSC_TRUE;
328     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
329     ai = a->inew; aj = a->jnew;
330   }
331   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
332 
333   /* initialization */
334   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
335   ui[0] = 0;
336 
337   /* jl: linked list for storing indices of the pivot rows
338      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
339   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
340   il     = jl + mbs;
341   cols   = il + mbs;
342   ui_ptr = (PetscInt**)(cols + mbs);
343 
344   for (i=0; i<mbs; i++){
345     jl[i] = mbs; il[i] = 0;
346   }
347 
348   /* create and initialize a linked list for storing column indices of the active row k */
349   nlnk = mbs + 1;
350   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
351 
352   /* initial FreeSpace size is fill*(ai[mbs]+1) */
353   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
354   current_space = free_space;
355 
356   for (k=0; k<mbs; k++){  /* for each active row k */
357     /* initialize lnk by the column indices of row rip[k] of A */
358     nzk   = 0;
359     ncols = ai[rip[k]+1] - ai[rip[k]];
360     for (j=0; j<ncols; j++){
361       i = *(aj + ai[rip[k]] + j);
362       cols[j] = rip[i];
363     }
364     ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
365     nzk += nlnk;
366 
367     /* update lnk by computing fill-in for each pivot row to be merged in */
368     prow = jl[k]; /* 1st pivot row */
369 
370     while (prow < k){
371       nextprow = jl[prow];
372       /* merge prow into k-th row */
373       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
374       jmax = ui[prow+1];
375       ncols = jmax-jmin;
376       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
377       ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
378       nzk += nlnk;
379 
380       /* update il and jl for prow */
381       if (jmin < jmax){
382         il[prow] = jmin;
383         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
384       }
385       prow = nextprow;
386     }
387 
388     /* if free space is not available, make more free space */
389     if (current_space->local_remaining<nzk) {
390       i = mbs - k + 1; /* num of unfactored rows */
391       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
392       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
393       reallocs++;
394     }
395 
396     /* copy data into free space, then initialize lnk */
397     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
398 
399     /* add the k-th row into il and jl */
400     if (nzk-1 > 0){
401       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
402       jl[k] = jl[i]; jl[i] = k;
403       il[k] = ui[k] + 1;
404     }
405     ui_ptr[k] = current_space->array;
406     current_space->array           += nzk;
407     current_space->local_used      += nzk;
408     current_space->local_remaining -= nzk;
409 
410     ui[k+1] = ui[k] + nzk;
411   }
412 
413 #if defined(PETSC_USE_INFO)
414   if (ai[mbs] != 0) {
415     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
416     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
417     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
418     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
419   } else {
420     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
421   }
422 #endif
423 
424   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
425   ierr = PetscFree(jl);CHKERRQ(ierr);
426 
427   /* destroy list of free space and other temporary array(s) */
428   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
429   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
430   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
431 
432   /* put together the new matrix in MATSEQSBAIJ format */
433   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
434 
435   b = (Mat_SeqSBAIJ*)(*fact)->data;
436   b->singlemalloc = PETSC_FALSE;
437   b->free_a       = PETSC_TRUE;
438   b->free_ij      = PETSC_TRUE;
439   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
440   b->j    = uj;
441   b->i    = ui;
442   b->diag = 0;
443   b->ilen = 0;
444   b->imax = 0;
445   b->row  = perm;
446   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
447   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
448   b->icol = perm;
449   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
450   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
451   ierr    = PetscLogObjectMemory(*fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
452   b->maxnz = b->nz = ui[mbs];
453 
454   (*fact)->factor                 = MAT_FACTOR_CHOLESKY;
455   (*fact)->info.factor_mallocs    = reallocs;
456   (*fact)->info.fill_ratio_given  = fill;
457   if (ai[mbs] != 0) {
458     (*fact)->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
459   } else {
460     (*fact)->info.fill_ratio_needed = 0.0;
461   }
462 
463   if (perm_identity){
464     switch (bs) {
465       case 1:
466         (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
467         (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
468         (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
469         (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
470         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr);
471         break;
472       case 2:
473         (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
474         (*fact)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
475         (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering;
476         (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering;
477         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
478         break;
479       case 3:
480         (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
481         (*fact)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
482         (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering;
483         (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering;
484         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
485         break;
486       case 4:
487         (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
488         (*fact)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
489         (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering;
490         (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering;
491         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
492         break;
493       case 5:
494         (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
495         (*fact)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
496         (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering;
497         (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering;
498         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
499         break;
500       case 6:
501         (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
502         (*fact)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
503         (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering;
504         (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering;
505         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
506         break;
507       case 7:
508         (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
509         (*fact)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
510         (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering;
511         (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering;
512         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
513       break;
514       default:
515         (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
516         (*fact)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
517         (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering;
518         (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering;
519         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr);
520       break;
521     }
522   }
523   PetscFunctionReturn(0);
524 }
525 #undef __FUNCT__
526 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N"
527 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,MatFactorInfo *info,Mat *B)
528 {
529   Mat            C = *B;
530   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
531   IS             perm = b->row;
532   PetscErrorCode ierr;
533   PetscInt       *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
534   PetscInt       *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
535   PetscInt       bs=A->rmap.bs,bs2 = a->bs2,bslog = 0;
536   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
537   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
538   MatScalar      *work;
539   PetscInt       *pivots;
540 
541   PetscFunctionBegin;
542   /* initialization */
543   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
544   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
545   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
546   jl   = il + mbs;
547   for (i=0; i<mbs; i++) {
548     jl[i] = mbs; il[0] = 0;
549   }
550   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
551   uik  = dk + bs2;
552   work = uik + bs2;
553   ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr);
554 
555   ierr  = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
556 
557   /* check permutation */
558   if (!a->permute){
559     ai = a->i; aj = a->j; aa = a->a;
560   } else {
561     ai   = a->inew; aj = a->jnew;
562     ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
563     ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
564     ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr);
565     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr);
566 
567     /* flops in while loop */
568     bslog = 2*bs*bs2;
569 
570     for (i=0; i<mbs; i++){
571       jmin = ai[i]; jmax = ai[i+1];
572       for (j=jmin; j<jmax; j++){
573         while (a2anew[j] != j){
574           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
575           for (k1=0; k1<bs2; k1++){
576             dk[k1]       = aa[k*bs2+k1];
577             aa[k*bs2+k1] = aa[j*bs2+k1];
578             aa[j*bs2+k1] = dk[k1];
579           }
580         }
581         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
582         if (i > aj[j]){
583           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
584           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
585           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
586           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
587             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
588           }
589         }
590       }
591     }
592     ierr = PetscFree(a2anew);CHKERRQ(ierr);
593   }
594 
595   /* for each row k */
596   for (k = 0; k<mbs; k++){
597 
598     /*initialize k-th row with elements nonzero in row perm(k) of A */
599     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
600 
601     ap = aa + jmin*bs2;
602     for (j = jmin; j < jmax; j++){
603       vj = perm_ptr[aj[j]];         /* block col. index */
604       rtmp_ptr = rtmp + vj*bs2;
605       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
606     }
607 
608     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
609     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
610     i = jl[k]; /* first row to be added to k_th row  */
611 
612     while (i < k){
613       nexti = jl[i]; /* next row to be added to k_th row */
614 
615       /* compute multiplier */
616       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
617 
618       /* uik = -inv(Di)*U_bar(i,k) */
619       diag = ba + i*bs2;
620       u    = ba + ili*bs2;
621       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
622       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
623 
624       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
625       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
626       ierr = PetscLogFlops(bslog*2);CHKERRQ(ierr);
627 
628       /* update -U(i,k) */
629       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
630 
631       /* add multiple of row i to k-th row ... */
632       jmin = ili + 1; jmax = bi[i+1];
633       if (jmin < jmax){
634         for (j=jmin; j<jmax; j++) {
635           /* rtmp += -U(i,k)^T * U_bar(i,j) */
636           rtmp_ptr = rtmp + bj[j]*bs2;
637           u = ba + j*bs2;
638           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
639         }
640         ierr = PetscLogFlops(bslog*(jmax-jmin));CHKERRQ(ierr);
641 
642         /* ... add i to row list for next nonzero entry */
643         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
644         j     = bj[jmin];
645         jl[i] = jl[j]; jl[j] = i; /* update jl */
646       }
647       i = nexti;
648     }
649 
650     /* save nonzero entries in k-th row of U ... */
651 
652     /* invert diagonal block */
653     diag = ba+k*bs2;
654     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
655     ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr);
656 
657     jmin = bi[k]; jmax = bi[k+1];
658     if (jmin < jmax) {
659       for (j=jmin; j<jmax; j++){
660          vj = bj[j];           /* block col. index of U */
661          u   = ba + j*bs2;
662          rtmp_ptr = rtmp + vj*bs2;
663          for (k1=0; k1<bs2; k1++){
664            *u++        = *rtmp_ptr;
665            *rtmp_ptr++ = 0.0;
666          }
667       }
668 
669       /* ... add k to row list for first nonzero entry in k-th row */
670       il[k] = jmin;
671       i     = bj[jmin];
672       jl[k] = jl[i]; jl[i] = k;
673     }
674   }
675 
676   ierr = PetscFree(rtmp);CHKERRQ(ierr);
677   ierr = PetscFree(il);CHKERRQ(ierr);
678   ierr = PetscFree(dk);CHKERRQ(ierr);
679   ierr = PetscFree(pivots);CHKERRQ(ierr);
680   if (a->permute){
681     ierr = PetscFree(aa);CHKERRQ(ierr);
682   }
683 
684   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
685   C->factor       = MAT_FACTOR_CHOLESKY;
686   C->assembled    = PETSC_TRUE;
687   C->preallocated = PETSC_TRUE;
688   ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering"
694 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B)
695 {
696   Mat            C = *B;
697   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
698   PetscErrorCode ierr;
699   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
700   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
701   PetscInt       bs=A->rmap.bs,bs2 = a->bs2,bslog;
702   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
703   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
704   MatScalar      *work;
705   PetscInt       *pivots;
706 
707   PetscFunctionBegin;
708   ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
709   ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr);
710   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
711   jl   = il + mbs;
712   for (i=0; i<mbs; i++) {
713     jl[i] = mbs; il[0] = 0;
714   }
715   ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr);
716   uik  = dk + bs2;
717   work = uik + bs2;
718   ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr);
719 
720   ai = a->i; aj = a->j; aa = a->a;
721 
722   /* flops in while loop */
723   bslog = 2*bs*bs2;
724 
725   /* for each row k */
726   for (k = 0; k<mbs; k++){
727 
728     /*initialize k-th row with elements nonzero in row k of A */
729     jmin = ai[k]; jmax = ai[k+1];
730     ap = aa + jmin*bs2;
731     for (j = jmin; j < jmax; j++){
732       vj = aj[j];         /* block col. index */
733       rtmp_ptr = rtmp + vj*bs2;
734       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
735     }
736 
737     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
738     ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
739     i = jl[k]; /* first row to be added to k_th row  */
740 
741     while (i < k){
742       nexti = jl[i]; /* next row to be added to k_th row */
743 
744       /* compute multiplier */
745       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
746 
747       /* uik = -inv(Di)*U_bar(i,k) */
748       diag = ba + i*bs2;
749       u    = ba + ili*bs2;
750       ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
751       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
752 
753       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
754       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
755       ierr = PetscLogFlops(bslog*2);CHKERRQ(ierr);
756 
757       /* update -U(i,k) */
758       ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr);
759 
760       /* add multiple of row i to k-th row ... */
761       jmin = ili + 1; jmax = bi[i+1];
762       if (jmin < jmax){
763         for (j=jmin; j<jmax; j++) {
764           /* rtmp += -U(i,k)^T * U_bar(i,j) */
765           rtmp_ptr = rtmp + bj[j]*bs2;
766           u = ba + j*bs2;
767           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
768         }
769         ierr = PetscLogFlops(bslog*(jmax-jmin));CHKERRQ(ierr);
770 
771         /* ... add i to row list for next nonzero entry */
772         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
773         j     = bj[jmin];
774         jl[i] = jl[j]; jl[j] = i; /* update jl */
775       }
776       i = nexti;
777     }
778 
779     /* save nonzero entries in k-th row of U ... */
780 
781     /* invert diagonal block */
782     diag = ba+k*bs2;
783     ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr);
784     ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr);
785 
786     jmin = bi[k]; jmax = bi[k+1];
787     if (jmin < jmax) {
788       for (j=jmin; j<jmax; j++){
789          vj = bj[j];           /* block col. index of U */
790          u   = ba + j*bs2;
791          rtmp_ptr = rtmp + vj*bs2;
792          for (k1=0; k1<bs2; k1++){
793            *u++        = *rtmp_ptr;
794            *rtmp_ptr++ = 0.0;
795          }
796       }
797 
798       /* ... add k to row list for first nonzero entry in k-th row */
799       il[k] = jmin;
800       i     = bj[jmin];
801       jl[k] = jl[i]; jl[i] = k;
802     }
803   }
804 
805   ierr = PetscFree(rtmp);CHKERRQ(ierr);
806   ierr = PetscFree(il);CHKERRQ(ierr);
807   ierr = PetscFree(dk);CHKERRQ(ierr);
808   ierr = PetscFree(pivots);CHKERRQ(ierr);
809 
810   C->factor    = MAT_FACTOR_CHOLESKY;
811   C->assembled = PETSC_TRUE;
812   C->preallocated = PETSC_TRUE;
813   ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
814   PetscFunctionReturn(0);
815 }
816 
817 /*
818     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
819     Version for blocks 2 by 2.
820 */
821 #undef __FUNCT__
822 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2"
823 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,MatFactorInfo *info,Mat *B)
824 {
825   Mat            C = *B;
826   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
827   IS             perm = b->row;
828   PetscErrorCode ierr;
829   PetscInt       *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
830   PetscInt       *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
831   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
832   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
833   PetscReal      shift = info->shiftinblocks;
834 
835   PetscFunctionBegin;
836   /* initialization */
837   /* il and jl record the first nonzero element in each row of the accessing
838      window U(0:k, k:mbs-1).
839      jl:    list of rows to be added to uneliminated rows
840             i>= k: jl(i) is the first row to be added to row i
841             i<  k: jl(i) is the row following row i in some list of rows
842             jl(i) = mbs indicates the end of a list
843      il(i): points to the first nonzero element in columns k,...,mbs-1 of
844             row i of U */
845   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
846   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
847   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
848   jl   = il + mbs;
849   for (i=0; i<mbs; i++) {
850     jl[i] = mbs; il[0] = 0;
851   }
852   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
853   uik  = dk + 4;
854   ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr);
855 
856   /* check permutation */
857   if (!a->permute){
858     ai = a->i; aj = a->j; aa = a->a;
859   } else {
860     ai   = a->inew; aj = a->jnew;
861     ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr);
862     ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr);
863     ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr);
864     ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr);
865 
866     for (i=0; i<mbs; i++){
867       jmin = ai[i]; jmax = ai[i+1];
868       for (j=jmin; j<jmax; j++){
869         while (a2anew[j] != j){
870           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
871           for (k1=0; k1<4; k1++){
872             dk[k1]       = aa[k*4+k1];
873             aa[k*4+k1] = aa[j*4+k1];
874             aa[j*4+k1] = dk[k1];
875           }
876         }
877         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
878         if (i > aj[j]){
879           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
880           ap = aa + j*4;     /* ptr to the beginning of the block */
881           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
882           ap[1] = ap[2];
883           ap[2] = dk[1];
884         }
885       }
886     }
887     ierr = PetscFree(a2anew);CHKERRQ(ierr);
888   }
889 
890   /* for each row k */
891   for (k = 0; k<mbs; k++){
892 
893     /*initialize k-th row with elements nonzero in row perm(k) of A */
894     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
895     ap = aa + jmin*4;
896     for (j = jmin; j < jmax; j++){
897       vj = perm_ptr[aj[j]];         /* block col. index */
898       rtmp_ptr = rtmp + vj*4;
899       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
900     }
901 
902     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
903     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
904     i = jl[k]; /* first row to be added to k_th row  */
905 
906     while (i < k){
907       nexti = jl[i]; /* next row to be added to k_th row */
908 
909       /* compute multiplier */
910       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
911 
912       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
913       diag = ba + i*4;
914       u    = ba + ili*4;
915       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
916       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
917       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
918       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
919 
920       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
921       dk[0] += uik[0]*u[0] + uik[1]*u[1];
922       dk[1] += uik[2]*u[0] + uik[3]*u[1];
923       dk[2] += uik[0]*u[2] + uik[1]*u[3];
924       dk[3] += uik[2]*u[2] + uik[3]*u[3];
925 
926       ierr = PetscLogFlops(16*2);CHKERRQ(ierr);
927 
928       /* update -U(i,k): ba[ili] = uik */
929       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
930 
931       /* add multiple of row i to k-th row ... */
932       jmin = ili + 1; jmax = bi[i+1];
933       if (jmin < jmax){
934         for (j=jmin; j<jmax; j++) {
935           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
936           rtmp_ptr = rtmp + bj[j]*4;
937           u = ba + j*4;
938           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
939           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
940           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
941           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
942         }
943         ierr = PetscLogFlops(16*(jmax-jmin));CHKERRQ(ierr);
944 
945         /* ... add i to row list for next nonzero entry */
946         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
947         j     = bj[jmin];
948         jl[i] = jl[j]; jl[j] = i; /* update jl */
949       }
950       i = nexti;
951     }
952 
953     /* save nonzero entries in k-th row of U ... */
954 
955     /* invert diagonal block */
956     diag = ba+k*4;
957     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
958     ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
959 
960     jmin = bi[k]; jmax = bi[k+1];
961     if (jmin < jmax) {
962       for (j=jmin; j<jmax; j++){
963          vj = bj[j];           /* block col. index of U */
964          u   = ba + j*4;
965          rtmp_ptr = rtmp + vj*4;
966          for (k1=0; k1<4; k1++){
967            *u++        = *rtmp_ptr;
968            *rtmp_ptr++ = 0.0;
969          }
970       }
971 
972       /* ... add k to row list for first nonzero entry in k-th row */
973       il[k] = jmin;
974       i     = bj[jmin];
975       jl[k] = jl[i]; jl[i] = k;
976     }
977   }
978 
979   ierr = PetscFree(rtmp);CHKERRQ(ierr);
980   ierr = PetscFree(il);CHKERRQ(ierr);
981   ierr = PetscFree(dk);CHKERRQ(ierr);
982   if (a->permute) {
983     ierr = PetscFree(aa);CHKERRQ(ierr);
984   }
985   ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr);
986   C->factor    = MAT_FACTOR_CHOLESKY;
987   C->assembled = PETSC_TRUE;
988   C->preallocated = PETSC_TRUE;
989   ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
990   PetscFunctionReturn(0);
991 }
992 
993 /*
994       Version for when blocks are 2 by 2 Using natural ordering
995 */
996 #undef __FUNCT__
997 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering"
998 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B)
999 {
1000   Mat            C = *B;
1001   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1002   PetscErrorCode ierr;
1003   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1004   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1005   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
1006   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
1007   PetscReal      shift = info->shiftinblocks;
1008 
1009   PetscFunctionBegin;
1010   /* initialization */
1011   /* il and jl record the first nonzero element in each row of the accessing
1012      window U(0:k, k:mbs-1).
1013      jl:    list of rows to be added to uneliminated rows
1014             i>= k: jl(i) is the first row to be added to row i
1015             i<  k: jl(i) is the row following row i in some list of rows
1016             jl(i) = mbs indicates the end of a list
1017      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1018             row i of U */
1019   ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1020   ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1021   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
1022   jl   = il + mbs;
1023   for (i=0; i<mbs; i++) {
1024     jl[i] = mbs; il[0] = 0;
1025   }
1026   ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr);
1027   uik  = dk + 4;
1028 
1029   ai = a->i; aj = a->j; aa = a->a;
1030 
1031   /* for each row k */
1032   for (k = 0; k<mbs; k++){
1033 
1034     /*initialize k-th row with elements nonzero in row k of A */
1035     jmin = ai[k]; jmax = ai[k+1];
1036     ap = aa + jmin*4;
1037     for (j = jmin; j < jmax; j++){
1038       vj = aj[j];         /* block col. index */
1039       rtmp_ptr = rtmp + vj*4;
1040       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1041     }
1042 
1043     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1044     ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr);
1045     i = jl[k]; /* first row to be added to k_th row  */
1046 
1047     while (i < k){
1048       nexti = jl[i]; /* next row to be added to k_th row */
1049 
1050       /* compute multiplier */
1051       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1052 
1053       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1054       diag = ba + i*4;
1055       u    = ba + ili*4;
1056       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1057       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1058       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1059       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1060 
1061       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1062       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1063       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1064       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1065       dk[3] += uik[2]*u[2] + uik[3]*u[3];
1066 
1067       ierr = PetscLogFlops(16*2);CHKERRQ(ierr);
1068 
1069       /* update -U(i,k): ba[ili] = uik */
1070       ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr);
1071 
1072       /* add multiple of row i to k-th row ... */
1073       jmin = ili + 1; jmax = bi[i+1];
1074       if (jmin < jmax){
1075         for (j=jmin; j<jmax; j++) {
1076           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1077           rtmp_ptr = rtmp + bj[j]*4;
1078           u = ba + j*4;
1079           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1080           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1081           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1082           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1083         }
1084         ierr = PetscLogFlops(16*(jmax-jmin));CHKERRQ(ierr);
1085 
1086         /* ... add i to row list for next nonzero entry */
1087         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1088         j     = bj[jmin];
1089         jl[i] = jl[j]; jl[j] = i; /* update jl */
1090       }
1091       i = nexti;
1092     }
1093 
1094     /* save nonzero entries in k-th row of U ... */
1095 
1096     /* invert diagonal block */
1097     diag = ba+k*4;
1098     ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr);
1099     ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
1100 
1101     jmin = bi[k]; jmax = bi[k+1];
1102     if (jmin < jmax) {
1103       for (j=jmin; j<jmax; j++){
1104          vj = bj[j];           /* block col. index of U */
1105          u   = ba + j*4;
1106          rtmp_ptr = rtmp + vj*4;
1107          for (k1=0; k1<4; k1++){
1108            *u++        = *rtmp_ptr;
1109            *rtmp_ptr++ = 0.0;
1110          }
1111       }
1112 
1113       /* ... add k to row list for first nonzero entry in k-th row */
1114       il[k] = jmin;
1115       i     = bj[jmin];
1116       jl[k] = jl[i]; jl[i] = k;
1117     }
1118   }
1119 
1120   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1121   ierr = PetscFree(il);CHKERRQ(ierr);
1122   ierr = PetscFree(dk);CHKERRQ(ierr);
1123 
1124   C->factor    = MAT_FACTOR_CHOLESKY;
1125   C->assembled = PETSC_TRUE;
1126   C->preallocated = PETSC_TRUE;
1127   ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 /*
1132     Numeric U^T*D*U factorization for SBAIJ format.
1133     Version for blocks are 1 by 1.
1134 */
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1"
1137 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,MatFactorInfo *info,Mat *B)
1138 {
1139   Mat            C = *B;
1140   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1141   IS             ip=b->row;
1142   PetscErrorCode ierr;
1143   PetscInt       *rip,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1144   PetscInt       *ai,*aj,*a2anew;
1145   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1146   MatScalar      *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1147   PetscReal      zeropivot,rs,shiftnz;
1148   PetscReal      shiftpd;
1149   ChShift_Ctx    sctx;
1150   PetscInt       newshift;
1151 
1152   PetscFunctionBegin;
1153   /* initialization */
1154   shiftnz   = info->shiftnz;
1155   shiftpd   = info->shiftpd;
1156   zeropivot = info->zeropivot;
1157 
1158   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
1159   if (!a->permute){
1160     ai = a->i; aj = a->j; aa = a->a;
1161   } else {
1162     ai = a->inew; aj = a->jnew;
1163     nz = ai[mbs];
1164     ierr = PetscMalloc(nz*sizeof(MatScalar),&aa);CHKERRQ(ierr);
1165     a2anew = a->a2anew;
1166     bval   = a->a;
1167     for (j=0; j<nz; j++){
1168       aa[a2anew[j]] = *(bval++);
1169     }
1170   }
1171 
1172   /* initialization */
1173   /* il and jl record the first nonzero element in each row of the accessing
1174      window U(0:k, k:mbs-1).
1175      jl:    list of rows to be added to uneliminated rows
1176             i>= k: jl(i) is the first row to be added to row i
1177             i<  k: jl(i) is the row following row i in some list of rows
1178             jl(i) = mbs indicates the end of a list
1179      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1180             row i of U */
1181   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1182   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
1183   jl   = il + mbs;
1184   rtmp = (MatScalar*)(jl + mbs);
1185 
1186   sctx.shift_amount = 0;
1187   sctx.nshift       = 0;
1188   do {
1189     sctx.chshift = PETSC_FALSE;
1190     for (i=0; i<mbs; i++) {
1191       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1192     }
1193 
1194     for (k = 0; k<mbs; k++){
1195       /*initialize k-th row by the perm[k]-th row of A */
1196       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1197       bval = ba + bi[k];
1198       for (j = jmin; j < jmax; j++){
1199         col = rip[aj[j]];
1200         rtmp[col] = aa[j];
1201         *bval++  = 0.0; /* for in-place factorization */
1202       }
1203 
1204       /* shift the diagonal of the matrix */
1205       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1206 
1207       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1208       dk = rtmp[k];
1209       i = jl[k]; /* first row to be added to k_th row  */
1210 
1211       while (i < k){
1212         nexti = jl[i]; /* next row to be added to k_th row */
1213 
1214         /* compute multiplier, update diag(k) and U(i,k) */
1215         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1216         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1217         dk += uikdi*ba[ili];
1218         ba[ili] = uikdi; /* -U(i,k) */
1219 
1220         /* add multiple of row i to k-th row */
1221         jmin = ili + 1; jmax = bi[i+1];
1222         if (jmin < jmax){
1223           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1224           ierr = PetscLogFlops(2*(jmax-jmin));CHKERRQ(ierr);
1225 
1226           /* update il and jl for row i */
1227           il[i] = jmin;
1228           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1229         }
1230         i = nexti;
1231       }
1232 
1233       /* shift the diagonals when zero pivot is detected */
1234       /* compute rs=sum of abs(off-diagonal) */
1235       rs   = 0.0;
1236       jmin = bi[k]+1;
1237       nz   = bi[k+1] - jmin;
1238       if (nz){
1239         bcol = bj + jmin;
1240         while (nz--){
1241           rs += PetscAbsScalar(rtmp[*bcol]);
1242           bcol++;
1243         }
1244       }
1245 
1246       sctx.rs = rs;
1247       sctx.pv = dk;
1248       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1249       if (newshift == 1) break;    /* sctx.shift_amount is updated */
1250 
1251       /* copy data into U(k,:) */
1252       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1253       jmin = bi[k]+1; jmax = bi[k+1];
1254       if (jmin < jmax) {
1255         for (j=jmin; j<jmax; j++){
1256           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1257         }
1258         /* add the k-th row into il and jl */
1259         il[k] = jmin;
1260         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1261       }
1262     }
1263   } while (sctx.chshift);
1264   ierr = PetscFree(il);CHKERRQ(ierr);
1265   if (a->permute){ierr = PetscFree(aa);CHKERRQ(ierr);}
1266 
1267   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
1268   C->factor       = MAT_FACTOR_CHOLESKY;
1269   C->assembled    = PETSC_TRUE;
1270   C->preallocated = PETSC_TRUE;
1271   ierr = PetscLogFlops(C->rmap.N);CHKERRQ(ierr);
1272   if (sctx.nshift){
1273     if (shiftnz) {
1274       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1275     } else if (shiftpd) {
1276       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1277     }
1278   }
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 /*
1283   Version for when blocks are 1 by 1 Using natural ordering
1284 */
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering"
1287 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B)
1288 {
1289   Mat            C = *B;
1290   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1291   PetscErrorCode ierr;
1292   PetscInt       i,j,mbs = a->mbs;
1293   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1294   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1295   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1296   PetscReal      zeropivot,rs,shiftnz;
1297   PetscReal      shiftpd;
1298   ChShift_Ctx    sctx;
1299   PetscInt       newshift;
1300 
1301   PetscFunctionBegin;
1302   /* initialization */
1303   shiftnz   = info->shiftnz;
1304   shiftpd   = info->shiftpd;
1305   zeropivot = info->zeropivot;
1306 
1307   /* il and jl record the first nonzero element in each row of the accessing
1308      window U(0:k, k:mbs-1).
1309      jl:    list of rows to be added to uneliminated rows
1310             i>= k: jl(i) is the first row to be added to row i
1311             i<  k: jl(i) is the row following row i in some list of rows
1312             jl(i) = mbs indicates the end of a list
1313      il(i): points to the first nonzero element in U(i,k:mbs-1)
1314   */
1315   ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1316   ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr);
1317   jl   = il + mbs;
1318 
1319   sctx.shift_amount = 0;
1320   sctx.nshift       = 0;
1321   do {
1322     sctx.chshift = PETSC_FALSE;
1323     for (i=0; i<mbs; i++) {
1324       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1325     }
1326 
1327     for (k = 0; k<mbs; k++){
1328       /*initialize k-th row with elements nonzero in row perm(k) of A */
1329       nz   = ai[k+1] - ai[k];
1330       acol = aj + ai[k];
1331       aval = aa + ai[k];
1332       bval = ba + bi[k];
1333       while (nz -- ){
1334         rtmp[*acol++] = *aval++;
1335         *bval++       = 0.0; /* for in-place factorization */
1336       }
1337 
1338       /* shift the diagonal of the matrix */
1339       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1340 
1341       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1342       dk = rtmp[k];
1343       i  = jl[k]; /* first row to be added to k_th row  */
1344 
1345       while (i < k){
1346         nexti = jl[i]; /* next row to be added to k_th row */
1347         /* compute multiplier, update D(k) and U(i,k) */
1348         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1349         uikdi = - ba[ili]*ba[bi[i]];
1350         dk   += uikdi*ba[ili];
1351         ba[ili] = uikdi; /* -U(i,k) */
1352 
1353         /* add multiple of row i to k-th row ... */
1354         jmin = ili + 1;
1355         nz   = bi[i+1] - jmin;
1356         if (nz > 0){
1357           bcol = bj + jmin;
1358           bval = ba + jmin;
1359           ierr = PetscLogFlops(2*nz);CHKERRQ(ierr);
1360           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1361 
1362           /* update il and jl for i-th row */
1363           il[i] = jmin;
1364           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1365         }
1366         i = nexti;
1367       }
1368 
1369       /* shift the diagonals when zero pivot is detected */
1370       /* compute rs=sum of abs(off-diagonal) */
1371       rs   = 0.0;
1372       jmin = bi[k]+1;
1373       nz   = bi[k+1] - jmin;
1374       if (nz){
1375         bcol = bj + jmin;
1376         while (nz--){
1377           rs += PetscAbsScalar(rtmp[*bcol]);
1378           bcol++;
1379         }
1380       }
1381 
1382       sctx.rs = rs;
1383       sctx.pv = dk;
1384       ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr);
1385       if (newshift == 1) break;    /* sctx.shift_amount is updated */
1386 
1387       /* copy data into U(k,:) */
1388       ba[bi[k]] = 1.0/dk;
1389       jmin      = bi[k]+1;
1390       nz        = bi[k+1] - jmin;
1391       if (nz){
1392         bcol = bj + jmin;
1393         bval = ba + jmin;
1394         while (nz--){
1395           *bval++       = rtmp[*bcol];
1396           rtmp[*bcol++] = 0.0;
1397         }
1398         /* add k-th row into il and jl */
1399         il[k] = jmin;
1400         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1401       }
1402     } /* end of for (k = 0; k<mbs; k++) */
1403   } while (sctx.chshift);
1404   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1405   ierr = PetscFree(il);CHKERRQ(ierr);
1406 
1407   C->factor       = MAT_FACTOR_CHOLESKY;
1408   C->assembled    = PETSC_TRUE;
1409   C->preallocated = PETSC_TRUE;
1410   ierr = PetscLogFlops(C->rmap.N);CHKERRQ(ierr);
1411   if (sctx.nshift){
1412     if (shiftnz) {
1413       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1414     } else if (shiftpd) {
1415       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr);
1416     }
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ"
1423 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1424 {
1425   PetscErrorCode ierr;
1426   Mat            C;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatCholeskyFactorSymbolic(A,perm,info,&C);CHKERRQ(ierr);
1430   ierr = MatCholeskyFactorNumeric(A,info,&C);CHKERRQ(ierr);
1431   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 
1436