xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision f3086b4b6c0607d723a265a488c24324551a6618)
15d517e7eSBarry Smith /*
2ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
35d517e7eSBarry Smith */
470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
54078e994SBarry Smith #include "src/inline/ilu.h"
6ec3a8b7bSBarry Smith 
76d84be18SBarry Smith /* ------------------------------------------------------------*/
86d84be18SBarry Smith /*
94eeb42bcSBarry Smith       Version for when blocks are 2 by 2
104eeb42bcSBarry Smith */
114a2ae208SSatish Balay #undef __FUNCT__
124a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
13af281ebdSHong Zhang PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,MatFactorInfo *info,Mat *B)
144eeb42bcSBarry Smith {
154eeb42bcSBarry Smith   Mat            C = *B;
164eeb42bcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
177cdf2dbbSSatish Balay   IS             isrow = b->row,isicol = b->icol;
186849ba73SBarry Smith   PetscErrorCode ierr;
19690b6cddSBarry Smith   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
20690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row;
21690b6cddSBarry Smith   PetscInt       *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
22329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
232515f8d2SSatish Balay   MatScalar      p1,p2,p3,p4;
243f1db9ecSBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
254eeb42bcSBarry Smith 
263a40ed3dSBarry Smith   PetscFunctionBegin;
274eeb42bcSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
284eeb42bcSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
29b0a32e0cSBarry Smith   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
304eeb42bcSBarry Smith 
314eeb42bcSBarry Smith   for (i=0; i<n; i++) {
324078e994SBarry Smith     nz    = bi[i+1] - bi[i];
334078e994SBarry Smith     ajtmp = bj + bi[i];
344eeb42bcSBarry Smith     for  (j=0; j<nz; j++) {
354eeb42bcSBarry Smith       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
364eeb42bcSBarry Smith     }
374eeb42bcSBarry Smith     /* load in initial (unfactored row) */
384eeb42bcSBarry Smith     idx      = r[i];
394078e994SBarry Smith     nz       = ai[idx+1] - ai[idx];
404078e994SBarry Smith     ajtmpold = aj + ai[idx];
414078e994SBarry Smith     v        = aa + 4*ai[idx];
424eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
434eeb42bcSBarry Smith       x    = rtmp+4*ic[ajtmpold[j]];
444eeb42bcSBarry Smith       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
454eeb42bcSBarry Smith       v    += 4;
464eeb42bcSBarry Smith     }
474eeb42bcSBarry Smith     row = *ajtmp++;
484eeb42bcSBarry Smith     while (row < i) {
494eeb42bcSBarry Smith       pc = rtmp + 4*row;
504eeb42bcSBarry Smith       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
5188685aaeSLois Curfman McInnes       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
524078e994SBarry Smith         pv = ba + 4*diag_offset[row];
534078e994SBarry Smith         pj = bj + diag_offset[row] + 1;
544eeb42bcSBarry Smith         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
554eeb42bcSBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
564eeb42bcSBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
574eeb42bcSBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
584eeb42bcSBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
594078e994SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
604eeb42bcSBarry Smith         pv += 4;
614eeb42bcSBarry Smith         for (j=0; j<nz; j++) {
624eeb42bcSBarry Smith           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
634eeb42bcSBarry Smith           x    = rtmp + 4*pj[j];
644eeb42bcSBarry Smith           x[0] -= m1*x1 + m3*x2;
654eeb42bcSBarry Smith           x[1] -= m2*x1 + m4*x2;
664eeb42bcSBarry Smith           x[2] -= m1*x3 + m3*x4;
674eeb42bcSBarry Smith           x[3] -= m2*x3 + m4*x4;
684eeb42bcSBarry Smith           pv   += 4;
694eeb42bcSBarry Smith         }
70b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
714eeb42bcSBarry Smith       }
724eeb42bcSBarry Smith       row = *ajtmp++;
734eeb42bcSBarry Smith     }
744eeb42bcSBarry Smith     /* finished row so stick it into b->a */
754078e994SBarry Smith     pv = ba + 4*bi[i];
764078e994SBarry Smith     pj = bj + bi[i];
774078e994SBarry Smith     nz = bi[i+1] - bi[i];
784eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
794eeb42bcSBarry Smith       x     = rtmp+4*pj[j];
804eeb42bcSBarry Smith       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
814eeb42bcSBarry Smith       pv   += 4;
824eeb42bcSBarry Smith     }
834eeb42bcSBarry Smith     /* invert diagonal block */
844078e994SBarry Smith     w = ba + 4*diag_offset[i];
854cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
864eeb42bcSBarry Smith   }
874eeb42bcSBarry Smith 
88606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
894eeb42bcSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
904eeb42bcSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
914eeb42bcSBarry Smith   C->factor = FACTOR_LU;
924eeb42bcSBarry Smith   C->assembled = PETSC_TRUE;
93b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
943a40ed3dSBarry Smith   PetscFunctionReturn(0);
954eeb42bcSBarry Smith }
964cd38560SBarry Smith /*
974cd38560SBarry Smith       Version for when blocks are 2 by 2 Using natural ordering
984cd38560SBarry Smith */
994a2ae208SSatish Balay #undef __FUNCT__
1004a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
101af281ebdSHong Zhang PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B)
1024cd38560SBarry Smith {
1034cd38560SBarry Smith   Mat            C = *B;
1044cd38560SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
105dfbe8321SBarry Smith   PetscErrorCode ierr;
106690b6cddSBarry Smith   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
107690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row;
108690b6cddSBarry Smith   PetscInt       *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,*pc,*w,*x;
1102515f8d2SSatish Balay   MatScalar      p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
1114cd38560SBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
1124cd38560SBarry Smith 
1134cd38560SBarry Smith   PetscFunctionBegin;
114b0a32e0cSBarry Smith   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1154cd38560SBarry Smith 
1164cd38560SBarry Smith   for (i=0; i<n; i++) {
1174cd38560SBarry Smith     nz    = bi[i+1] - bi[i];
1184cd38560SBarry Smith     ajtmp = bj + bi[i];
1194cd38560SBarry Smith     for  (j=0; j<nz; j++) {
1204cd38560SBarry Smith       x = rtmp+4*ajtmp[j];
1214cd38560SBarry Smith       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
1224cd38560SBarry Smith     }
1234cd38560SBarry Smith     /* load in initial (unfactored row) */
1244cd38560SBarry Smith     nz       = ai[i+1] - ai[i];
1254cd38560SBarry Smith     ajtmpold = aj + ai[i];
1264cd38560SBarry Smith     v        = aa + 4*ai[i];
1274cd38560SBarry Smith     for (j=0; j<nz; j++) {
1284cd38560SBarry Smith       x    = rtmp+4*ajtmpold[j];
1294cd38560SBarry Smith       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
1304cd38560SBarry Smith       v    += 4;
1314cd38560SBarry Smith     }
1324cd38560SBarry Smith     row = *ajtmp++;
1334cd38560SBarry Smith     while (row < i) {
1344cd38560SBarry Smith       pc  = rtmp + 4*row;
1354cd38560SBarry Smith       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
1364cd38560SBarry Smith       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
1374cd38560SBarry Smith         pv = ba + 4*diag_offset[row];
1384cd38560SBarry Smith         pj = bj + diag_offset[row] + 1;
1394cd38560SBarry Smith         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
1404cd38560SBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
1414cd38560SBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
1424cd38560SBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
1434cd38560SBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
1444cd38560SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
1454cd38560SBarry Smith         pv += 4;
1464cd38560SBarry Smith         for (j=0; j<nz; j++) {
1474cd38560SBarry Smith           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
1484cd38560SBarry Smith           x    = rtmp + 4*pj[j];
1494cd38560SBarry Smith           x[0] -= m1*x1 + m3*x2;
1504cd38560SBarry Smith           x[1] -= m2*x1 + m4*x2;
1514cd38560SBarry Smith           x[2] -= m1*x3 + m3*x4;
1524cd38560SBarry Smith           x[3] -= m2*x3 + m4*x4;
1534cd38560SBarry Smith           pv   += 4;
1544cd38560SBarry Smith         }
155b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
1564cd38560SBarry Smith       }
1574cd38560SBarry Smith       row = *ajtmp++;
1584cd38560SBarry Smith     }
1594cd38560SBarry Smith     /* finished row so stick it into b->a */
1604cd38560SBarry Smith     pv = ba + 4*bi[i];
1614cd38560SBarry Smith     pj = bj + bi[i];
1624cd38560SBarry Smith     nz = bi[i+1] - bi[i];
1634cd38560SBarry Smith     for (j=0; j<nz; j++) {
1644cd38560SBarry Smith       x      = rtmp+4*pj[j];
1654cd38560SBarry Smith       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
1664cd38560SBarry Smith       pv   += 4;
1674cd38560SBarry Smith     }
1684cd38560SBarry Smith     /* invert diagonal block */
1694cd38560SBarry Smith     w = ba + 4*diag_offset[i];
1704cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
1714cd38560SBarry Smith     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
1724cd38560SBarry Smith   }
1734cd38560SBarry Smith 
174606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1754cd38560SBarry Smith   C->factor    = FACTOR_LU;
1764cd38560SBarry Smith   C->assembled = PETSC_TRUE;
177b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1784cd38560SBarry Smith   PetscFunctionReturn(0);
1794cd38560SBarry Smith }
1807fc0212eSBarry Smith 
1817fc0212eSBarry Smith /* ----------------------------------------------------------- */
1827fc0212eSBarry Smith /*
1837fc0212eSBarry Smith      Version for when blocks are 1 by 1.
1847fc0212eSBarry Smith */
1854a2ae208SSatish Balay #undef __FUNCT__
1864a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
187af281ebdSHong Zhang PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,MatFactorInfo *info,Mat *B)
1887fc0212eSBarry Smith {
1897fc0212eSBarry Smith   Mat            C = *B;
1907fc0212eSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
1917cdf2dbbSSatish Balay   IS             isrow = b->row,isicol = b->icol;
1926849ba73SBarry Smith   PetscErrorCode ierr;
193690b6cddSBarry Smith   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
194690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
195690b6cddSBarry Smith   PetscInt       *diag_offset = b->diag,diag,*pj;
196329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,multiplier,*pc;
1973f1db9ecSBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
1987fc0212eSBarry Smith 
1993a40ed3dSBarry Smith   PetscFunctionBegin;
2007fc0212eSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2017fc0212eSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
202b0a32e0cSBarry Smith   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2037fc0212eSBarry Smith 
2047fc0212eSBarry Smith   for (i=0; i<n; i++) {
2054078e994SBarry Smith     nz    = bi[i+1] - bi[i];
2064078e994SBarry Smith     ajtmp = bj + bi[i];
2077fc0212eSBarry Smith     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
2087fc0212eSBarry Smith 
2097fc0212eSBarry Smith     /* load in initial (unfactored row) */
2104078e994SBarry Smith     nz       = ai[r[i]+1] - ai[r[i]];
2114078e994SBarry Smith     ajtmpold = aj + ai[r[i]];
2124078e994SBarry Smith     v        = aa + ai[r[i]];
2137fc0212eSBarry Smith     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
2147fc0212eSBarry Smith 
2157fc0212eSBarry Smith     row = *ajtmp++;
2167fc0212eSBarry Smith     while (row < i) {
2177fc0212eSBarry Smith       pc = rtmp + row;
2187fc0212eSBarry Smith       if (*pc != 0.0) {
2194078e994SBarry Smith         pv         = ba + diag_offset[row];
2204078e994SBarry Smith         pj         = bj + diag_offset[row] + 1;
2217fc0212eSBarry Smith         multiplier = *pc * *pv++;
2227fc0212eSBarry Smith         *pc        = multiplier;
2234078e994SBarry Smith         nz         = bi[row+1] - diag_offset[row] - 1;
2247fc0212eSBarry Smith         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
225b0a32e0cSBarry Smith         PetscLogFlops(1+2*nz);
2267fc0212eSBarry Smith       }
2277fc0212eSBarry Smith       row = *ajtmp++;
2287fc0212eSBarry Smith     }
2297fc0212eSBarry Smith     /* finished row so stick it into b->a */
2304078e994SBarry Smith     pv = ba + bi[i];
2314078e994SBarry Smith     pj = bj + bi[i];
2324078e994SBarry Smith     nz = bi[i+1] - bi[i];
2337fc0212eSBarry Smith     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
2344078e994SBarry Smith     diag = diag_offset[i] - bi[i];
2357fc0212eSBarry Smith     /* check pivot entry for current row */
236a73cf429SBarry Smith     if (pv[diag] == 0.0) {
23729bbc08cSBarry Smith       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
2387fc0212eSBarry Smith     }
2397fc0212eSBarry Smith     pv[diag] = 1.0/pv[diag];
2407fc0212eSBarry Smith   }
2417fc0212eSBarry Smith 
242606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2437fc0212eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2447fc0212eSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2457fc0212eSBarry Smith   C->factor    = FACTOR_LU;
2467fc0212eSBarry Smith   C->assembled = PETSC_TRUE;
247b0a32e0cSBarry Smith   PetscLogFlops(C->n);
2483a40ed3dSBarry Smith   PetscFunctionReturn(0);
2497fc0212eSBarry Smith }
2507fc0212eSBarry Smith 
251273d9f13SBarry Smith 
2525d517e7eSBarry Smith /* ----------------------------------------------------------- */
2534a2ae208SSatish Balay #undef __FUNCT__
2544a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqBAIJ"
255dfbe8321SBarry Smith PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
2565d517e7eSBarry Smith {
257dfbe8321SBarry Smith   PetscErrorCode ierr;
2585d517e7eSBarry Smith   Mat            C;
2595d517e7eSBarry Smith 
2603a40ed3dSBarry Smith   PetscFunctionBegin;
261b9b97703SBarry Smith   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
262af281ebdSHong Zhang   ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr);
263273d9f13SBarry Smith   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
264b0a32e0cSBarry Smith   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
2653a40ed3dSBarry Smith   PetscFunctionReturn(0);
2665d517e7eSBarry Smith }
2674cd38560SBarry Smith 
268c05c3958SHong Zhang #include "src/mat/impls/sbaij/seq/sbaij.h"
269c05c3958SHong Zhang #undef __FUNCT__
270c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N"
271af281ebdSHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,MatFactorInfo *info,Mat *B)
272c05c3958SHong Zhang {
273*f3086b4bSHong Zhang   PetscErrorCode ierr;
27478910aadSHong Zhang   Mat            C = *B;
27578910aadSHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
27678910aadSHong Zhang   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
27778910aadSHong Zhang   IS             ip=b->row;
27878910aadSHong Zhang   PetscInt       *rip,i,j,mbs=a->mbs,bs=A->bs,*bi=b->i,*bj=b->j,*bcol;
27978910aadSHong Zhang   PetscInt       *ai=a->i,*aj=a->j;
28078910aadSHong Zhang   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
28178910aadSHong Zhang   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
28278910aadSHong Zhang   PetscReal      damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount;
28378910aadSHong Zhang   PetscTruth     damp,chshift;
28478910aadSHong Zhang   PetscInt       nshift=0,ndamp=0;
28578910aadSHong Zhang 
286c05c3958SHong Zhang   PetscFunctionBegin;
2876ad2eaddSHong Zhang   if (bs > 1) {
2886ad2eaddSHong Zhang     if (!a->sbaijMat){
2896ad2eaddSHong Zhang       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
2906ad2eaddSHong Zhang     }
291*f3086b4bSHong Zhang     ierr = (a->sbaijMat)->ops->choleskyfactornumeric(a->sbaijMat,info,B);CHKERRQ(ierr);
2926ad2eaddSHong Zhang     ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
2936ad2eaddSHong Zhang     a->sbaijMat = PETSC_NULL;
2946ad2eaddSHong Zhang     PetscFunctionReturn(0);
2956ad2eaddSHong Zhang   }
29678910aadSHong Zhang 
29778910aadSHong Zhang   /* initialization */
2986ad2eaddSHong Zhang   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
29978910aadSHong Zhang   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
30078910aadSHong Zhang   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
30178910aadSHong Zhang   jl   = il + mbs;
30278910aadSHong Zhang   rtmp = (MatScalar*)(jl + mbs);
30378910aadSHong Zhang 
30478910aadSHong Zhang   shift_amount = 0;
30578910aadSHong Zhang   do {
30678910aadSHong Zhang     damp = PETSC_FALSE;
30778910aadSHong Zhang     chshift = PETSC_FALSE;
30878910aadSHong Zhang     for (i=0; i<mbs; i++) {
30978910aadSHong Zhang       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
31078910aadSHong Zhang     }
31178910aadSHong Zhang 
31278910aadSHong Zhang     for (k = 0; k<mbs; k++){
31378910aadSHong Zhang       bval = ba + bi[k];
31478910aadSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
31578910aadSHong Zhang       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
31678910aadSHong Zhang       for (j = jmin; j < jmax; j++){
31778910aadSHong Zhang         col = rip[aj[j]];
31878910aadSHong Zhang         if (col >= k){ /* only take upper triangular entry */
31978910aadSHong Zhang           rtmp[col] = aa[j];
32078910aadSHong Zhang           *bval++  = 0.0; /* for in-place factorization */
32178910aadSHong Zhang         }
32278910aadSHong Zhang       }
32378910aadSHong Zhang       /* damp the diagonal of the matrix */
32478910aadSHong Zhang       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
32578910aadSHong Zhang 
32678910aadSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
32778910aadSHong Zhang       dk = rtmp[k];
32878910aadSHong Zhang       i = jl[k]; /* first row to be added to k_th row  */
32978910aadSHong Zhang 
33078910aadSHong Zhang       while (i < k){
33178910aadSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
33278910aadSHong Zhang 
33378910aadSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
33478910aadSHong Zhang         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
33578910aadSHong Zhang         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
33678910aadSHong Zhang         dk += uikdi*ba[ili];
33778910aadSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
33878910aadSHong Zhang 
33978910aadSHong Zhang         /* add multiple of row i to k-th row */
34078910aadSHong Zhang         jmin = ili + 1; jmax = bi[i+1];
34178910aadSHong Zhang         if (jmin < jmax){
34278910aadSHong Zhang           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
34378910aadSHong Zhang           /* update il and jl for row i */
34478910aadSHong Zhang           il[i] = jmin;
34578910aadSHong Zhang           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
34678910aadSHong Zhang         }
34778910aadSHong Zhang         i = nexti;
34878910aadSHong Zhang       }
34978910aadSHong Zhang 
35078910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
35178910aadSHong Zhang 	/* calculate a shift that would make this row diagonally dominant */
35278910aadSHong Zhang 	PetscReal rs = PetscAbs(PetscRealPart(dk));
35378910aadSHong Zhang 	jmin      = bi[k]+1;
35478910aadSHong Zhang 	nz        = bi[k+1] - jmin;
35578910aadSHong Zhang 	if (nz){
35678910aadSHong Zhang 	  bcol = bj + jmin;
35778910aadSHong Zhang 	  bval = ba + jmin;
35878910aadSHong Zhang 	  while (nz--){
35978910aadSHong Zhang 	    rs += PetscAbsScalar(rtmp[*bcol++]);
36078910aadSHong Zhang 	  }
36178910aadSHong Zhang 	}
36278910aadSHong Zhang 	/* if this shift is less than the previous, just up the previous
36378910aadSHong Zhang 	   one by a bit */
36478910aadSHong Zhang 	shift_amount = PetscMax(rs,1.1*shift_amount);
36578910aadSHong Zhang 	chshift  = PETSC_TRUE;
36678910aadSHong Zhang 	/* Unlike in the ILU case there is no exit condition on nshift:
36778910aadSHong Zhang 	   we increase the shift until it converges. There is no guarantee that
36878910aadSHong Zhang 	   this algorithm converges faster or slower, or is better or worse
36978910aadSHong Zhang 	   than the ILU algorithm. */
37078910aadSHong Zhang 	nshift++;
37178910aadSHong Zhang 	break;
37278910aadSHong Zhang       }
37378910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot){
37478910aadSHong Zhang         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
37578910aadSHong Zhang         if (damping > 0.0) {
37678910aadSHong Zhang           if (ndamp) damping *= 2.0;
37778910aadSHong Zhang           damp = PETSC_TRUE;
37878910aadSHong Zhang           ndamp++;
37978910aadSHong Zhang           break;
38078910aadSHong Zhang         } else if (PetscAbsScalar(dk) < zeropivot){
38178910aadSHong Zhang           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
38278910aadSHong Zhang         } else {
38378910aadSHong Zhang           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
38478910aadSHong Zhang         }
38578910aadSHong Zhang       }
38678910aadSHong Zhang 
38778910aadSHong Zhang       /* copy data into U(k,:) */
38878910aadSHong Zhang       ba[bi[k]] = 1.0/dk; /* U(k,k) */
38978910aadSHong Zhang       jmin = bi[k]+1; jmax = bi[k+1];
39078910aadSHong Zhang       if (jmin < jmax) {
39178910aadSHong Zhang         for (j=jmin; j<jmax; j++){
39278910aadSHong Zhang           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
39378910aadSHong Zhang         }
39478910aadSHong Zhang         /* add the k-th row into il and jl */
39578910aadSHong Zhang         il[k] = jmin;
39678910aadSHong Zhang         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
39778910aadSHong Zhang       }
39878910aadSHong Zhang     }
39978910aadSHong Zhang   } while (damp||chshift);
40078910aadSHong Zhang   ierr = PetscFree(il);CHKERRQ(ierr);
40178910aadSHong Zhang 
40278910aadSHong Zhang   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
40378910aadSHong Zhang   C->factor       = FACTOR_CHOLESKY;
40478910aadSHong Zhang   C->assembled    = PETSC_TRUE;
40578910aadSHong Zhang   C->preallocated = PETSC_TRUE;
40678910aadSHong Zhang   PetscLogFlops(C->m);
40778910aadSHong Zhang   if (ndamp) {
408*f3086b4bSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
40978910aadSHong Zhang   }
41078910aadSHong Zhang   if (nshift) {
41178910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ diagonal shifted %D shifts\n",nshift);
41278910aadSHong Zhang   }
413c05c3958SHong Zhang   PetscFunctionReturn(0);
414c05c3958SHong Zhang }
4154cd38560SBarry Smith 
416c05c3958SHong Zhang #undef __FUNCT__
417c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering"
418af281ebdSHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *fact)
419c05c3958SHong Zhang {
42078910aadSHong Zhang   Mat            C = *fact;
42178910aadSHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
42278910aadSHong Zhang   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
42378910aadSHong Zhang   PetscErrorCode ierr;
42478910aadSHong Zhang   PetscInt       i,j,am=a->mbs,bs=A->bs;
42578910aadSHong Zhang   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
42678910aadSHong Zhang   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
42778910aadSHong Zhang   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
42878910aadSHong Zhang   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
42978910aadSHong Zhang   PetscTruth     damp,chshift;
43078910aadSHong Zhang   PetscInt       nshift=0;
43178910aadSHong Zhang 
432c05c3958SHong Zhang   PetscFunctionBegin;
4336ad2eaddSHong Zhang   if (bs > 1) {
4346ad2eaddSHong Zhang     SETERRQ(PETSC_ERR_USER,"not done yet");
4356ad2eaddSHong Zhang   }
4366ad2eaddSHong Zhang 
43778910aadSHong Zhang   /* initialization */
43878910aadSHong Zhang   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
43978910aadSHong Zhang   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
44078910aadSHong Zhang   jl   = il + am;
44178910aadSHong Zhang   rtmp = (MatScalar*)(jl + am);
44278910aadSHong Zhang 
44378910aadSHong Zhang   shift_amount = 0;
44478910aadSHong Zhang   do {
44578910aadSHong Zhang     damp = PETSC_FALSE;
44678910aadSHong Zhang     chshift = PETSC_FALSE;
44778910aadSHong Zhang     for (i=0; i<am; i++) {
44878910aadSHong Zhang       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
44978910aadSHong Zhang     }
45078910aadSHong Zhang 
45178910aadSHong Zhang     for (k = 0; k<am; k++){
45278910aadSHong Zhang     /* initialize k-th row with elements nonzero in row perm(k) of A */
45378910aadSHong Zhang       nz   = ai[k+1] - ai[k];
45478910aadSHong Zhang       acol = aj + ai[k];
45578910aadSHong Zhang       aval = aa + ai[k];
45678910aadSHong Zhang       bval = ba + bi[k];
45778910aadSHong Zhang       while (nz -- ){
45878910aadSHong Zhang         if (*acol < k) { /* skip lower triangular entries */
45978910aadSHong Zhang           acol++; aval++;
46078910aadSHong Zhang         } else {
46178910aadSHong Zhang           rtmp[*acol++] = *aval++;
46278910aadSHong Zhang           *bval++       = 0.0; /* for in-place factorization */
46378910aadSHong Zhang         }
46478910aadSHong Zhang       }
46578910aadSHong Zhang       /* damp the diagonal of the matrix */
46678910aadSHong Zhang       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
46778910aadSHong Zhang 
46878910aadSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
46978910aadSHong Zhang       dk = rtmp[k];
47078910aadSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
47178910aadSHong Zhang 
47278910aadSHong Zhang       while (i < k){
47378910aadSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
47478910aadSHong Zhang         /* compute multiplier, update D(k) and U(i,k) */
47578910aadSHong Zhang         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
47678910aadSHong Zhang         uikdi = - ba[ili]*ba[bi[i]];
47778910aadSHong Zhang         dk   += uikdi*ba[ili];
47878910aadSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
47978910aadSHong Zhang 
48078910aadSHong Zhang         /* add multiple of row i to k-th row ... */
48178910aadSHong Zhang         jmin = ili + 1;
48278910aadSHong Zhang         nz   = bi[i+1] - jmin;
48378910aadSHong Zhang         if (nz > 0){
48478910aadSHong Zhang           bcol = bj + jmin;
48578910aadSHong Zhang           bval = ba + jmin;
48678910aadSHong Zhang           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
48778910aadSHong Zhang           /* update il and jl for i-th row */
48878910aadSHong Zhang           il[i] = jmin;
48978910aadSHong Zhang           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
49078910aadSHong Zhang         }
49178910aadSHong Zhang         i = nexti;
49278910aadSHong Zhang       }
49378910aadSHong Zhang 
49478910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
49578910aadSHong Zhang 	/* calculate a shift that would make this row diagonally dominant */
49678910aadSHong Zhang 	PetscReal rs = PetscAbs(PetscRealPart(dk));
49778910aadSHong Zhang 	jmin      = bi[k]+1;
49878910aadSHong Zhang 	nz        = bi[k+1] - jmin;
49978910aadSHong Zhang 	if (nz){
50078910aadSHong Zhang 	  bcol = bj + jmin;
50178910aadSHong Zhang 	  bval = ba + jmin;
50278910aadSHong Zhang 	  while (nz--){
50378910aadSHong Zhang 	    rs += PetscAbsScalar(rtmp[*bcol++]);
50478910aadSHong Zhang 	  }
50578910aadSHong Zhang 	}
50678910aadSHong Zhang 	/* if this shift is less than the previous, just up the previous
50778910aadSHong Zhang 	   one by a bit */
50878910aadSHong Zhang 	shift_amount = PetscMax(rs,1.1*shift_amount);
50978910aadSHong Zhang 	chshift  = PETSC_TRUE;
51078910aadSHong Zhang 	/* Unlike in the ILU case there is no exit condition on nshift:
51178910aadSHong Zhang 	   we increase the shift until it converges. There is no guarantee that
51278910aadSHong Zhang 	   this algorithm converges faster or slower, or is better or worse
51378910aadSHong Zhang 	   than the ILU algorithm. */
51478910aadSHong Zhang 	nshift++;
51578910aadSHong Zhang 	break;
51678910aadSHong Zhang       }
51778910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot){
51878910aadSHong Zhang         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
51978910aadSHong Zhang         if (damping > 0.0) {
52078910aadSHong Zhang           if (ndamp) damping *= 2.0;
52178910aadSHong Zhang           damp = PETSC_TRUE;
52278910aadSHong Zhang           ndamp++;
52378910aadSHong Zhang           break;
52478910aadSHong Zhang         } else if (PetscAbsScalar(dk) < zeropivot){
52578910aadSHong Zhang           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
52678910aadSHong Zhang         } else {
52778910aadSHong Zhang           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
52878910aadSHong Zhang         }
52978910aadSHong Zhang       }
53078910aadSHong Zhang 
53178910aadSHong Zhang       /* copy data into U(k,:) */
53278910aadSHong Zhang       ba[bi[k]] = 1.0/dk;
53378910aadSHong Zhang       jmin      = bi[k]+1;
53478910aadSHong Zhang       nz        = bi[k+1] - jmin;
53578910aadSHong Zhang       if (nz){
53678910aadSHong Zhang         bcol = bj + jmin;
53778910aadSHong Zhang         bval = ba + jmin;
53878910aadSHong Zhang         while (nz--){
53978910aadSHong Zhang           *bval++       = rtmp[*bcol];
54078910aadSHong Zhang           rtmp[*bcol++] = 0.0;
54178910aadSHong Zhang         }
54278910aadSHong Zhang         /* add k-th row into il and jl */
54378910aadSHong Zhang         il[k] = jmin;
54478910aadSHong Zhang         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
54578910aadSHong Zhang       }
54678910aadSHong Zhang     }
54778910aadSHong Zhang   } while (damp||chshift);
54878910aadSHong Zhang   ierr = PetscFree(il);CHKERRQ(ierr);
54978910aadSHong Zhang 
55078910aadSHong Zhang   C->factor       = FACTOR_CHOLESKY;
55178910aadSHong Zhang   C->assembled    = PETSC_TRUE;
55278910aadSHong Zhang   C->preallocated = PETSC_TRUE;
55378910aadSHong Zhang   PetscLogFlops(C->m);
55478910aadSHong Zhang   if (ndamp) {
555*f3086b4bSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
55678910aadSHong Zhang   }
55778910aadSHong Zhang   if (nshift) {
55878910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift);
55978910aadSHong Zhang   }
560c05c3958SHong Zhang   PetscFunctionReturn(0);
561c05c3958SHong Zhang }
562c05c3958SHong Zhang 
563c05c3958SHong Zhang #include "petscbt.h"
564c05c3958SHong Zhang #include "src/mat/utils/freespace.h"
565c05c3958SHong Zhang #undef __FUNCT__
566c05c3958SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ"
567c05c3958SHong Zhang PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
568c05c3958SHong Zhang {
56978910aadSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
57078910aadSHong Zhang   Mat_SeqSBAIJ   *b;
57178910aadSHong Zhang   Mat            B;
57278910aadSHong Zhang   PetscErrorCode ierr;
57378910aadSHong Zhang   PetscTruth     perm_identity;
57478910aadSHong Zhang   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->bs,*ui;
57578910aadSHong Zhang   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
57678910aadSHong Zhang   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
57778910aadSHong Zhang   PetscReal      fill=info->fill,levels=info->levels;
57878910aadSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
57978910aadSHong Zhang   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
58078910aadSHong Zhang   PetscBT        lnkbt;
58178910aadSHong Zhang 
582c05c3958SHong Zhang   PetscFunctionBegin;
5836ad2eaddSHong Zhang   if (bs > 1){
5846ad2eaddSHong Zhang     if (!a->sbaijMat){
5856ad2eaddSHong Zhang       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
5866ad2eaddSHong Zhang     }
5876ad2eaddSHong Zhang     ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
5886ad2eaddSHong Zhang     B = *fact;
5896ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
5906ad2eaddSHong Zhang     PetscFunctionReturn(0);
5916ad2eaddSHong Zhang   }
5926ad2eaddSHong Zhang 
59378910aadSHong Zhang   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
59478910aadSHong Zhang   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
59578910aadSHong Zhang 
59678910aadSHong Zhang   /* special case that simply copies fill pattern */
59778910aadSHong Zhang   if (!levels && perm_identity) {
59878910aadSHong Zhang     ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
59978910aadSHong Zhang     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
60078910aadSHong Zhang     for (i=0; i<am; i++) {
60178910aadSHong Zhang       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
60278910aadSHong Zhang     }
60378910aadSHong Zhang     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
60478910aadSHong Zhang     B = *fact;
60578910aadSHong Zhang     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
60678910aadSHong Zhang     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
60778910aadSHong Zhang 
60878910aadSHong Zhang     b  = (Mat_SeqSBAIJ*)B->data;
60978910aadSHong Zhang     uj = b->j;
61078910aadSHong Zhang     for (i=0; i<am; i++) {
61178910aadSHong Zhang       aj = a->j + a->diag[i];
61278910aadSHong Zhang       for (j=0; j<ui[i]; j++){
61378910aadSHong Zhang         *uj++ = *aj++;
61478910aadSHong Zhang       }
61578910aadSHong Zhang       b->ilen[i] = ui[i];
61678910aadSHong Zhang     }
61778910aadSHong Zhang     ierr = PetscFree(ui);CHKERRQ(ierr);
61878910aadSHong Zhang     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
61978910aadSHong Zhang     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
62078910aadSHong Zhang 
62178910aadSHong Zhang     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
62278910aadSHong Zhang     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
62378910aadSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
62478910aadSHong Zhang     PetscFunctionReturn(0);
62578910aadSHong Zhang   }
62678910aadSHong Zhang 
62778910aadSHong Zhang   /* initialization */
62878910aadSHong Zhang   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
62978910aadSHong Zhang   ui[0] = 0;
63078910aadSHong Zhang   ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
63178910aadSHong Zhang 
63278910aadSHong Zhang   /* jl: linked list for storing indices of the pivot rows
63378910aadSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
63478910aadSHong Zhang   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
63578910aadSHong Zhang   il         = jl + am;
63678910aadSHong Zhang   uj_ptr     = (PetscInt**)(il + am);
63778910aadSHong Zhang   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
63878910aadSHong Zhang   for (i=0; i<am; i++){
63978910aadSHong Zhang     jl[i] = am; il[i] = 0;
64078910aadSHong Zhang   }
64178910aadSHong Zhang 
64278910aadSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
64378910aadSHong Zhang   nlnk = am + 1;
64478910aadSHong Zhang   ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
64578910aadSHong Zhang 
64678910aadSHong Zhang   /* initial FreeSpace size is fill*(ai[am]+1) */
64778910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
64878910aadSHong Zhang   current_space = free_space;
64978910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
65078910aadSHong Zhang   current_space_lvl = free_space_lvl;
65178910aadSHong Zhang 
65278910aadSHong Zhang   for (k=0; k<am; k++){  /* for each active row k */
65378910aadSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
65478910aadSHong Zhang     nzk   = 0;
65578910aadSHong Zhang     ncols = ai[rip[k]+1] - ai[rip[k]];
65678910aadSHong Zhang     ncols_upper = 0;
65778910aadSHong Zhang     cols        = cols_lvl + am;
65878910aadSHong Zhang     for (j=0; j<ncols; j++){
65978910aadSHong Zhang       i = rip[*(aj + ai[rip[k]] + j)];
66078910aadSHong Zhang       if (i >= k){ /* only take upper triangular entry */
66178910aadSHong Zhang         cols[ncols_upper] = i;
66278910aadSHong Zhang         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
66378910aadSHong Zhang         ncols_upper++;
66478910aadSHong Zhang       }
66578910aadSHong Zhang     }
66678910aadSHong Zhang     ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
66778910aadSHong Zhang     nzk += nlnk;
66878910aadSHong Zhang 
66978910aadSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
67078910aadSHong Zhang     prow = jl[k]; /* 1st pivot row */
67178910aadSHong Zhang 
67278910aadSHong Zhang     while (prow < k){
67378910aadSHong Zhang       nextprow = jl[prow];
67478910aadSHong Zhang 
67578910aadSHong Zhang       /* merge prow into k-th row */
67678910aadSHong Zhang       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
67778910aadSHong Zhang       jmax = ui[prow+1];
67878910aadSHong Zhang       ncols = jmax-jmin;
67978910aadSHong Zhang       i     = jmin - ui[prow];
68078910aadSHong Zhang       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
68178910aadSHong Zhang       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
68278910aadSHong Zhang       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
68378910aadSHong Zhang       nzk += nlnk;
68478910aadSHong Zhang 
68578910aadSHong Zhang       /* update il and jl for prow */
68678910aadSHong Zhang       if (jmin < jmax){
68778910aadSHong Zhang         il[prow] = jmin;
68878910aadSHong Zhang         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
68978910aadSHong Zhang       }
69078910aadSHong Zhang       prow = nextprow;
69178910aadSHong Zhang     }
69278910aadSHong Zhang 
69378910aadSHong Zhang     /* if free space is not available, make more free space */
69478910aadSHong Zhang     if (current_space->local_remaining<nzk) {
69578910aadSHong Zhang       i = am - k + 1; /* num of unfactored rows */
69678910aadSHong Zhang       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
69778910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
69878910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
69978910aadSHong Zhang       reallocs++;
70078910aadSHong Zhang     }
70178910aadSHong Zhang 
70278910aadSHong Zhang     /* copy data into free_space and free_space_lvl, then initialize lnk */
70378910aadSHong Zhang     ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
70478910aadSHong Zhang 
70578910aadSHong Zhang     /* add the k-th row into il and jl */
70678910aadSHong Zhang     if (nzk-1 > 0){
70778910aadSHong Zhang       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
70878910aadSHong Zhang       jl[k] = jl[i]; jl[i] = k;
70978910aadSHong Zhang       il[k] = ui[k] + 1;
71078910aadSHong Zhang     }
71178910aadSHong Zhang     uj_ptr[k]     = current_space->array;
71278910aadSHong Zhang     uj_lvl_ptr[k] = current_space_lvl->array;
71378910aadSHong Zhang 
71478910aadSHong Zhang     current_space->array           += nzk;
71578910aadSHong Zhang     current_space->local_used      += nzk;
71678910aadSHong Zhang     current_space->local_remaining -= nzk;
71778910aadSHong Zhang 
71878910aadSHong Zhang     current_space_lvl->array           += nzk;
71978910aadSHong Zhang     current_space_lvl->local_used      += nzk;
72078910aadSHong Zhang     current_space_lvl->local_remaining -= nzk;
72178910aadSHong Zhang 
72278910aadSHong Zhang     ui[k+1] = ui[k] + nzk;
72378910aadSHong Zhang   }
72478910aadSHong Zhang 
72578910aadSHong Zhang   if (ai[am] != 0) {
72678910aadSHong Zhang     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
72778910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
72878910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
72978910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
73078910aadSHong Zhang   } else {
73178910aadSHong Zhang      PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Empty matrix.\n");
73278910aadSHong Zhang   }
73378910aadSHong Zhang 
73478910aadSHong Zhang   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
73578910aadSHong Zhang   ierr = PetscFree(jl);CHKERRQ(ierr);
73678910aadSHong Zhang   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
73778910aadSHong Zhang 
73878910aadSHong Zhang   /* destroy list of free space and other temporary array(s) */
73978910aadSHong Zhang   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
74078910aadSHong Zhang   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
74178910aadSHong Zhang   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
74278910aadSHong Zhang   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
74378910aadSHong Zhang 
74478910aadSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
74578910aadSHong Zhang   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
74678910aadSHong Zhang   B = *fact;
74778910aadSHong Zhang   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
74878910aadSHong Zhang   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
74978910aadSHong Zhang 
75078910aadSHong Zhang   b = (Mat_SeqSBAIJ*)B->data;
75178910aadSHong Zhang   ierr = PetscFree(b->imax);CHKERRQ(ierr);
75278910aadSHong Zhang   b->singlemalloc = PETSC_FALSE;
75378910aadSHong Zhang   /* the next line frees the default space generated by the Create() */
75478910aadSHong Zhang   ierr = PetscFree(b->a);CHKERRQ(ierr);
75578910aadSHong Zhang   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
75678910aadSHong Zhang   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
75778910aadSHong Zhang   b->j    = uj;
75878910aadSHong Zhang   b->i    = ui;
75978910aadSHong Zhang   b->diag = 0;
76078910aadSHong Zhang   b->ilen = 0;
76178910aadSHong Zhang   b->imax = 0;
76278910aadSHong Zhang   b->row  = perm;
76378910aadSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
76478910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
76578910aadSHong Zhang   b->icol = perm;
76678910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
76778910aadSHong Zhang   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
76878910aadSHong Zhang   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
76978910aadSHong Zhang   b->maxnz = b->nz = ui[am];
77078910aadSHong Zhang 
77178910aadSHong Zhang   B->factor                 = FACTOR_CHOLESKY;
77278910aadSHong Zhang   B->info.factor_mallocs    = reallocs;
77378910aadSHong Zhang   B->info.fill_ratio_given  = fill;
77478910aadSHong Zhang   if (ai[am] != 0) {
77578910aadSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
77678910aadSHong Zhang   } else {
77778910aadSHong Zhang     B->info.fill_ratio_needed = 0.0;
77878910aadSHong Zhang   }
77978910aadSHong Zhang   if (perm_identity){
78078910aadSHong Zhang     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
78178910aadSHong Zhang     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
78278910aadSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
78378910aadSHong Zhang   } else {
78478910aadSHong Zhang     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
78578910aadSHong Zhang   }
786c05c3958SHong Zhang   PetscFunctionReturn(0);
787c05c3958SHong Zhang }
788c05c3958SHong Zhang 
789c05c3958SHong Zhang #undef __FUNCT__
790c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ"
791c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
792c05c3958SHong Zhang {
79378910aadSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
79478910aadSHong Zhang   Mat_SeqSBAIJ   *b;
79578910aadSHong Zhang   Mat            B;
79678910aadSHong Zhang   PetscErrorCode ierr;
79778910aadSHong Zhang   PetscTruth     perm_identity;
79878910aadSHong Zhang   PetscReal      fill = info->fill;
79978910aadSHong Zhang   PetscInt       *rip,*riip,i,mbs=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j,reallocs=0,prow;
80078910aadSHong Zhang   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
80178910aadSHong Zhang   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
80278910aadSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
80378910aadSHong Zhang   PetscBT        lnkbt;
80478910aadSHong Zhang   IS             iperm;
80578910aadSHong Zhang 
806c05c3958SHong Zhang   PetscFunctionBegin;
8076ad2eaddSHong Zhang   if (bs > 1) { /* convert to seqsbaij */
8086ad2eaddSHong Zhang     if (!a->sbaijMat){
8096ad2eaddSHong Zhang       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
8106ad2eaddSHong Zhang     }
8116ad2eaddSHong Zhang     ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
8126ad2eaddSHong Zhang     B    = *fact;
8136ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
8146ad2eaddSHong Zhang     PetscFunctionReturn(0);
8156ad2eaddSHong Zhang   }
8166ad2eaddSHong Zhang 
81778910aadSHong Zhang   /* check whether perm is the identity mapping */
81878910aadSHong Zhang   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
81978910aadSHong Zhang   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
82078910aadSHong Zhang 
82178910aadSHong Zhang   if (!perm_identity){
82278910aadSHong Zhang     /* check if perm is symmetric! */
82378910aadSHong Zhang     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
82478910aadSHong Zhang     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
82578910aadSHong Zhang     for (i=0; i<mbs; i++) {
82678910aadSHong Zhang       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
82778910aadSHong Zhang     }
82878910aadSHong Zhang     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
82978910aadSHong Zhang     ierr = ISDestroy(iperm);CHKERRQ(ierr);
83078910aadSHong Zhang   }
83178910aadSHong Zhang 
83278910aadSHong Zhang   /* initialization */
83378910aadSHong Zhang   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
83478910aadSHong Zhang   ui[0] = 0;
83578910aadSHong Zhang 
83678910aadSHong Zhang   /* jl: linked list for storing indices of the pivot rows
83778910aadSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
83878910aadSHong Zhang   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
83978910aadSHong Zhang   il     = jl + mbs;
84078910aadSHong Zhang   cols   = il + mbs;
84178910aadSHong Zhang   ui_ptr = (PetscInt**)(cols + mbs);
84278910aadSHong Zhang   for (i=0; i<mbs; i++){
84378910aadSHong Zhang     jl[i] = mbs; il[i] = 0;
84478910aadSHong Zhang   }
84578910aadSHong Zhang 
84678910aadSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
84778910aadSHong Zhang   nlnk = mbs + 1;
84878910aadSHong Zhang   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
84978910aadSHong Zhang 
85078910aadSHong Zhang   /* initial FreeSpace size is fill*(ai[mbs]+1) */
85178910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
85278910aadSHong Zhang   current_space = free_space;
85378910aadSHong Zhang 
85478910aadSHong Zhang   for (k=0; k<mbs; k++){  /* for each active row k */
85578910aadSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
85678910aadSHong Zhang     nzk   = 0;
85778910aadSHong Zhang     ncols = ai[rip[k]+1] - ai[rip[k]];
85878910aadSHong Zhang     ncols_upper = 0;
85978910aadSHong Zhang     for (j=0; j<ncols; j++){
86078910aadSHong Zhang       i = rip[*(aj + ai[rip[k]] + j)];
86178910aadSHong Zhang       if (i >= k){ /* only take upper triangular entry */
86278910aadSHong Zhang         cols[ncols_upper] = i;
86378910aadSHong Zhang         ncols_upper++;
86478910aadSHong Zhang       }
86578910aadSHong Zhang     }
86678910aadSHong Zhang     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
86778910aadSHong Zhang     nzk += nlnk;
86878910aadSHong Zhang 
86978910aadSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
87078910aadSHong Zhang     prow = jl[k]; /* 1st pivot row */
87178910aadSHong Zhang 
87278910aadSHong Zhang     while (prow < k){
87378910aadSHong Zhang       nextprow = jl[prow];
87478910aadSHong Zhang       /* merge prow into k-th row */
87578910aadSHong Zhang       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
87678910aadSHong Zhang       jmax = ui[prow+1];
87778910aadSHong Zhang       ncols = jmax-jmin;
87878910aadSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
87978910aadSHong Zhang       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
88078910aadSHong Zhang       nzk += nlnk;
88178910aadSHong Zhang 
88278910aadSHong Zhang       /* update il and jl for prow */
88378910aadSHong Zhang       if (jmin < jmax){
88478910aadSHong Zhang         il[prow] = jmin;
88578910aadSHong Zhang         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
88678910aadSHong Zhang       }
88778910aadSHong Zhang       prow = nextprow;
88878910aadSHong Zhang     }
88978910aadSHong Zhang 
89078910aadSHong Zhang     /* if free space is not available, make more free space */
89178910aadSHong Zhang     if (current_space->local_remaining<nzk) {
89278910aadSHong Zhang       i = mbs - k + 1; /* num of unfactored rows */
89378910aadSHong Zhang       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
89478910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
89578910aadSHong Zhang       reallocs++;
89678910aadSHong Zhang     }
89778910aadSHong Zhang 
89878910aadSHong Zhang     /* copy data into free space, then initialize lnk */
89978910aadSHong Zhang     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
90078910aadSHong Zhang 
90178910aadSHong Zhang     /* add the k-th row into il and jl */
90278910aadSHong Zhang     if (nzk-1 > 0){
90378910aadSHong Zhang       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
90478910aadSHong Zhang       jl[k] = jl[i]; jl[i] = k;
90578910aadSHong Zhang       il[k] = ui[k] + 1;
90678910aadSHong Zhang     }
90778910aadSHong Zhang     ui_ptr[k] = current_space->array;
90878910aadSHong Zhang     current_space->array           += nzk;
90978910aadSHong Zhang     current_space->local_used      += nzk;
91078910aadSHong Zhang     current_space->local_remaining -= nzk;
91178910aadSHong Zhang 
91278910aadSHong Zhang     ui[k+1] = ui[k] + nzk;
91378910aadSHong Zhang   }
91478910aadSHong Zhang 
91578910aadSHong Zhang   if (ai[mbs] != 0) {
91678910aadSHong Zhang     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
91778910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
91878910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
91978910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
92078910aadSHong Zhang   } else {
92178910aadSHong Zhang      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
92278910aadSHong Zhang   }
92378910aadSHong Zhang 
92478910aadSHong Zhang   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
92578910aadSHong Zhang   ierr = PetscFree(jl);CHKERRQ(ierr);
92678910aadSHong Zhang 
92778910aadSHong Zhang   /* destroy list of free space and other temporary array(s) */
92878910aadSHong Zhang   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
92978910aadSHong Zhang   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
93078910aadSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
93178910aadSHong Zhang 
93278910aadSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
93378910aadSHong Zhang   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
93478910aadSHong Zhang   B    = *fact;
93578910aadSHong Zhang   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
93678910aadSHong Zhang   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr);
93778910aadSHong Zhang 
93878910aadSHong Zhang   b = (Mat_SeqSBAIJ*)B->data;
93978910aadSHong Zhang   ierr = PetscFree(b->imax);CHKERRQ(ierr);
94078910aadSHong Zhang   b->singlemalloc = PETSC_FALSE;
94178910aadSHong Zhang   /* the next line frees the default space generated by the Create() */
94278910aadSHong Zhang   ierr = PetscFree(b->a);CHKERRQ(ierr);
94378910aadSHong Zhang   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
94478910aadSHong Zhang   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
94578910aadSHong Zhang   b->j    = uj;
94678910aadSHong Zhang   b->i    = ui;
94778910aadSHong Zhang   b->diag = 0;
94878910aadSHong Zhang   b->ilen = 0;
94978910aadSHong Zhang   b->imax = 0;
95078910aadSHong Zhang   b->row  = perm;
95178910aadSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
95278910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
95378910aadSHong Zhang   b->icol = perm;
95478910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
95578910aadSHong Zhang   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
95678910aadSHong Zhang   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
95778910aadSHong Zhang   b->maxnz = b->nz = ui[mbs];
95878910aadSHong Zhang 
95978910aadSHong Zhang   B->factor                 = FACTOR_CHOLESKY;
96078910aadSHong Zhang   B->info.factor_mallocs    = reallocs;
96178910aadSHong Zhang   B->info.fill_ratio_given  = fill;
96278910aadSHong Zhang   if (ai[mbs] != 0) {
96378910aadSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
96478910aadSHong Zhang   } else {
96578910aadSHong Zhang     B->info.fill_ratio_needed = 0.0;
96678910aadSHong Zhang   }
96778910aadSHong Zhang   if (perm_identity){
9686ad2eaddSHong Zhang     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
9696ad2eaddSHong Zhang     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
9706ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
97178910aadSHong Zhang   } else {
9726ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
97378910aadSHong Zhang   }
974c05c3958SHong Zhang   PetscFunctionReturn(0);
975c05c3958SHong Zhang }
976