xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision 78910aad7aab791d74ced88076e1a636de50be6c)
15d517e7eSBarry Smith /*
2ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
35d517e7eSBarry Smith */
470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
54078e994SBarry Smith #include "src/inline/ilu.h"
6ec3a8b7bSBarry Smith 
76d84be18SBarry Smith /* ------------------------------------------------------------*/
86d84be18SBarry Smith /*
94eeb42bcSBarry Smith       Version for when blocks are 2 by 2
104eeb42bcSBarry Smith */
114a2ae208SSatish Balay #undef __FUNCT__
124a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
13dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
144eeb42bcSBarry Smith {
154eeb42bcSBarry Smith   Mat            C = *B;
164eeb42bcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
177cdf2dbbSSatish Balay   IS             isrow = b->row,isicol = b->icol;
186849ba73SBarry Smith   PetscErrorCode ierr;
19690b6cddSBarry Smith   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
20690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row;
21690b6cddSBarry Smith   PetscInt       *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
22329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
232515f8d2SSatish Balay   MatScalar      p1,p2,p3,p4;
243f1db9ecSBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
254eeb42bcSBarry Smith 
263a40ed3dSBarry Smith   PetscFunctionBegin;
274eeb42bcSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
284eeb42bcSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
29b0a32e0cSBarry Smith   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
304eeb42bcSBarry Smith 
314eeb42bcSBarry Smith   for (i=0; i<n; i++) {
324078e994SBarry Smith     nz    = bi[i+1] - bi[i];
334078e994SBarry Smith     ajtmp = bj + bi[i];
344eeb42bcSBarry Smith     for  (j=0; j<nz; j++) {
354eeb42bcSBarry Smith       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
364eeb42bcSBarry Smith     }
374eeb42bcSBarry Smith     /* load in initial (unfactored row) */
384eeb42bcSBarry Smith     idx      = r[i];
394078e994SBarry Smith     nz       = ai[idx+1] - ai[idx];
404078e994SBarry Smith     ajtmpold = aj + ai[idx];
414078e994SBarry Smith     v        = aa + 4*ai[idx];
424eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
434eeb42bcSBarry Smith       x    = rtmp+4*ic[ajtmpold[j]];
444eeb42bcSBarry Smith       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
454eeb42bcSBarry Smith       v    += 4;
464eeb42bcSBarry Smith     }
474eeb42bcSBarry Smith     row = *ajtmp++;
484eeb42bcSBarry Smith     while (row < i) {
494eeb42bcSBarry Smith       pc = rtmp + 4*row;
504eeb42bcSBarry Smith       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
5188685aaeSLois Curfman McInnes       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
524078e994SBarry Smith         pv = ba + 4*diag_offset[row];
534078e994SBarry Smith         pj = bj + diag_offset[row] + 1;
544eeb42bcSBarry Smith         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
554eeb42bcSBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
564eeb42bcSBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
574eeb42bcSBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
584eeb42bcSBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
594078e994SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
604eeb42bcSBarry Smith         pv += 4;
614eeb42bcSBarry Smith         for (j=0; j<nz; j++) {
624eeb42bcSBarry Smith           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
634eeb42bcSBarry Smith           x    = rtmp + 4*pj[j];
644eeb42bcSBarry Smith           x[0] -= m1*x1 + m3*x2;
654eeb42bcSBarry Smith           x[1] -= m2*x1 + m4*x2;
664eeb42bcSBarry Smith           x[2] -= m1*x3 + m3*x4;
674eeb42bcSBarry Smith           x[3] -= m2*x3 + m4*x4;
684eeb42bcSBarry Smith           pv   += 4;
694eeb42bcSBarry Smith         }
70b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
714eeb42bcSBarry Smith       }
724eeb42bcSBarry Smith       row = *ajtmp++;
734eeb42bcSBarry Smith     }
744eeb42bcSBarry Smith     /* finished row so stick it into b->a */
754078e994SBarry Smith     pv = ba + 4*bi[i];
764078e994SBarry Smith     pj = bj + bi[i];
774078e994SBarry Smith     nz = bi[i+1] - bi[i];
784eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
794eeb42bcSBarry Smith       x     = rtmp+4*pj[j];
804eeb42bcSBarry Smith       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
814eeb42bcSBarry Smith       pv   += 4;
824eeb42bcSBarry Smith     }
834eeb42bcSBarry Smith     /* invert diagonal block */
844078e994SBarry Smith     w = ba + 4*diag_offset[i];
854cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
864eeb42bcSBarry Smith   }
874eeb42bcSBarry Smith 
88606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
894eeb42bcSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
904eeb42bcSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
914eeb42bcSBarry Smith   C->factor = FACTOR_LU;
924eeb42bcSBarry Smith   C->assembled = PETSC_TRUE;
93b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
943a40ed3dSBarry Smith   PetscFunctionReturn(0);
954eeb42bcSBarry Smith }
964cd38560SBarry Smith /*
974cd38560SBarry Smith       Version for when blocks are 2 by 2 Using natural ordering
984cd38560SBarry Smith */
994a2ae208SSatish Balay #undef __FUNCT__
1004a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
101dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
1024cd38560SBarry Smith {
1034cd38560SBarry Smith   Mat            C = *B;
1044cd38560SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
105dfbe8321SBarry Smith   PetscErrorCode ierr;
106690b6cddSBarry Smith   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
107690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row;
108690b6cddSBarry Smith   PetscInt       *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,*pc,*w,*x;
1102515f8d2SSatish Balay   MatScalar      p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
1114cd38560SBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
1124cd38560SBarry Smith 
1134cd38560SBarry Smith   PetscFunctionBegin;
114b0a32e0cSBarry Smith   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1154cd38560SBarry Smith 
1164cd38560SBarry Smith   for (i=0; i<n; i++) {
1174cd38560SBarry Smith     nz    = bi[i+1] - bi[i];
1184cd38560SBarry Smith     ajtmp = bj + bi[i];
1194cd38560SBarry Smith     for  (j=0; j<nz; j++) {
1204cd38560SBarry Smith       x = rtmp+4*ajtmp[j];
1214cd38560SBarry Smith       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
1224cd38560SBarry Smith     }
1234cd38560SBarry Smith     /* load in initial (unfactored row) */
1244cd38560SBarry Smith     nz       = ai[i+1] - ai[i];
1254cd38560SBarry Smith     ajtmpold = aj + ai[i];
1264cd38560SBarry Smith     v        = aa + 4*ai[i];
1274cd38560SBarry Smith     for (j=0; j<nz; j++) {
1284cd38560SBarry Smith       x    = rtmp+4*ajtmpold[j];
1294cd38560SBarry Smith       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
1304cd38560SBarry Smith       v    += 4;
1314cd38560SBarry Smith     }
1324cd38560SBarry Smith     row = *ajtmp++;
1334cd38560SBarry Smith     while (row < i) {
1344cd38560SBarry Smith       pc  = rtmp + 4*row;
1354cd38560SBarry Smith       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
1364cd38560SBarry Smith       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
1374cd38560SBarry Smith         pv = ba + 4*diag_offset[row];
1384cd38560SBarry Smith         pj = bj + diag_offset[row] + 1;
1394cd38560SBarry Smith         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
1404cd38560SBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
1414cd38560SBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
1424cd38560SBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
1434cd38560SBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
1444cd38560SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
1454cd38560SBarry Smith         pv += 4;
1464cd38560SBarry Smith         for (j=0; j<nz; j++) {
1474cd38560SBarry Smith           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
1484cd38560SBarry Smith           x    = rtmp + 4*pj[j];
1494cd38560SBarry Smith           x[0] -= m1*x1 + m3*x2;
1504cd38560SBarry Smith           x[1] -= m2*x1 + m4*x2;
1514cd38560SBarry Smith           x[2] -= m1*x3 + m3*x4;
1524cd38560SBarry Smith           x[3] -= m2*x3 + m4*x4;
1534cd38560SBarry Smith           pv   += 4;
1544cd38560SBarry Smith         }
155b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
1564cd38560SBarry Smith       }
1574cd38560SBarry Smith       row = *ajtmp++;
1584cd38560SBarry Smith     }
1594cd38560SBarry Smith     /* finished row so stick it into b->a */
1604cd38560SBarry Smith     pv = ba + 4*bi[i];
1614cd38560SBarry Smith     pj = bj + bi[i];
1624cd38560SBarry Smith     nz = bi[i+1] - bi[i];
1634cd38560SBarry Smith     for (j=0; j<nz; j++) {
1644cd38560SBarry Smith       x      = rtmp+4*pj[j];
1654cd38560SBarry Smith       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
1664cd38560SBarry Smith       pv   += 4;
1674cd38560SBarry Smith     }
1684cd38560SBarry Smith     /* invert diagonal block */
1694cd38560SBarry Smith     w = ba + 4*diag_offset[i];
1704cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
1714cd38560SBarry Smith     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
1724cd38560SBarry Smith   }
1734cd38560SBarry Smith 
174606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1754cd38560SBarry Smith   C->factor    = FACTOR_LU;
1764cd38560SBarry Smith   C->assembled = PETSC_TRUE;
177b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1784cd38560SBarry Smith   PetscFunctionReturn(0);
1794cd38560SBarry Smith }
1807fc0212eSBarry Smith 
1817fc0212eSBarry Smith /* ----------------------------------------------------------- */
1827fc0212eSBarry Smith /*
1837fc0212eSBarry Smith      Version for when blocks are 1 by 1.
1847fc0212eSBarry Smith */
1854a2ae208SSatish Balay #undef __FUNCT__
1864a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
187dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
1887fc0212eSBarry Smith {
1897fc0212eSBarry Smith   Mat            C = *B;
1907fc0212eSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
1917cdf2dbbSSatish Balay   IS             isrow = b->row,isicol = b->icol;
1926849ba73SBarry Smith   PetscErrorCode ierr;
193690b6cddSBarry Smith   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
194690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
195690b6cddSBarry Smith   PetscInt       *diag_offset = b->diag,diag,*pj;
196329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,multiplier,*pc;
1973f1db9ecSBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
1987fc0212eSBarry Smith 
1993a40ed3dSBarry Smith   PetscFunctionBegin;
2007fc0212eSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2017fc0212eSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
202b0a32e0cSBarry Smith   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2037fc0212eSBarry Smith 
2047fc0212eSBarry Smith   for (i=0; i<n; i++) {
2054078e994SBarry Smith     nz    = bi[i+1] - bi[i];
2064078e994SBarry Smith     ajtmp = bj + bi[i];
2077fc0212eSBarry Smith     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
2087fc0212eSBarry Smith 
2097fc0212eSBarry Smith     /* load in initial (unfactored row) */
2104078e994SBarry Smith     nz       = ai[r[i]+1] - ai[r[i]];
2114078e994SBarry Smith     ajtmpold = aj + ai[r[i]];
2124078e994SBarry Smith     v        = aa + ai[r[i]];
2137fc0212eSBarry Smith     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
2147fc0212eSBarry Smith 
2157fc0212eSBarry Smith     row = *ajtmp++;
2167fc0212eSBarry Smith     while (row < i) {
2177fc0212eSBarry Smith       pc = rtmp + row;
2187fc0212eSBarry Smith       if (*pc != 0.0) {
2194078e994SBarry Smith         pv         = ba + diag_offset[row];
2204078e994SBarry Smith         pj         = bj + diag_offset[row] + 1;
2217fc0212eSBarry Smith         multiplier = *pc * *pv++;
2227fc0212eSBarry Smith         *pc        = multiplier;
2234078e994SBarry Smith         nz         = bi[row+1] - diag_offset[row] - 1;
2247fc0212eSBarry Smith         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
225b0a32e0cSBarry Smith         PetscLogFlops(1+2*nz);
2267fc0212eSBarry Smith       }
2277fc0212eSBarry Smith       row = *ajtmp++;
2287fc0212eSBarry Smith     }
2297fc0212eSBarry Smith     /* finished row so stick it into b->a */
2304078e994SBarry Smith     pv = ba + bi[i];
2314078e994SBarry Smith     pj = bj + bi[i];
2324078e994SBarry Smith     nz = bi[i+1] - bi[i];
2337fc0212eSBarry Smith     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
2344078e994SBarry Smith     diag = diag_offset[i] - bi[i];
2357fc0212eSBarry Smith     /* check pivot entry for current row */
236a73cf429SBarry Smith     if (pv[diag] == 0.0) {
23729bbc08cSBarry Smith       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
2387fc0212eSBarry Smith     }
2397fc0212eSBarry Smith     pv[diag] = 1.0/pv[diag];
2407fc0212eSBarry Smith   }
2417fc0212eSBarry Smith 
242606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2437fc0212eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2447fc0212eSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2457fc0212eSBarry Smith   C->factor    = FACTOR_LU;
2467fc0212eSBarry Smith   C->assembled = PETSC_TRUE;
247b0a32e0cSBarry Smith   PetscLogFlops(C->n);
2483a40ed3dSBarry Smith   PetscFunctionReturn(0);
2497fc0212eSBarry Smith }
2507fc0212eSBarry Smith 
251273d9f13SBarry Smith 
2525d517e7eSBarry Smith /* ----------------------------------------------------------- */
2534a2ae208SSatish Balay #undef __FUNCT__
2544a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqBAIJ"
255dfbe8321SBarry Smith PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
2565d517e7eSBarry Smith {
257dfbe8321SBarry Smith   PetscErrorCode ierr;
2585d517e7eSBarry Smith   Mat            C;
2595d517e7eSBarry Smith 
2603a40ed3dSBarry Smith   PetscFunctionBegin;
261b9b97703SBarry Smith   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
2627fc0212eSBarry Smith   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
263273d9f13SBarry Smith   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
264b0a32e0cSBarry Smith   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
2653a40ed3dSBarry Smith   PetscFunctionReturn(0);
2665d517e7eSBarry Smith }
2674cd38560SBarry Smith 
268c05c3958SHong Zhang #include "src/mat/impls/sbaij/seq/sbaij.h"
269c05c3958SHong Zhang #undef __FUNCT__
270c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N"
271c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,Mat *B)
272c05c3958SHong Zhang {
273*78910aadSHong Zhang   Mat            C = *B;
274*78910aadSHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
275*78910aadSHong Zhang   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
276*78910aadSHong Zhang   IS             ip=b->row;
277*78910aadSHong Zhang   PetscErrorCode ierr;
278*78910aadSHong Zhang   PetscInt       *rip,i,j,mbs=a->mbs,bs=A->bs,*bi=b->i,*bj=b->j,*bcol;
279*78910aadSHong Zhang   PetscInt       *ai=a->i,*aj=a->j;
280*78910aadSHong Zhang   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
281*78910aadSHong Zhang   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
282*78910aadSHong Zhang   PetscReal      damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount;
283*78910aadSHong Zhang   PetscTruth     damp,chshift;
284*78910aadSHong Zhang   PetscInt       nshift=0,ndamp=0;
285*78910aadSHong Zhang 
286c05c3958SHong Zhang   PetscFunctionBegin;
287*78910aadSHong Zhang   if (bs > 1) SETERRQ(PETSC_ERR_USER,"not done yet");
288*78910aadSHong Zhang   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
289*78910aadSHong Zhang 
290*78910aadSHong Zhang   /* initialization */
291*78910aadSHong Zhang   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
292*78910aadSHong Zhang   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
293*78910aadSHong Zhang   jl   = il + mbs;
294*78910aadSHong Zhang   rtmp = (MatScalar*)(jl + mbs);
295*78910aadSHong Zhang 
296*78910aadSHong Zhang   shift_amount = 0;
297*78910aadSHong Zhang   do {
298*78910aadSHong Zhang     damp = PETSC_FALSE;
299*78910aadSHong Zhang     chshift = PETSC_FALSE;
300*78910aadSHong Zhang     for (i=0; i<mbs; i++) {
301*78910aadSHong Zhang       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
302*78910aadSHong Zhang     }
303*78910aadSHong Zhang 
304*78910aadSHong Zhang     for (k = 0; k<mbs; k++){
305*78910aadSHong Zhang       bval = ba + bi[k];
306*78910aadSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
307*78910aadSHong Zhang       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
308*78910aadSHong Zhang       for (j = jmin; j < jmax; j++){
309*78910aadSHong Zhang         col = rip[aj[j]];
310*78910aadSHong Zhang         if (col >= k){ /* only take upper triangular entry */
311*78910aadSHong Zhang           rtmp[col] = aa[j];
312*78910aadSHong Zhang           *bval++  = 0.0; /* for in-place factorization */
313*78910aadSHong Zhang         }
314*78910aadSHong Zhang       }
315*78910aadSHong Zhang       /* damp the diagonal of the matrix */
316*78910aadSHong Zhang       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
317*78910aadSHong Zhang 
318*78910aadSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
319*78910aadSHong Zhang       dk = rtmp[k];
320*78910aadSHong Zhang       i = jl[k]; /* first row to be added to k_th row  */
321*78910aadSHong Zhang 
322*78910aadSHong Zhang       while (i < k){
323*78910aadSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
324*78910aadSHong Zhang 
325*78910aadSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
326*78910aadSHong Zhang         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
327*78910aadSHong Zhang         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
328*78910aadSHong Zhang         dk += uikdi*ba[ili];
329*78910aadSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
330*78910aadSHong Zhang 
331*78910aadSHong Zhang         /* add multiple of row i to k-th row */
332*78910aadSHong Zhang         jmin = ili + 1; jmax = bi[i+1];
333*78910aadSHong Zhang         if (jmin < jmax){
334*78910aadSHong Zhang           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
335*78910aadSHong Zhang           /* update il and jl for row i */
336*78910aadSHong Zhang           il[i] = jmin;
337*78910aadSHong Zhang           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
338*78910aadSHong Zhang         }
339*78910aadSHong Zhang         i = nexti;
340*78910aadSHong Zhang       }
341*78910aadSHong Zhang 
342*78910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
343*78910aadSHong Zhang 	/* calculate a shift that would make this row diagonally dominant */
344*78910aadSHong Zhang 	PetscReal rs = PetscAbs(PetscRealPart(dk));
345*78910aadSHong Zhang 	jmin      = bi[k]+1;
346*78910aadSHong Zhang 	nz        = bi[k+1] - jmin;
347*78910aadSHong Zhang 	if (nz){
348*78910aadSHong Zhang 	  bcol = bj + jmin;
349*78910aadSHong Zhang 	  bval = ba + jmin;
350*78910aadSHong Zhang 	  while (nz--){
351*78910aadSHong Zhang 	    rs += PetscAbsScalar(rtmp[*bcol++]);
352*78910aadSHong Zhang 	  }
353*78910aadSHong Zhang 	}
354*78910aadSHong Zhang 	/* if this shift is less than the previous, just up the previous
355*78910aadSHong Zhang 	   one by a bit */
356*78910aadSHong Zhang 	shift_amount = PetscMax(rs,1.1*shift_amount);
357*78910aadSHong Zhang 	chshift  = PETSC_TRUE;
358*78910aadSHong Zhang 	/* Unlike in the ILU case there is no exit condition on nshift:
359*78910aadSHong Zhang 	   we increase the shift until it converges. There is no guarantee that
360*78910aadSHong Zhang 	   this algorithm converges faster or slower, or is better or worse
361*78910aadSHong Zhang 	   than the ILU algorithm. */
362*78910aadSHong Zhang 	nshift++;
363*78910aadSHong Zhang 	break;
364*78910aadSHong Zhang       }
365*78910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot){
366*78910aadSHong Zhang         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
367*78910aadSHong Zhang         if (damping > 0.0) {
368*78910aadSHong Zhang           if (ndamp) damping *= 2.0;
369*78910aadSHong Zhang           damp = PETSC_TRUE;
370*78910aadSHong Zhang           ndamp++;
371*78910aadSHong Zhang           break;
372*78910aadSHong Zhang         } else if (PetscAbsScalar(dk) < zeropivot){
373*78910aadSHong Zhang           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
374*78910aadSHong Zhang         } else {
375*78910aadSHong Zhang           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
376*78910aadSHong Zhang         }
377*78910aadSHong Zhang       }
378*78910aadSHong Zhang 
379*78910aadSHong Zhang       /* copy data into U(k,:) */
380*78910aadSHong Zhang       ba[bi[k]] = 1.0/dk; /* U(k,k) */
381*78910aadSHong Zhang       jmin = bi[k]+1; jmax = bi[k+1];
382*78910aadSHong Zhang       if (jmin < jmax) {
383*78910aadSHong Zhang         for (j=jmin; j<jmax; j++){
384*78910aadSHong Zhang           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
385*78910aadSHong Zhang         }
386*78910aadSHong Zhang         /* add the k-th row into il and jl */
387*78910aadSHong Zhang         il[k] = jmin;
388*78910aadSHong Zhang         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
389*78910aadSHong Zhang       }
390*78910aadSHong Zhang     }
391*78910aadSHong Zhang   } while (damp||chshift);
392*78910aadSHong Zhang   ierr = PetscFree(il);CHKERRQ(ierr);
393*78910aadSHong Zhang 
394*78910aadSHong Zhang   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
395*78910aadSHong Zhang   C->factor       = FACTOR_CHOLESKY;
396*78910aadSHong Zhang   C->assembled    = PETSC_TRUE;
397*78910aadSHong Zhang   C->preallocated = PETSC_TRUE;
398*78910aadSHong Zhang   PetscLogFlops(C->m);
399*78910aadSHong Zhang   if (ndamp) {
400*78910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
401*78910aadSHong Zhang   }
402*78910aadSHong Zhang   if (nshift) {
403*78910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ diagonal shifted %D shifts\n",nshift);
404*78910aadSHong Zhang   }
405c05c3958SHong Zhang   PetscFunctionReturn(0);
406c05c3958SHong Zhang }
4074cd38560SBarry Smith 
408c05c3958SHong Zhang #undef __FUNCT__
409c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering"
410c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,Mat *fact)
411c05c3958SHong Zhang {
412*78910aadSHong Zhang   Mat            C = *fact;
413*78910aadSHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
414*78910aadSHong Zhang   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
415*78910aadSHong Zhang   PetscErrorCode ierr;
416*78910aadSHong Zhang   PetscInt       i,j,am=a->mbs,bs=A->bs;
417*78910aadSHong Zhang   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
418*78910aadSHong Zhang   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
419*78910aadSHong Zhang   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
420*78910aadSHong Zhang   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
421*78910aadSHong Zhang   PetscTruth     damp,chshift;
422*78910aadSHong Zhang   PetscInt       nshift=0;
423*78910aadSHong Zhang 
424c05c3958SHong Zhang   PetscFunctionBegin;
425*78910aadSHong Zhang   if (bs > 1) SETERRQ(PETSC_ERR_USER,"not done yet");
426*78910aadSHong Zhang   /* initialization */
427*78910aadSHong Zhang   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
428*78910aadSHong Zhang   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
429*78910aadSHong Zhang   jl   = il + am;
430*78910aadSHong Zhang   rtmp = (MatScalar*)(jl + am);
431*78910aadSHong Zhang 
432*78910aadSHong Zhang   shift_amount = 0;
433*78910aadSHong Zhang   do {
434*78910aadSHong Zhang     damp = PETSC_FALSE;
435*78910aadSHong Zhang     chshift = PETSC_FALSE;
436*78910aadSHong Zhang     for (i=0; i<am; i++) {
437*78910aadSHong Zhang       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
438*78910aadSHong Zhang     }
439*78910aadSHong Zhang 
440*78910aadSHong Zhang     for (k = 0; k<am; k++){
441*78910aadSHong Zhang     /* initialize k-th row with elements nonzero in row perm(k) of A */
442*78910aadSHong Zhang       nz   = ai[k+1] - ai[k];
443*78910aadSHong Zhang       acol = aj + ai[k];
444*78910aadSHong Zhang       aval = aa + ai[k];
445*78910aadSHong Zhang       bval = ba + bi[k];
446*78910aadSHong Zhang       while (nz -- ){
447*78910aadSHong Zhang         if (*acol < k) { /* skip lower triangular entries */
448*78910aadSHong Zhang           acol++; aval++;
449*78910aadSHong Zhang         } else {
450*78910aadSHong Zhang           rtmp[*acol++] = *aval++;
451*78910aadSHong Zhang           *bval++       = 0.0; /* for in-place factorization */
452*78910aadSHong Zhang         }
453*78910aadSHong Zhang       }
454*78910aadSHong Zhang       /* damp the diagonal of the matrix */
455*78910aadSHong Zhang       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
456*78910aadSHong Zhang 
457*78910aadSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
458*78910aadSHong Zhang       dk = rtmp[k];
459*78910aadSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
460*78910aadSHong Zhang 
461*78910aadSHong Zhang       while (i < k){
462*78910aadSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
463*78910aadSHong Zhang         /* compute multiplier, update D(k) and U(i,k) */
464*78910aadSHong Zhang         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
465*78910aadSHong Zhang         uikdi = - ba[ili]*ba[bi[i]];
466*78910aadSHong Zhang         dk   += uikdi*ba[ili];
467*78910aadSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
468*78910aadSHong Zhang 
469*78910aadSHong Zhang         /* add multiple of row i to k-th row ... */
470*78910aadSHong Zhang         jmin = ili + 1;
471*78910aadSHong Zhang         nz   = bi[i+1] - jmin;
472*78910aadSHong Zhang         if (nz > 0){
473*78910aadSHong Zhang           bcol = bj + jmin;
474*78910aadSHong Zhang           bval = ba + jmin;
475*78910aadSHong Zhang           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
476*78910aadSHong Zhang           /* update il and jl for i-th row */
477*78910aadSHong Zhang           il[i] = jmin;
478*78910aadSHong Zhang           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
479*78910aadSHong Zhang         }
480*78910aadSHong Zhang         i = nexti;
481*78910aadSHong Zhang       }
482*78910aadSHong Zhang 
483*78910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
484*78910aadSHong Zhang 	/* calculate a shift that would make this row diagonally dominant */
485*78910aadSHong Zhang 	PetscReal rs = PetscAbs(PetscRealPart(dk));
486*78910aadSHong Zhang 	jmin      = bi[k]+1;
487*78910aadSHong Zhang 	nz        = bi[k+1] - jmin;
488*78910aadSHong Zhang 	if (nz){
489*78910aadSHong Zhang 	  bcol = bj + jmin;
490*78910aadSHong Zhang 	  bval = ba + jmin;
491*78910aadSHong Zhang 	  while (nz--){
492*78910aadSHong Zhang 	    rs += PetscAbsScalar(rtmp[*bcol++]);
493*78910aadSHong Zhang 	  }
494*78910aadSHong Zhang 	}
495*78910aadSHong Zhang 	/* if this shift is less than the previous, just up the previous
496*78910aadSHong Zhang 	   one by a bit */
497*78910aadSHong Zhang 	shift_amount = PetscMax(rs,1.1*shift_amount);
498*78910aadSHong Zhang 	chshift  = PETSC_TRUE;
499*78910aadSHong Zhang 	/* Unlike in the ILU case there is no exit condition on nshift:
500*78910aadSHong Zhang 	   we increase the shift until it converges. There is no guarantee that
501*78910aadSHong Zhang 	   this algorithm converges faster or slower, or is better or worse
502*78910aadSHong Zhang 	   than the ILU algorithm. */
503*78910aadSHong Zhang 	nshift++;
504*78910aadSHong Zhang 	break;
505*78910aadSHong Zhang       }
506*78910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot){
507*78910aadSHong Zhang         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
508*78910aadSHong Zhang         if (damping > 0.0) {
509*78910aadSHong Zhang           if (ndamp) damping *= 2.0;
510*78910aadSHong Zhang           damp = PETSC_TRUE;
511*78910aadSHong Zhang           ndamp++;
512*78910aadSHong Zhang           break;
513*78910aadSHong Zhang         } else if (PetscAbsScalar(dk) < zeropivot){
514*78910aadSHong Zhang           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
515*78910aadSHong Zhang         } else {
516*78910aadSHong Zhang           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
517*78910aadSHong Zhang         }
518*78910aadSHong Zhang       }
519*78910aadSHong Zhang 
520*78910aadSHong Zhang       /* copy data into U(k,:) */
521*78910aadSHong Zhang       ba[bi[k]] = 1.0/dk;
522*78910aadSHong Zhang       jmin      = bi[k]+1;
523*78910aadSHong Zhang       nz        = bi[k+1] - jmin;
524*78910aadSHong Zhang       if (nz){
525*78910aadSHong Zhang         bcol = bj + jmin;
526*78910aadSHong Zhang         bval = ba + jmin;
527*78910aadSHong Zhang         while (nz--){
528*78910aadSHong Zhang           *bval++       = rtmp[*bcol];
529*78910aadSHong Zhang           rtmp[*bcol++] = 0.0;
530*78910aadSHong Zhang         }
531*78910aadSHong Zhang         /* add k-th row into il and jl */
532*78910aadSHong Zhang         il[k] = jmin;
533*78910aadSHong Zhang         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
534*78910aadSHong Zhang       }
535*78910aadSHong Zhang     }
536*78910aadSHong Zhang   } while (damp||chshift);
537*78910aadSHong Zhang   ierr = PetscFree(il);CHKERRQ(ierr);
538*78910aadSHong Zhang 
539*78910aadSHong Zhang   C->factor       = FACTOR_CHOLESKY;
540*78910aadSHong Zhang   C->assembled    = PETSC_TRUE;
541*78910aadSHong Zhang   C->preallocated = PETSC_TRUE;
542*78910aadSHong Zhang   PetscLogFlops(C->m);
543*78910aadSHong Zhang   if (ndamp) {
544*78910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
545*78910aadSHong Zhang   }
546*78910aadSHong Zhang   if (nshift) {
547*78910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift);
548*78910aadSHong Zhang   }
549c05c3958SHong Zhang   PetscFunctionReturn(0);
550c05c3958SHong Zhang }
551c05c3958SHong Zhang 
552c05c3958SHong Zhang #include "petscbt.h"
553c05c3958SHong Zhang #include "src/mat/utils/freespace.h"
554c05c3958SHong Zhang #undef __FUNCT__
555c05c3958SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ"
556c05c3958SHong Zhang PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
557c05c3958SHong Zhang {
558*78910aadSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
559*78910aadSHong Zhang   Mat_SeqSBAIJ   *b;
560*78910aadSHong Zhang   Mat            B;
561*78910aadSHong Zhang   PetscErrorCode ierr;
562*78910aadSHong Zhang   PetscTruth     perm_identity;
563*78910aadSHong Zhang   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->bs,*ui;
564*78910aadSHong Zhang   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
565*78910aadSHong Zhang   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
566*78910aadSHong Zhang   PetscReal      fill=info->fill,levels=info->levels;
567*78910aadSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
568*78910aadSHong Zhang   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
569*78910aadSHong Zhang   PetscBT        lnkbt;
570*78910aadSHong Zhang 
571c05c3958SHong Zhang   PetscFunctionBegin;
572*78910aadSHong Zhang   if (bs > 1) SETERRQ(PETSC_ERR_USER,"not done yet");
573*78910aadSHong Zhang     ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
574*78910aadSHong Zhang   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
575*78910aadSHong Zhang 
576*78910aadSHong Zhang   /* special case that simply copies fill pattern */
577*78910aadSHong Zhang   if (!levels && perm_identity) {
578*78910aadSHong Zhang     ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
579*78910aadSHong Zhang     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
580*78910aadSHong Zhang     for (i=0; i<am; i++) {
581*78910aadSHong Zhang       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
582*78910aadSHong Zhang     }
583*78910aadSHong Zhang     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
584*78910aadSHong Zhang     B = *fact;
585*78910aadSHong Zhang     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
586*78910aadSHong Zhang     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
587*78910aadSHong Zhang 
588*78910aadSHong Zhang     b  = (Mat_SeqSBAIJ*)B->data;
589*78910aadSHong Zhang     uj = b->j;
590*78910aadSHong Zhang     for (i=0; i<am; i++) {
591*78910aadSHong Zhang       aj = a->j + a->diag[i];
592*78910aadSHong Zhang       for (j=0; j<ui[i]; j++){
593*78910aadSHong Zhang         *uj++ = *aj++;
594*78910aadSHong Zhang       }
595*78910aadSHong Zhang       b->ilen[i] = ui[i];
596*78910aadSHong Zhang     }
597*78910aadSHong Zhang     ierr = PetscFree(ui);CHKERRQ(ierr);
598*78910aadSHong Zhang     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
599*78910aadSHong Zhang     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
600*78910aadSHong Zhang 
601*78910aadSHong Zhang     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
602*78910aadSHong Zhang     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
603*78910aadSHong Zhang     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
604*78910aadSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
605*78910aadSHong Zhang     PetscFunctionReturn(0);
606*78910aadSHong Zhang   }
607*78910aadSHong Zhang 
608*78910aadSHong Zhang   /* initialization */
609*78910aadSHong Zhang   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
610*78910aadSHong Zhang   ui[0] = 0;
611*78910aadSHong Zhang   ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
612*78910aadSHong Zhang 
613*78910aadSHong Zhang   /* jl: linked list for storing indices of the pivot rows
614*78910aadSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
615*78910aadSHong Zhang   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
616*78910aadSHong Zhang   il         = jl + am;
617*78910aadSHong Zhang   uj_ptr     = (PetscInt**)(il + am);
618*78910aadSHong Zhang   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
619*78910aadSHong Zhang   for (i=0; i<am; i++){
620*78910aadSHong Zhang     jl[i] = am; il[i] = 0;
621*78910aadSHong Zhang   }
622*78910aadSHong Zhang 
623*78910aadSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
624*78910aadSHong Zhang   nlnk = am + 1;
625*78910aadSHong Zhang   ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
626*78910aadSHong Zhang 
627*78910aadSHong Zhang   /* initial FreeSpace size is fill*(ai[am]+1) */
628*78910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
629*78910aadSHong Zhang   current_space = free_space;
630*78910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
631*78910aadSHong Zhang   current_space_lvl = free_space_lvl;
632*78910aadSHong Zhang 
633*78910aadSHong Zhang   for (k=0; k<am; k++){  /* for each active row k */
634*78910aadSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
635*78910aadSHong Zhang     nzk   = 0;
636*78910aadSHong Zhang     ncols = ai[rip[k]+1] - ai[rip[k]];
637*78910aadSHong Zhang     ncols_upper = 0;
638*78910aadSHong Zhang     cols        = cols_lvl + am;
639*78910aadSHong Zhang     for (j=0; j<ncols; j++){
640*78910aadSHong Zhang       i = rip[*(aj + ai[rip[k]] + j)];
641*78910aadSHong Zhang       if (i >= k){ /* only take upper triangular entry */
642*78910aadSHong Zhang         cols[ncols_upper] = i;
643*78910aadSHong Zhang         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
644*78910aadSHong Zhang         ncols_upper++;
645*78910aadSHong Zhang       }
646*78910aadSHong Zhang     }
647*78910aadSHong Zhang     ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
648*78910aadSHong Zhang     nzk += nlnk;
649*78910aadSHong Zhang 
650*78910aadSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
651*78910aadSHong Zhang     prow = jl[k]; /* 1st pivot row */
652*78910aadSHong Zhang 
653*78910aadSHong Zhang     while (prow < k){
654*78910aadSHong Zhang       nextprow = jl[prow];
655*78910aadSHong Zhang 
656*78910aadSHong Zhang       /* merge prow into k-th row */
657*78910aadSHong Zhang       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
658*78910aadSHong Zhang       jmax = ui[prow+1];
659*78910aadSHong Zhang       ncols = jmax-jmin;
660*78910aadSHong Zhang       i     = jmin - ui[prow];
661*78910aadSHong Zhang       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
662*78910aadSHong Zhang       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
663*78910aadSHong Zhang       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
664*78910aadSHong Zhang       nzk += nlnk;
665*78910aadSHong Zhang 
666*78910aadSHong Zhang       /* update il and jl for prow */
667*78910aadSHong Zhang       if (jmin < jmax){
668*78910aadSHong Zhang         il[prow] = jmin;
669*78910aadSHong Zhang         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
670*78910aadSHong Zhang       }
671*78910aadSHong Zhang       prow = nextprow;
672*78910aadSHong Zhang     }
673*78910aadSHong Zhang 
674*78910aadSHong Zhang     /* if free space is not available, make more free space */
675*78910aadSHong Zhang     if (current_space->local_remaining<nzk) {
676*78910aadSHong Zhang       i = am - k + 1; /* num of unfactored rows */
677*78910aadSHong Zhang       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
678*78910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
679*78910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
680*78910aadSHong Zhang       reallocs++;
681*78910aadSHong Zhang     }
682*78910aadSHong Zhang 
683*78910aadSHong Zhang     /* copy data into free_space and free_space_lvl, then initialize lnk */
684*78910aadSHong Zhang     ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
685*78910aadSHong Zhang 
686*78910aadSHong Zhang     /* add the k-th row into il and jl */
687*78910aadSHong Zhang     if (nzk-1 > 0){
688*78910aadSHong Zhang       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
689*78910aadSHong Zhang       jl[k] = jl[i]; jl[i] = k;
690*78910aadSHong Zhang       il[k] = ui[k] + 1;
691*78910aadSHong Zhang     }
692*78910aadSHong Zhang     uj_ptr[k]     = current_space->array;
693*78910aadSHong Zhang     uj_lvl_ptr[k] = current_space_lvl->array;
694*78910aadSHong Zhang 
695*78910aadSHong Zhang     current_space->array           += nzk;
696*78910aadSHong Zhang     current_space->local_used      += nzk;
697*78910aadSHong Zhang     current_space->local_remaining -= nzk;
698*78910aadSHong Zhang 
699*78910aadSHong Zhang     current_space_lvl->array           += nzk;
700*78910aadSHong Zhang     current_space_lvl->local_used      += nzk;
701*78910aadSHong Zhang     current_space_lvl->local_remaining -= nzk;
702*78910aadSHong Zhang 
703*78910aadSHong Zhang     ui[k+1] = ui[k] + nzk;
704*78910aadSHong Zhang   }
705*78910aadSHong Zhang 
706*78910aadSHong Zhang   if (ai[am] != 0) {
707*78910aadSHong Zhang     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
708*78910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
709*78910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
710*78910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
711*78910aadSHong Zhang   } else {
712*78910aadSHong Zhang      PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Empty matrix.\n");
713*78910aadSHong Zhang   }
714*78910aadSHong Zhang 
715*78910aadSHong Zhang   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
716*78910aadSHong Zhang   ierr = PetscFree(jl);CHKERRQ(ierr);
717*78910aadSHong Zhang   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
718*78910aadSHong Zhang 
719*78910aadSHong Zhang   /* destroy list of free space and other temporary array(s) */
720*78910aadSHong Zhang   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
721*78910aadSHong Zhang   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
722*78910aadSHong Zhang   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
723*78910aadSHong Zhang   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
724*78910aadSHong Zhang 
725*78910aadSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
726*78910aadSHong Zhang   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
727*78910aadSHong Zhang   B = *fact;
728*78910aadSHong Zhang   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
729*78910aadSHong Zhang   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
730*78910aadSHong Zhang 
731*78910aadSHong Zhang   b = (Mat_SeqSBAIJ*)B->data;
732*78910aadSHong Zhang   ierr = PetscFree(b->imax);CHKERRQ(ierr);
733*78910aadSHong Zhang   b->singlemalloc = PETSC_FALSE;
734*78910aadSHong Zhang   /* the next line frees the default space generated by the Create() */
735*78910aadSHong Zhang   ierr = PetscFree(b->a);CHKERRQ(ierr);
736*78910aadSHong Zhang   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
737*78910aadSHong Zhang   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
738*78910aadSHong Zhang   b->j    = uj;
739*78910aadSHong Zhang   b->i    = ui;
740*78910aadSHong Zhang   b->diag = 0;
741*78910aadSHong Zhang   b->ilen = 0;
742*78910aadSHong Zhang   b->imax = 0;
743*78910aadSHong Zhang   b->row  = perm;
744*78910aadSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
745*78910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
746*78910aadSHong Zhang   b->icol = perm;
747*78910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
748*78910aadSHong Zhang   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
749*78910aadSHong Zhang   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
750*78910aadSHong Zhang   b->maxnz = b->nz = ui[am];
751*78910aadSHong Zhang 
752*78910aadSHong Zhang   B->factor                 = FACTOR_CHOLESKY;
753*78910aadSHong Zhang   B->info.factor_mallocs    = reallocs;
754*78910aadSHong Zhang   B->info.fill_ratio_given  = fill;
755*78910aadSHong Zhang   if (ai[am] != 0) {
756*78910aadSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
757*78910aadSHong Zhang   } else {
758*78910aadSHong Zhang     B->info.fill_ratio_needed = 0.0;
759*78910aadSHong Zhang   }
760*78910aadSHong Zhang   if (perm_identity){
761*78910aadSHong Zhang     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
762*78910aadSHong Zhang     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
763*78910aadSHong Zhang     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
764*78910aadSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
765*78910aadSHong Zhang   } else {
766*78910aadSHong Zhang     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
767*78910aadSHong Zhang   }
768c05c3958SHong Zhang   PetscFunctionReturn(0);
769c05c3958SHong Zhang }
770c05c3958SHong Zhang 
771c05c3958SHong Zhang #undef __FUNCT__
772c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ"
773c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
774c05c3958SHong Zhang {
775*78910aadSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
776*78910aadSHong Zhang   Mat_SeqSBAIJ   *b;
777*78910aadSHong Zhang   Mat            B;
778*78910aadSHong Zhang   PetscErrorCode ierr;
779*78910aadSHong Zhang   PetscTruth     perm_identity;
780*78910aadSHong Zhang   PetscReal      fill = info->fill;
781*78910aadSHong Zhang   PetscInt       *rip,*riip,i,mbs=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j,reallocs=0,prow;
782*78910aadSHong Zhang   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
783*78910aadSHong Zhang   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
784*78910aadSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
785*78910aadSHong Zhang   PetscBT        lnkbt;
786*78910aadSHong Zhang   IS             iperm;
787*78910aadSHong Zhang 
788c05c3958SHong Zhang   PetscFunctionBegin;
789*78910aadSHong Zhang   /* check whether perm is the identity mapping */
790*78910aadSHong Zhang   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
791*78910aadSHong Zhang   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
792*78910aadSHong Zhang 
793*78910aadSHong Zhang   if (!perm_identity){
794*78910aadSHong Zhang     /* check if perm is symmetric! */
795*78910aadSHong Zhang     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
796*78910aadSHong Zhang     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
797*78910aadSHong Zhang     for (i=0; i<mbs; i++) {
798*78910aadSHong Zhang       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
799*78910aadSHong Zhang     }
800*78910aadSHong Zhang     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
801*78910aadSHong Zhang     ierr = ISDestroy(iperm);CHKERRQ(ierr);
802*78910aadSHong Zhang   }
803*78910aadSHong Zhang 
804*78910aadSHong Zhang   /* initialization */
805*78910aadSHong Zhang   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
806*78910aadSHong Zhang   ui[0] = 0;
807*78910aadSHong Zhang 
808*78910aadSHong Zhang   /* jl: linked list for storing indices of the pivot rows
809*78910aadSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
810*78910aadSHong Zhang   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
811*78910aadSHong Zhang   il     = jl + mbs;
812*78910aadSHong Zhang   cols   = il + mbs;
813*78910aadSHong Zhang   ui_ptr = (PetscInt**)(cols + mbs);
814*78910aadSHong Zhang   for (i=0; i<mbs; i++){
815*78910aadSHong Zhang     jl[i] = mbs; il[i] = 0;
816*78910aadSHong Zhang   }
817*78910aadSHong Zhang 
818*78910aadSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
819*78910aadSHong Zhang   nlnk = mbs + 1;
820*78910aadSHong Zhang   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
821*78910aadSHong Zhang 
822*78910aadSHong Zhang   /* initial FreeSpace size is fill*(ai[mbs]+1) */
823*78910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
824*78910aadSHong Zhang   current_space = free_space;
825*78910aadSHong Zhang 
826*78910aadSHong Zhang   for (k=0; k<mbs; k++){  /* for each active row k */
827*78910aadSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
828*78910aadSHong Zhang     nzk   = 0;
829*78910aadSHong Zhang     ncols = ai[rip[k]+1] - ai[rip[k]];
830*78910aadSHong Zhang     ncols_upper = 0;
831*78910aadSHong Zhang     for (j=0; j<ncols; j++){
832*78910aadSHong Zhang       i = rip[*(aj + ai[rip[k]] + j)];
833*78910aadSHong Zhang       if (i >= k){ /* only take upper triangular entry */
834*78910aadSHong Zhang         cols[ncols_upper] = i;
835*78910aadSHong Zhang         ncols_upper++;
836*78910aadSHong Zhang       }
837*78910aadSHong Zhang     }
838*78910aadSHong Zhang     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
839*78910aadSHong Zhang     nzk += nlnk;
840*78910aadSHong Zhang 
841*78910aadSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
842*78910aadSHong Zhang     prow = jl[k]; /* 1st pivot row */
843*78910aadSHong Zhang 
844*78910aadSHong Zhang     while (prow < k){
845*78910aadSHong Zhang       nextprow = jl[prow];
846*78910aadSHong Zhang       /* merge prow into k-th row */
847*78910aadSHong Zhang       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
848*78910aadSHong Zhang       jmax = ui[prow+1];
849*78910aadSHong Zhang       ncols = jmax-jmin;
850*78910aadSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
851*78910aadSHong Zhang       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
852*78910aadSHong Zhang       nzk += nlnk;
853*78910aadSHong Zhang 
854*78910aadSHong Zhang       /* update il and jl for prow */
855*78910aadSHong Zhang       if (jmin < jmax){
856*78910aadSHong Zhang         il[prow] = jmin;
857*78910aadSHong Zhang         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
858*78910aadSHong Zhang       }
859*78910aadSHong Zhang       prow = nextprow;
860*78910aadSHong Zhang     }
861*78910aadSHong Zhang 
862*78910aadSHong Zhang     /* if free space is not available, make more free space */
863*78910aadSHong Zhang     if (current_space->local_remaining<nzk) {
864*78910aadSHong Zhang       i = mbs - k + 1; /* num of unfactored rows */
865*78910aadSHong Zhang       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
866*78910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
867*78910aadSHong Zhang       reallocs++;
868*78910aadSHong Zhang     }
869*78910aadSHong Zhang 
870*78910aadSHong Zhang     /* copy data into free space, then initialize lnk */
871*78910aadSHong Zhang     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
872*78910aadSHong Zhang 
873*78910aadSHong Zhang     /* add the k-th row into il and jl */
874*78910aadSHong Zhang     if (nzk-1 > 0){
875*78910aadSHong Zhang       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
876*78910aadSHong Zhang       jl[k] = jl[i]; jl[i] = k;
877*78910aadSHong Zhang       il[k] = ui[k] + 1;
878*78910aadSHong Zhang     }
879*78910aadSHong Zhang     ui_ptr[k] = current_space->array;
880*78910aadSHong Zhang     current_space->array           += nzk;
881*78910aadSHong Zhang     current_space->local_used      += nzk;
882*78910aadSHong Zhang     current_space->local_remaining -= nzk;
883*78910aadSHong Zhang 
884*78910aadSHong Zhang     ui[k+1] = ui[k] + nzk;
885*78910aadSHong Zhang   }
886*78910aadSHong Zhang 
887*78910aadSHong Zhang   if (ai[mbs] != 0) {
888*78910aadSHong Zhang     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
889*78910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
890*78910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
891*78910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
892*78910aadSHong Zhang   } else {
893*78910aadSHong Zhang      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
894*78910aadSHong Zhang   }
895*78910aadSHong Zhang 
896*78910aadSHong Zhang   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
897*78910aadSHong Zhang   ierr = PetscFree(jl);CHKERRQ(ierr);
898*78910aadSHong Zhang 
899*78910aadSHong Zhang   /* destroy list of free space and other temporary array(s) */
900*78910aadSHong Zhang   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
901*78910aadSHong Zhang   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
902*78910aadSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
903*78910aadSHong Zhang 
904*78910aadSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
905*78910aadSHong Zhang   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
906*78910aadSHong Zhang   B    = *fact;
907*78910aadSHong Zhang   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
908*78910aadSHong Zhang   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr);
909*78910aadSHong Zhang 
910*78910aadSHong Zhang   b = (Mat_SeqSBAIJ*)B->data;
911*78910aadSHong Zhang   ierr = PetscFree(b->imax);CHKERRQ(ierr);
912*78910aadSHong Zhang   b->singlemalloc = PETSC_FALSE;
913*78910aadSHong Zhang   /* the next line frees the default space generated by the Create() */
914*78910aadSHong Zhang   ierr = PetscFree(b->a);CHKERRQ(ierr);
915*78910aadSHong Zhang   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
916*78910aadSHong Zhang   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
917*78910aadSHong Zhang   b->j    = uj;
918*78910aadSHong Zhang   b->i    = ui;
919*78910aadSHong Zhang   b->diag = 0;
920*78910aadSHong Zhang   b->ilen = 0;
921*78910aadSHong Zhang   b->imax = 0;
922*78910aadSHong Zhang   b->row  = perm;
923*78910aadSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
924*78910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
925*78910aadSHong Zhang   b->icol = perm;
926*78910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
927*78910aadSHong Zhang   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
928*78910aadSHong Zhang   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
929*78910aadSHong Zhang   b->maxnz = b->nz = ui[mbs];
930*78910aadSHong Zhang 
931*78910aadSHong Zhang   B->factor                 = FACTOR_CHOLESKY;
932*78910aadSHong Zhang   B->info.factor_mallocs    = reallocs;
933*78910aadSHong Zhang   B->info.fill_ratio_given  = fill;
934*78910aadSHong Zhang   if (ai[mbs] != 0) {
935*78910aadSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
936*78910aadSHong Zhang   } else {
937*78910aadSHong Zhang     B->info.fill_ratio_needed = 0.0;
938*78910aadSHong Zhang   }
939*78910aadSHong Zhang   if (perm_identity){
940*78910aadSHong Zhang     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
941*78910aadSHong Zhang     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
942*78910aadSHong Zhang     ierr = PetscObjectComposeFunction((PetscObject)(*fact),"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr);
943*78910aadSHong Zhang     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
944*78910aadSHong Zhang   } else {
945*78910aadSHong Zhang     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
946*78910aadSHong Zhang   }
947c05c3958SHong Zhang   PetscFunctionReturn(0);
948c05c3958SHong Zhang }
949