xref: /petsc/src/mat/impls/baij/seq/baijfact.c (revision 6ad2eadd5cb0bc4b28d6657ac7d402cd16a3e3c5)
15d517e7eSBarry Smith /*
2ec3a8b7bSBarry Smith     Factorization code for BAIJ format.
35d517e7eSBarry Smith */
470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
54078e994SBarry Smith #include "src/inline/ilu.h"
6ec3a8b7bSBarry Smith 
76d84be18SBarry Smith /* ------------------------------------------------------------*/
86d84be18SBarry Smith /*
94eeb42bcSBarry Smith       Version for when blocks are 2 by 2
104eeb42bcSBarry Smith */
114a2ae208SSatish Balay #undef __FUNCT__
124a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
13dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
144eeb42bcSBarry Smith {
154eeb42bcSBarry Smith   Mat            C = *B;
164eeb42bcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
177cdf2dbbSSatish Balay   IS             isrow = b->row,isicol = b->icol;
186849ba73SBarry Smith   PetscErrorCode ierr;
19690b6cddSBarry Smith   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
20690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row;
21690b6cddSBarry Smith   PetscInt       *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
22329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
232515f8d2SSatish Balay   MatScalar      p1,p2,p3,p4;
243f1db9ecSBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
254eeb42bcSBarry Smith 
263a40ed3dSBarry Smith   PetscFunctionBegin;
274eeb42bcSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
284eeb42bcSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
29b0a32e0cSBarry Smith   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
304eeb42bcSBarry Smith 
314eeb42bcSBarry Smith   for (i=0; i<n; i++) {
324078e994SBarry Smith     nz    = bi[i+1] - bi[i];
334078e994SBarry Smith     ajtmp = bj + bi[i];
344eeb42bcSBarry Smith     for  (j=0; j<nz; j++) {
354eeb42bcSBarry Smith       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
364eeb42bcSBarry Smith     }
374eeb42bcSBarry Smith     /* load in initial (unfactored row) */
384eeb42bcSBarry Smith     idx      = r[i];
394078e994SBarry Smith     nz       = ai[idx+1] - ai[idx];
404078e994SBarry Smith     ajtmpold = aj + ai[idx];
414078e994SBarry Smith     v        = aa + 4*ai[idx];
424eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
434eeb42bcSBarry Smith       x    = rtmp+4*ic[ajtmpold[j]];
444eeb42bcSBarry Smith       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
454eeb42bcSBarry Smith       v    += 4;
464eeb42bcSBarry Smith     }
474eeb42bcSBarry Smith     row = *ajtmp++;
484eeb42bcSBarry Smith     while (row < i) {
494eeb42bcSBarry Smith       pc = rtmp + 4*row;
504eeb42bcSBarry Smith       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
5188685aaeSLois Curfman McInnes       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
524078e994SBarry Smith         pv = ba + 4*diag_offset[row];
534078e994SBarry Smith         pj = bj + diag_offset[row] + 1;
544eeb42bcSBarry Smith         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
554eeb42bcSBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
564eeb42bcSBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
574eeb42bcSBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
584eeb42bcSBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
594078e994SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
604eeb42bcSBarry Smith         pv += 4;
614eeb42bcSBarry Smith         for (j=0; j<nz; j++) {
624eeb42bcSBarry Smith           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
634eeb42bcSBarry Smith           x    = rtmp + 4*pj[j];
644eeb42bcSBarry Smith           x[0] -= m1*x1 + m3*x2;
654eeb42bcSBarry Smith           x[1] -= m2*x1 + m4*x2;
664eeb42bcSBarry Smith           x[2] -= m1*x3 + m3*x4;
674eeb42bcSBarry Smith           x[3] -= m2*x3 + m4*x4;
684eeb42bcSBarry Smith           pv   += 4;
694eeb42bcSBarry Smith         }
70b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
714eeb42bcSBarry Smith       }
724eeb42bcSBarry Smith       row = *ajtmp++;
734eeb42bcSBarry Smith     }
744eeb42bcSBarry Smith     /* finished row so stick it into b->a */
754078e994SBarry Smith     pv = ba + 4*bi[i];
764078e994SBarry Smith     pj = bj + bi[i];
774078e994SBarry Smith     nz = bi[i+1] - bi[i];
784eeb42bcSBarry Smith     for (j=0; j<nz; j++) {
794eeb42bcSBarry Smith       x     = rtmp+4*pj[j];
804eeb42bcSBarry Smith       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
814eeb42bcSBarry Smith       pv   += 4;
824eeb42bcSBarry Smith     }
834eeb42bcSBarry Smith     /* invert diagonal block */
844078e994SBarry Smith     w = ba + 4*diag_offset[i];
854cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
864eeb42bcSBarry Smith   }
874eeb42bcSBarry Smith 
88606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
894eeb42bcSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
904eeb42bcSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
914eeb42bcSBarry Smith   C->factor = FACTOR_LU;
924eeb42bcSBarry Smith   C->assembled = PETSC_TRUE;
93b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
943a40ed3dSBarry Smith   PetscFunctionReturn(0);
954eeb42bcSBarry Smith }
964cd38560SBarry Smith /*
974cd38560SBarry Smith       Version for when blocks are 2 by 2 Using natural ordering
984cd38560SBarry Smith */
994a2ae208SSatish Balay #undef __FUNCT__
1004a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
101dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
1024cd38560SBarry Smith {
1034cd38560SBarry Smith   Mat            C = *B;
1044cd38560SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
105dfbe8321SBarry Smith   PetscErrorCode ierr;
106690b6cddSBarry Smith   PetscInt       i,j,n = a->mbs,*bi = b->i,*bj = b->j;
107690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row;
108690b6cddSBarry Smith   PetscInt       *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,*pc,*w,*x;
1102515f8d2SSatish Balay   MatScalar      p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
1114cd38560SBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
1124cd38560SBarry Smith 
1134cd38560SBarry Smith   PetscFunctionBegin;
114b0a32e0cSBarry Smith   ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
1154cd38560SBarry Smith 
1164cd38560SBarry Smith   for (i=0; i<n; i++) {
1174cd38560SBarry Smith     nz    = bi[i+1] - bi[i];
1184cd38560SBarry Smith     ajtmp = bj + bi[i];
1194cd38560SBarry Smith     for  (j=0; j<nz; j++) {
1204cd38560SBarry Smith       x = rtmp+4*ajtmp[j];
1214cd38560SBarry Smith       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
1224cd38560SBarry Smith     }
1234cd38560SBarry Smith     /* load in initial (unfactored row) */
1244cd38560SBarry Smith     nz       = ai[i+1] - ai[i];
1254cd38560SBarry Smith     ajtmpold = aj + ai[i];
1264cd38560SBarry Smith     v        = aa + 4*ai[i];
1274cd38560SBarry Smith     for (j=0; j<nz; j++) {
1284cd38560SBarry Smith       x    = rtmp+4*ajtmpold[j];
1294cd38560SBarry Smith       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
1304cd38560SBarry Smith       v    += 4;
1314cd38560SBarry Smith     }
1324cd38560SBarry Smith     row = *ajtmp++;
1334cd38560SBarry Smith     while (row < i) {
1344cd38560SBarry Smith       pc  = rtmp + 4*row;
1354cd38560SBarry Smith       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
1364cd38560SBarry Smith       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
1374cd38560SBarry Smith         pv = ba + 4*diag_offset[row];
1384cd38560SBarry Smith         pj = bj + diag_offset[row] + 1;
1394cd38560SBarry Smith         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
1404cd38560SBarry Smith         pc[0] = m1 = p1*x1 + p3*x2;
1414cd38560SBarry Smith         pc[1] = m2 = p2*x1 + p4*x2;
1424cd38560SBarry Smith         pc[2] = m3 = p1*x3 + p3*x4;
1434cd38560SBarry Smith         pc[3] = m4 = p2*x3 + p4*x4;
1444cd38560SBarry Smith         nz = bi[row+1] - diag_offset[row] - 1;
1454cd38560SBarry Smith         pv += 4;
1464cd38560SBarry Smith         for (j=0; j<nz; j++) {
1474cd38560SBarry Smith           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
1484cd38560SBarry Smith           x    = rtmp + 4*pj[j];
1494cd38560SBarry Smith           x[0] -= m1*x1 + m3*x2;
1504cd38560SBarry Smith           x[1] -= m2*x1 + m4*x2;
1514cd38560SBarry Smith           x[2] -= m1*x3 + m3*x4;
1524cd38560SBarry Smith           x[3] -= m2*x3 + m4*x4;
1534cd38560SBarry Smith           pv   += 4;
1544cd38560SBarry Smith         }
155b0a32e0cSBarry Smith         PetscLogFlops(16*nz+12);
1564cd38560SBarry Smith       }
1574cd38560SBarry Smith       row = *ajtmp++;
1584cd38560SBarry Smith     }
1594cd38560SBarry Smith     /* finished row so stick it into b->a */
1604cd38560SBarry Smith     pv = ba + 4*bi[i];
1614cd38560SBarry Smith     pj = bj + bi[i];
1624cd38560SBarry Smith     nz = bi[i+1] - bi[i];
1634cd38560SBarry Smith     for (j=0; j<nz; j++) {
1644cd38560SBarry Smith       x      = rtmp+4*pj[j];
1654cd38560SBarry Smith       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
1664cd38560SBarry Smith       pv   += 4;
1674cd38560SBarry Smith     }
1684cd38560SBarry Smith     /* invert diagonal block */
1694cd38560SBarry Smith     w = ba + 4*diag_offset[i];
1704cd38560SBarry Smith     ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr);
1714cd38560SBarry Smith     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
1724cd38560SBarry Smith   }
1734cd38560SBarry Smith 
174606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
1754cd38560SBarry Smith   C->factor    = FACTOR_LU;
1764cd38560SBarry Smith   C->assembled = PETSC_TRUE;
177b0a32e0cSBarry Smith   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1784cd38560SBarry Smith   PetscFunctionReturn(0);
1794cd38560SBarry Smith }
1807fc0212eSBarry Smith 
1817fc0212eSBarry Smith /* ----------------------------------------------------------- */
1827fc0212eSBarry Smith /*
1837fc0212eSBarry Smith      Version for when blocks are 1 by 1.
1847fc0212eSBarry Smith */
1854a2ae208SSatish Balay #undef __FUNCT__
1864a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
187dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
1887fc0212eSBarry Smith {
1897fc0212eSBarry Smith   Mat            C = *B;
1907fc0212eSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
1917cdf2dbbSSatish Balay   IS             isrow = b->row,isicol = b->icol;
1926849ba73SBarry Smith   PetscErrorCode ierr;
193690b6cddSBarry Smith   PetscInt       *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
194690b6cddSBarry Smith   PetscInt       *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
195690b6cddSBarry Smith   PetscInt       *diag_offset = b->diag,diag,*pj;
196329f5518SBarry Smith   MatScalar      *pv,*v,*rtmp,multiplier,*pc;
1973f1db9ecSBarry Smith   MatScalar      *ba = b->a,*aa = a->a;
1987fc0212eSBarry Smith 
1993a40ed3dSBarry Smith   PetscFunctionBegin;
2007fc0212eSBarry Smith   ierr  = ISGetIndices(isrow,&r);CHKERRQ(ierr);
2017fc0212eSBarry Smith   ierr  = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
202b0a32e0cSBarry Smith   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);
2037fc0212eSBarry Smith 
2047fc0212eSBarry Smith   for (i=0; i<n; i++) {
2054078e994SBarry Smith     nz    = bi[i+1] - bi[i];
2064078e994SBarry Smith     ajtmp = bj + bi[i];
2077fc0212eSBarry Smith     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
2087fc0212eSBarry Smith 
2097fc0212eSBarry Smith     /* load in initial (unfactored row) */
2104078e994SBarry Smith     nz       = ai[r[i]+1] - ai[r[i]];
2114078e994SBarry Smith     ajtmpold = aj + ai[r[i]];
2124078e994SBarry Smith     v        = aa + ai[r[i]];
2137fc0212eSBarry Smith     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];
2147fc0212eSBarry Smith 
2157fc0212eSBarry Smith     row = *ajtmp++;
2167fc0212eSBarry Smith     while (row < i) {
2177fc0212eSBarry Smith       pc = rtmp + row;
2187fc0212eSBarry Smith       if (*pc != 0.0) {
2194078e994SBarry Smith         pv         = ba + diag_offset[row];
2204078e994SBarry Smith         pj         = bj + diag_offset[row] + 1;
2217fc0212eSBarry Smith         multiplier = *pc * *pv++;
2227fc0212eSBarry Smith         *pc        = multiplier;
2234078e994SBarry Smith         nz         = bi[row+1] - diag_offset[row] - 1;
2247fc0212eSBarry Smith         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
225b0a32e0cSBarry Smith         PetscLogFlops(1+2*nz);
2267fc0212eSBarry Smith       }
2277fc0212eSBarry Smith       row = *ajtmp++;
2287fc0212eSBarry Smith     }
2297fc0212eSBarry Smith     /* finished row so stick it into b->a */
2304078e994SBarry Smith     pv = ba + bi[i];
2314078e994SBarry Smith     pj = bj + bi[i];
2324078e994SBarry Smith     nz = bi[i+1] - bi[i];
2337fc0212eSBarry Smith     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
2344078e994SBarry Smith     diag = diag_offset[i] - bi[i];
2357fc0212eSBarry Smith     /* check pivot entry for current row */
236a73cf429SBarry Smith     if (pv[diag] == 0.0) {
23729bbc08cSBarry Smith       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
2387fc0212eSBarry Smith     }
2397fc0212eSBarry Smith     pv[diag] = 1.0/pv[diag];
2407fc0212eSBarry Smith   }
2417fc0212eSBarry Smith 
242606d414cSSatish Balay   ierr = PetscFree(rtmp);CHKERRQ(ierr);
2437fc0212eSBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2447fc0212eSBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
2457fc0212eSBarry Smith   C->factor    = FACTOR_LU;
2467fc0212eSBarry Smith   C->assembled = PETSC_TRUE;
247b0a32e0cSBarry Smith   PetscLogFlops(C->n);
2483a40ed3dSBarry Smith   PetscFunctionReturn(0);
2497fc0212eSBarry Smith }
2507fc0212eSBarry Smith 
251273d9f13SBarry Smith 
2525d517e7eSBarry Smith /* ----------------------------------------------------------- */
2534a2ae208SSatish Balay #undef __FUNCT__
2544a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqBAIJ"
255dfbe8321SBarry Smith PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
2565d517e7eSBarry Smith {
257dfbe8321SBarry Smith   PetscErrorCode ierr;
2585d517e7eSBarry Smith   Mat            C;
2595d517e7eSBarry Smith 
2603a40ed3dSBarry Smith   PetscFunctionBegin;
261b9b97703SBarry Smith   ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
2627fc0212eSBarry Smith   ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
263273d9f13SBarry Smith   ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
264b0a32e0cSBarry Smith   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
2653a40ed3dSBarry Smith   PetscFunctionReturn(0);
2665d517e7eSBarry Smith }
2674cd38560SBarry Smith 
268c05c3958SHong Zhang #include "src/mat/impls/sbaij/seq/sbaij.h"
269c05c3958SHong Zhang #undef __FUNCT__
270c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N"
271c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,Mat *B)
272c05c3958SHong Zhang {
27378910aadSHong Zhang   Mat            C = *B;
27478910aadSHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
27578910aadSHong Zhang   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
27678910aadSHong Zhang   IS             ip=b->row;
277*6ad2eaddSHong Zhang   PetscErrorCode ierr,(*f)(Mat,Mat*);
27878910aadSHong Zhang   PetscInt       *rip,i,j,mbs=a->mbs,bs=A->bs,*bi=b->i,*bj=b->j,*bcol;
27978910aadSHong Zhang   PetscInt       *ai=a->i,*aj=a->j;
28078910aadSHong Zhang   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
28178910aadSHong Zhang   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
28278910aadSHong Zhang   PetscReal      damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount;
28378910aadSHong Zhang   PetscTruth     damp,chshift;
28478910aadSHong Zhang   PetscInt       nshift=0,ndamp=0;
28578910aadSHong Zhang 
286c05c3958SHong Zhang   PetscFunctionBegin;
287*6ad2eaddSHong Zhang   if (bs > 1) {
288*6ad2eaddSHong Zhang     if (!a->sbaijMat){
289*6ad2eaddSHong Zhang       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
290*6ad2eaddSHong Zhang     }
291*6ad2eaddSHong Zhang     ierr = PetscObjectQueryFunction((PetscObject)C,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr);
292*6ad2eaddSHong Zhang     ierr = (*f)(a->sbaijMat,B);CHKERRQ(ierr);
293*6ad2eaddSHong Zhang     ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);
294*6ad2eaddSHong Zhang     a->sbaijMat = PETSC_NULL;
295*6ad2eaddSHong Zhang     PetscFunctionReturn(0);
296*6ad2eaddSHong Zhang   }
29778910aadSHong Zhang 
29878910aadSHong Zhang   /* initialization */
299*6ad2eaddSHong Zhang   ierr  = ISGetIndices(ip,&rip);CHKERRQ(ierr);
30078910aadSHong Zhang   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
30178910aadSHong Zhang   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
30278910aadSHong Zhang   jl   = il + mbs;
30378910aadSHong Zhang   rtmp = (MatScalar*)(jl + mbs);
30478910aadSHong Zhang 
30578910aadSHong Zhang   shift_amount = 0;
30678910aadSHong Zhang   do {
30778910aadSHong Zhang     damp = PETSC_FALSE;
30878910aadSHong Zhang     chshift = PETSC_FALSE;
30978910aadSHong Zhang     for (i=0; i<mbs; i++) {
31078910aadSHong Zhang       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
31178910aadSHong Zhang     }
31278910aadSHong Zhang 
31378910aadSHong Zhang     for (k = 0; k<mbs; k++){
31478910aadSHong Zhang       bval = ba + bi[k];
31578910aadSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
31678910aadSHong Zhang       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
31778910aadSHong Zhang       for (j = jmin; j < jmax; j++){
31878910aadSHong Zhang         col = rip[aj[j]];
31978910aadSHong Zhang         if (col >= k){ /* only take upper triangular entry */
32078910aadSHong Zhang           rtmp[col] = aa[j];
32178910aadSHong Zhang           *bval++  = 0.0; /* for in-place factorization */
32278910aadSHong Zhang         }
32378910aadSHong Zhang       }
32478910aadSHong Zhang       /* damp the diagonal of the matrix */
32578910aadSHong Zhang       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
32678910aadSHong Zhang 
32778910aadSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
32878910aadSHong Zhang       dk = rtmp[k];
32978910aadSHong Zhang       i = jl[k]; /* first row to be added to k_th row  */
33078910aadSHong Zhang 
33178910aadSHong Zhang       while (i < k){
33278910aadSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
33378910aadSHong Zhang 
33478910aadSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
33578910aadSHong Zhang         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
33678910aadSHong Zhang         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
33778910aadSHong Zhang         dk += uikdi*ba[ili];
33878910aadSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
33978910aadSHong Zhang 
34078910aadSHong Zhang         /* add multiple of row i to k-th row */
34178910aadSHong Zhang         jmin = ili + 1; jmax = bi[i+1];
34278910aadSHong Zhang         if (jmin < jmax){
34378910aadSHong Zhang           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
34478910aadSHong Zhang           /* update il and jl for row i */
34578910aadSHong Zhang           il[i] = jmin;
34678910aadSHong Zhang           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
34778910aadSHong Zhang         }
34878910aadSHong Zhang         i = nexti;
34978910aadSHong Zhang       }
35078910aadSHong Zhang 
35178910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
35278910aadSHong Zhang 	/* calculate a shift that would make this row diagonally dominant */
35378910aadSHong Zhang 	PetscReal rs = PetscAbs(PetscRealPart(dk));
35478910aadSHong Zhang 	jmin      = bi[k]+1;
35578910aadSHong Zhang 	nz        = bi[k+1] - jmin;
35678910aadSHong Zhang 	if (nz){
35778910aadSHong Zhang 	  bcol = bj + jmin;
35878910aadSHong Zhang 	  bval = ba + jmin;
35978910aadSHong Zhang 	  while (nz--){
36078910aadSHong Zhang 	    rs += PetscAbsScalar(rtmp[*bcol++]);
36178910aadSHong Zhang 	  }
36278910aadSHong Zhang 	}
36378910aadSHong Zhang 	/* if this shift is less than the previous, just up the previous
36478910aadSHong Zhang 	   one by a bit */
36578910aadSHong Zhang 	shift_amount = PetscMax(rs,1.1*shift_amount);
36678910aadSHong Zhang 	chshift  = PETSC_TRUE;
36778910aadSHong Zhang 	/* Unlike in the ILU case there is no exit condition on nshift:
36878910aadSHong Zhang 	   we increase the shift until it converges. There is no guarantee that
36978910aadSHong Zhang 	   this algorithm converges faster or slower, or is better or worse
37078910aadSHong Zhang 	   than the ILU algorithm. */
37178910aadSHong Zhang 	nshift++;
37278910aadSHong Zhang 	break;
37378910aadSHong Zhang       }
37478910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot){
37578910aadSHong Zhang         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
37678910aadSHong Zhang         if (damping > 0.0) {
37778910aadSHong Zhang           if (ndamp) damping *= 2.0;
37878910aadSHong Zhang           damp = PETSC_TRUE;
37978910aadSHong Zhang           ndamp++;
38078910aadSHong Zhang           break;
38178910aadSHong Zhang         } else if (PetscAbsScalar(dk) < zeropivot){
38278910aadSHong Zhang           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
38378910aadSHong Zhang         } else {
38478910aadSHong Zhang           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
38578910aadSHong Zhang         }
38678910aadSHong Zhang       }
38778910aadSHong Zhang 
38878910aadSHong Zhang       /* copy data into U(k,:) */
38978910aadSHong Zhang       ba[bi[k]] = 1.0/dk; /* U(k,k) */
39078910aadSHong Zhang       jmin = bi[k]+1; jmax = bi[k+1];
39178910aadSHong Zhang       if (jmin < jmax) {
39278910aadSHong Zhang         for (j=jmin; j<jmax; j++){
39378910aadSHong Zhang           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
39478910aadSHong Zhang         }
39578910aadSHong Zhang         /* add the k-th row into il and jl */
39678910aadSHong Zhang         il[k] = jmin;
39778910aadSHong Zhang         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
39878910aadSHong Zhang       }
39978910aadSHong Zhang     }
40078910aadSHong Zhang   } while (damp||chshift);
40178910aadSHong Zhang   ierr = PetscFree(il);CHKERRQ(ierr);
40278910aadSHong Zhang 
40378910aadSHong Zhang   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
40478910aadSHong Zhang   C->factor       = FACTOR_CHOLESKY;
40578910aadSHong Zhang   C->assembled    = PETSC_TRUE;
40678910aadSHong Zhang   C->preallocated = PETSC_TRUE;
40778910aadSHong Zhang   PetscLogFlops(C->m);
40878910aadSHong Zhang   if (ndamp) {
40978910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
41078910aadSHong Zhang   }
41178910aadSHong Zhang   if (nshift) {
41278910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ diagonal shifted %D shifts\n",nshift);
41378910aadSHong Zhang   }
414c05c3958SHong Zhang   PetscFunctionReturn(0);
415c05c3958SHong Zhang }
4164cd38560SBarry Smith 
417c05c3958SHong Zhang #undef __FUNCT__
418c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering"
419c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,Mat *fact)
420c05c3958SHong Zhang {
42178910aadSHong Zhang   Mat            C = *fact;
42278910aadSHong Zhang   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
42378910aadSHong Zhang   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
42478910aadSHong Zhang   PetscErrorCode ierr;
42578910aadSHong Zhang   PetscInt       i,j,am=a->mbs,bs=A->bs;
42678910aadSHong Zhang   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
42778910aadSHong Zhang   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
42878910aadSHong Zhang   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
42978910aadSHong Zhang   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
43078910aadSHong Zhang   PetscTruth     damp,chshift;
43178910aadSHong Zhang   PetscInt       nshift=0;
43278910aadSHong Zhang 
433c05c3958SHong Zhang   PetscFunctionBegin;
434*6ad2eaddSHong Zhang   if (bs > 1) {
435*6ad2eaddSHong Zhang     SETERRQ(PETSC_ERR_USER,"not done yet");
436*6ad2eaddSHong Zhang   }
437*6ad2eaddSHong Zhang 
43878910aadSHong Zhang   /* initialization */
43978910aadSHong Zhang   nz   = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar);
44078910aadSHong Zhang   ierr = PetscMalloc(nz,&il);CHKERRQ(ierr);
44178910aadSHong Zhang   jl   = il + am;
44278910aadSHong Zhang   rtmp = (MatScalar*)(jl + am);
44378910aadSHong Zhang 
44478910aadSHong Zhang   shift_amount = 0;
44578910aadSHong Zhang   do {
44678910aadSHong Zhang     damp = PETSC_FALSE;
44778910aadSHong Zhang     chshift = PETSC_FALSE;
44878910aadSHong Zhang     for (i=0; i<am; i++) {
44978910aadSHong Zhang       rtmp[i] = 0.0; jl[i] = am; il[0] = 0;
45078910aadSHong Zhang     }
45178910aadSHong Zhang 
45278910aadSHong Zhang     for (k = 0; k<am; k++){
45378910aadSHong Zhang     /* initialize k-th row with elements nonzero in row perm(k) of A */
45478910aadSHong Zhang       nz   = ai[k+1] - ai[k];
45578910aadSHong Zhang       acol = aj + ai[k];
45678910aadSHong Zhang       aval = aa + ai[k];
45778910aadSHong Zhang       bval = ba + bi[k];
45878910aadSHong Zhang       while (nz -- ){
45978910aadSHong Zhang         if (*acol < k) { /* skip lower triangular entries */
46078910aadSHong Zhang           acol++; aval++;
46178910aadSHong Zhang         } else {
46278910aadSHong Zhang           rtmp[*acol++] = *aval++;
46378910aadSHong Zhang           *bval++       = 0.0; /* for in-place factorization */
46478910aadSHong Zhang         }
46578910aadSHong Zhang       }
46678910aadSHong Zhang       /* damp the diagonal of the matrix */
46778910aadSHong Zhang       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
46878910aadSHong Zhang 
46978910aadSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
47078910aadSHong Zhang       dk = rtmp[k];
47178910aadSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
47278910aadSHong Zhang 
47378910aadSHong Zhang       while (i < k){
47478910aadSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
47578910aadSHong Zhang         /* compute multiplier, update D(k) and U(i,k) */
47678910aadSHong Zhang         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
47778910aadSHong Zhang         uikdi = - ba[ili]*ba[bi[i]];
47878910aadSHong Zhang         dk   += uikdi*ba[ili];
47978910aadSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
48078910aadSHong Zhang 
48178910aadSHong Zhang         /* add multiple of row i to k-th row ... */
48278910aadSHong Zhang         jmin = ili + 1;
48378910aadSHong Zhang         nz   = bi[i+1] - jmin;
48478910aadSHong Zhang         if (nz > 0){
48578910aadSHong Zhang           bcol = bj + jmin;
48678910aadSHong Zhang           bval = ba + jmin;
48778910aadSHong Zhang           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
48878910aadSHong Zhang           /* update il and jl for i-th row */
48978910aadSHong Zhang           il[i] = jmin;
49078910aadSHong Zhang           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
49178910aadSHong Zhang         }
49278910aadSHong Zhang         i = nexti;
49378910aadSHong Zhang       }
49478910aadSHong Zhang 
49578910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
49678910aadSHong Zhang 	/* calculate a shift that would make this row diagonally dominant */
49778910aadSHong Zhang 	PetscReal rs = PetscAbs(PetscRealPart(dk));
49878910aadSHong Zhang 	jmin      = bi[k]+1;
49978910aadSHong Zhang 	nz        = bi[k+1] - jmin;
50078910aadSHong Zhang 	if (nz){
50178910aadSHong Zhang 	  bcol = bj + jmin;
50278910aadSHong Zhang 	  bval = ba + jmin;
50378910aadSHong Zhang 	  while (nz--){
50478910aadSHong Zhang 	    rs += PetscAbsScalar(rtmp[*bcol++]);
50578910aadSHong Zhang 	  }
50678910aadSHong Zhang 	}
50778910aadSHong Zhang 	/* if this shift is less than the previous, just up the previous
50878910aadSHong Zhang 	   one by a bit */
50978910aadSHong Zhang 	shift_amount = PetscMax(rs,1.1*shift_amount);
51078910aadSHong Zhang 	chshift  = PETSC_TRUE;
51178910aadSHong Zhang 	/* Unlike in the ILU case there is no exit condition on nshift:
51278910aadSHong Zhang 	   we increase the shift until it converges. There is no guarantee that
51378910aadSHong Zhang 	   this algorithm converges faster or slower, or is better or worse
51478910aadSHong Zhang 	   than the ILU algorithm. */
51578910aadSHong Zhang 	nshift++;
51678910aadSHong Zhang 	break;
51778910aadSHong Zhang       }
51878910aadSHong Zhang       if (PetscRealPart(dk) < zeropivot){
51978910aadSHong Zhang         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
52078910aadSHong Zhang         if (damping > 0.0) {
52178910aadSHong Zhang           if (ndamp) damping *= 2.0;
52278910aadSHong Zhang           damp = PETSC_TRUE;
52378910aadSHong Zhang           ndamp++;
52478910aadSHong Zhang           break;
52578910aadSHong Zhang         } else if (PetscAbsScalar(dk) < zeropivot){
52678910aadSHong Zhang           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
52778910aadSHong Zhang         } else {
52878910aadSHong Zhang           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
52978910aadSHong Zhang         }
53078910aadSHong Zhang       }
53178910aadSHong Zhang 
53278910aadSHong Zhang       /* copy data into U(k,:) */
53378910aadSHong Zhang       ba[bi[k]] = 1.0/dk;
53478910aadSHong Zhang       jmin      = bi[k]+1;
53578910aadSHong Zhang       nz        = bi[k+1] - jmin;
53678910aadSHong Zhang       if (nz){
53778910aadSHong Zhang         bcol = bj + jmin;
53878910aadSHong Zhang         bval = ba + jmin;
53978910aadSHong Zhang         while (nz--){
54078910aadSHong Zhang           *bval++       = rtmp[*bcol];
54178910aadSHong Zhang           rtmp[*bcol++] = 0.0;
54278910aadSHong Zhang         }
54378910aadSHong Zhang         /* add k-th row into il and jl */
54478910aadSHong Zhang         il[k] = jmin;
54578910aadSHong Zhang         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
54678910aadSHong Zhang       }
54778910aadSHong Zhang     }
54878910aadSHong Zhang   } while (damp||chshift);
54978910aadSHong Zhang   ierr = PetscFree(il);CHKERRQ(ierr);
55078910aadSHong Zhang 
55178910aadSHong Zhang   C->factor       = FACTOR_CHOLESKY;
55278910aadSHong Zhang   C->assembled    = PETSC_TRUE;
55378910aadSHong Zhang   C->preallocated = PETSC_TRUE;
55478910aadSHong Zhang   PetscLogFlops(C->m);
55578910aadSHong Zhang   if (ndamp) {
55678910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
55778910aadSHong Zhang   }
55878910aadSHong Zhang   if (nshift) {
55978910aadSHong Zhang     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift);
56078910aadSHong Zhang   }
561c05c3958SHong Zhang   PetscFunctionReturn(0);
562c05c3958SHong Zhang }
563c05c3958SHong Zhang 
564c05c3958SHong Zhang #include "petscbt.h"
565c05c3958SHong Zhang #include "src/mat/utils/freespace.h"
566c05c3958SHong Zhang #undef __FUNCT__
567c05c3958SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ"
568c05c3958SHong Zhang PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
569c05c3958SHong Zhang {
57078910aadSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
57178910aadSHong Zhang   Mat_SeqSBAIJ   *b;
57278910aadSHong Zhang   Mat            B;
57378910aadSHong Zhang   PetscErrorCode ierr;
57478910aadSHong Zhang   PetscTruth     perm_identity;
57578910aadSHong Zhang   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->bs,*ui;
57678910aadSHong Zhang   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
57778910aadSHong Zhang   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
57878910aadSHong Zhang   PetscReal      fill=info->fill,levels=info->levels;
57978910aadSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
58078910aadSHong Zhang   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
58178910aadSHong Zhang   PetscBT        lnkbt;
58278910aadSHong Zhang 
583c05c3958SHong Zhang   PetscFunctionBegin;
584*6ad2eaddSHong Zhang   if (bs > 1){
585*6ad2eaddSHong Zhang     if (!a->sbaijMat){
586*6ad2eaddSHong Zhang       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
587*6ad2eaddSHong Zhang     }
588*6ad2eaddSHong Zhang     ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
589*6ad2eaddSHong Zhang     B = *fact;
590*6ad2eaddSHong Zhang     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
591*6ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
592*6ad2eaddSHong Zhang     PetscFunctionReturn(0);
593*6ad2eaddSHong Zhang   }
594*6ad2eaddSHong Zhang 
59578910aadSHong Zhang   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
59678910aadSHong Zhang   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
59778910aadSHong Zhang 
59878910aadSHong Zhang   /* special case that simply copies fill pattern */
59978910aadSHong Zhang   if (!levels && perm_identity) {
60078910aadSHong Zhang     ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
60178910aadSHong Zhang     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
60278910aadSHong Zhang     for (i=0; i<am; i++) {
60378910aadSHong Zhang       ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */
60478910aadSHong Zhang     }
60578910aadSHong Zhang     ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
60678910aadSHong Zhang     B = *fact;
60778910aadSHong Zhang     ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
60878910aadSHong Zhang     ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr);
60978910aadSHong Zhang 
61078910aadSHong Zhang     b  = (Mat_SeqSBAIJ*)B->data;
61178910aadSHong Zhang     uj = b->j;
61278910aadSHong Zhang     for (i=0; i<am; i++) {
61378910aadSHong Zhang       aj = a->j + a->diag[i];
61478910aadSHong Zhang       for (j=0; j<ui[i]; j++){
61578910aadSHong Zhang         *uj++ = *aj++;
61678910aadSHong Zhang       }
61778910aadSHong Zhang       b->ilen[i] = ui[i];
61878910aadSHong Zhang     }
61978910aadSHong Zhang     ierr = PetscFree(ui);CHKERRQ(ierr);
62078910aadSHong Zhang     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
62178910aadSHong Zhang     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
62278910aadSHong Zhang 
62378910aadSHong Zhang     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
62478910aadSHong Zhang     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
62578910aadSHong Zhang     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
62678910aadSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
62778910aadSHong Zhang     PetscFunctionReturn(0);
62878910aadSHong Zhang   }
62978910aadSHong Zhang 
63078910aadSHong Zhang   /* initialization */
63178910aadSHong Zhang   ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
63278910aadSHong Zhang   ui[0] = 0;
63378910aadSHong Zhang   ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
63478910aadSHong Zhang 
63578910aadSHong Zhang   /* jl: linked list for storing indices of the pivot rows
63678910aadSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
63778910aadSHong Zhang   ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
63878910aadSHong Zhang   il         = jl + am;
63978910aadSHong Zhang   uj_ptr     = (PetscInt**)(il + am);
64078910aadSHong Zhang   uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
64178910aadSHong Zhang   for (i=0; i<am; i++){
64278910aadSHong Zhang     jl[i] = am; il[i] = 0;
64378910aadSHong Zhang   }
64478910aadSHong Zhang 
64578910aadSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
64678910aadSHong Zhang   nlnk = am + 1;
64778910aadSHong Zhang   ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
64878910aadSHong Zhang 
64978910aadSHong Zhang   /* initial FreeSpace size is fill*(ai[am]+1) */
65078910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
65178910aadSHong Zhang   current_space = free_space;
65278910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
65378910aadSHong Zhang   current_space_lvl = free_space_lvl;
65478910aadSHong Zhang 
65578910aadSHong Zhang   for (k=0; k<am; k++){  /* for each active row k */
65678910aadSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
65778910aadSHong Zhang     nzk   = 0;
65878910aadSHong Zhang     ncols = ai[rip[k]+1] - ai[rip[k]];
65978910aadSHong Zhang     ncols_upper = 0;
66078910aadSHong Zhang     cols        = cols_lvl + am;
66178910aadSHong Zhang     for (j=0; j<ncols; j++){
66278910aadSHong Zhang       i = rip[*(aj + ai[rip[k]] + j)];
66378910aadSHong Zhang       if (i >= k){ /* only take upper triangular entry */
66478910aadSHong Zhang         cols[ncols_upper] = i;
66578910aadSHong Zhang         cols_lvl[ncols_upper] = -1;  /* initialize level for nonzero entries */
66678910aadSHong Zhang         ncols_upper++;
66778910aadSHong Zhang       }
66878910aadSHong Zhang     }
66978910aadSHong Zhang     ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
67078910aadSHong Zhang     nzk += nlnk;
67178910aadSHong Zhang 
67278910aadSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
67378910aadSHong Zhang     prow = jl[k]; /* 1st pivot row */
67478910aadSHong Zhang 
67578910aadSHong Zhang     while (prow < k){
67678910aadSHong Zhang       nextprow = jl[prow];
67778910aadSHong Zhang 
67878910aadSHong Zhang       /* merge prow into k-th row */
67978910aadSHong Zhang       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
68078910aadSHong Zhang       jmax = ui[prow+1];
68178910aadSHong Zhang       ncols = jmax-jmin;
68278910aadSHong Zhang       i     = jmin - ui[prow];
68378910aadSHong Zhang       cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
68478910aadSHong Zhang       for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
68578910aadSHong Zhang       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
68678910aadSHong Zhang       nzk += nlnk;
68778910aadSHong Zhang 
68878910aadSHong Zhang       /* update il and jl for prow */
68978910aadSHong Zhang       if (jmin < jmax){
69078910aadSHong Zhang         il[prow] = jmin;
69178910aadSHong Zhang         j = *cols; jl[prow] = jl[j]; jl[j] = prow;
69278910aadSHong Zhang       }
69378910aadSHong Zhang       prow = nextprow;
69478910aadSHong Zhang     }
69578910aadSHong Zhang 
69678910aadSHong Zhang     /* if free space is not available, make more free space */
69778910aadSHong Zhang     if (current_space->local_remaining<nzk) {
69878910aadSHong Zhang       i = am - k + 1; /* num of unfactored rows */
69978910aadSHong Zhang       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
70078910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
70178910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
70278910aadSHong Zhang       reallocs++;
70378910aadSHong Zhang     }
70478910aadSHong Zhang 
70578910aadSHong Zhang     /* copy data into free_space and free_space_lvl, then initialize lnk */
70678910aadSHong Zhang     ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
70778910aadSHong Zhang 
70878910aadSHong Zhang     /* add the k-th row into il and jl */
70978910aadSHong Zhang     if (nzk-1 > 0){
71078910aadSHong Zhang       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
71178910aadSHong Zhang       jl[k] = jl[i]; jl[i] = k;
71278910aadSHong Zhang       il[k] = ui[k] + 1;
71378910aadSHong Zhang     }
71478910aadSHong Zhang     uj_ptr[k]     = current_space->array;
71578910aadSHong Zhang     uj_lvl_ptr[k] = current_space_lvl->array;
71678910aadSHong Zhang 
71778910aadSHong Zhang     current_space->array           += nzk;
71878910aadSHong Zhang     current_space->local_used      += nzk;
71978910aadSHong Zhang     current_space->local_remaining -= nzk;
72078910aadSHong Zhang 
72178910aadSHong Zhang     current_space_lvl->array           += nzk;
72278910aadSHong Zhang     current_space_lvl->local_used      += nzk;
72378910aadSHong Zhang     current_space_lvl->local_remaining -= nzk;
72478910aadSHong Zhang 
72578910aadSHong Zhang     ui[k+1] = ui[k] + nzk;
72678910aadSHong Zhang   }
72778910aadSHong Zhang 
72878910aadSHong Zhang   if (ai[am] != 0) {
72978910aadSHong Zhang     PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]);
73078910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
73178910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
73278910aadSHong Zhang     PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
73378910aadSHong Zhang   } else {
73478910aadSHong Zhang      PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Empty matrix.\n");
73578910aadSHong Zhang   }
73678910aadSHong Zhang 
73778910aadSHong Zhang   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
73878910aadSHong Zhang   ierr = PetscFree(jl);CHKERRQ(ierr);
73978910aadSHong Zhang   ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
74078910aadSHong Zhang 
74178910aadSHong Zhang   /* destroy list of free space and other temporary array(s) */
74278910aadSHong Zhang   ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
74378910aadSHong Zhang   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
74478910aadSHong Zhang   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
74578910aadSHong Zhang   ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
74678910aadSHong Zhang 
74778910aadSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
74878910aadSHong Zhang   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
74978910aadSHong Zhang   B = *fact;
75078910aadSHong Zhang   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
75178910aadSHong Zhang   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
75278910aadSHong Zhang 
75378910aadSHong Zhang   b = (Mat_SeqSBAIJ*)B->data;
75478910aadSHong Zhang   ierr = PetscFree(b->imax);CHKERRQ(ierr);
75578910aadSHong Zhang   b->singlemalloc = PETSC_FALSE;
75678910aadSHong Zhang   /* the next line frees the default space generated by the Create() */
75778910aadSHong Zhang   ierr = PetscFree(b->a);CHKERRQ(ierr);
75878910aadSHong Zhang   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
75978910aadSHong Zhang   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
76078910aadSHong Zhang   b->j    = uj;
76178910aadSHong Zhang   b->i    = ui;
76278910aadSHong Zhang   b->diag = 0;
76378910aadSHong Zhang   b->ilen = 0;
76478910aadSHong Zhang   b->imax = 0;
76578910aadSHong Zhang   b->row  = perm;
76678910aadSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
76778910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
76878910aadSHong Zhang   b->icol = perm;
76978910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
77078910aadSHong Zhang   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
77178910aadSHong Zhang   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
77278910aadSHong Zhang   b->maxnz = b->nz = ui[am];
77378910aadSHong Zhang 
77478910aadSHong Zhang   B->factor                 = FACTOR_CHOLESKY;
77578910aadSHong Zhang   B->info.factor_mallocs    = reallocs;
77678910aadSHong Zhang   B->info.fill_ratio_given  = fill;
77778910aadSHong Zhang   if (ai[am] != 0) {
77878910aadSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
77978910aadSHong Zhang   } else {
78078910aadSHong Zhang     B->info.fill_ratio_needed = 0.0;
78178910aadSHong Zhang   }
78278910aadSHong Zhang   if (perm_identity){
78378910aadSHong Zhang     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
78478910aadSHong Zhang     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
78578910aadSHong Zhang     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
78678910aadSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
78778910aadSHong Zhang   } else {
78878910aadSHong Zhang     (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
78978910aadSHong Zhang   }
790c05c3958SHong Zhang   PetscFunctionReturn(0);
791c05c3958SHong Zhang }
792c05c3958SHong Zhang 
793c05c3958SHong Zhang #undef __FUNCT__
794c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ"
795c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
796c05c3958SHong Zhang {
79778910aadSHong Zhang   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
79878910aadSHong Zhang   Mat_SeqSBAIJ   *b;
79978910aadSHong Zhang   Mat            B;
80078910aadSHong Zhang   PetscErrorCode ierr;
80178910aadSHong Zhang   PetscTruth     perm_identity;
80278910aadSHong Zhang   PetscReal      fill = info->fill;
80378910aadSHong Zhang   PetscInt       *rip,*riip,i,mbs=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j,reallocs=0,prow;
80478910aadSHong Zhang   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
80578910aadSHong Zhang   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
80678910aadSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
80778910aadSHong Zhang   PetscBT        lnkbt;
80878910aadSHong Zhang   IS             iperm;
80978910aadSHong Zhang 
810c05c3958SHong Zhang   PetscFunctionBegin;
811*6ad2eaddSHong Zhang   if (bs > 1) { /* convert to seqsbaij */
812*6ad2eaddSHong Zhang     if (!a->sbaijMat){
813*6ad2eaddSHong Zhang       ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr);
814*6ad2eaddSHong Zhang     }
815*6ad2eaddSHong Zhang     ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr);
816*6ad2eaddSHong Zhang     B = *fact;
817*6ad2eaddSHong Zhang     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
818*6ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
819*6ad2eaddSHong Zhang     PetscFunctionReturn(0);
820*6ad2eaddSHong Zhang   }
821*6ad2eaddSHong Zhang 
82278910aadSHong Zhang   /* check whether perm is the identity mapping */
82378910aadSHong Zhang   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
82478910aadSHong Zhang   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
82578910aadSHong Zhang 
82678910aadSHong Zhang   if (!perm_identity){
82778910aadSHong Zhang     /* check if perm is symmetric! */
82878910aadSHong Zhang     ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
82978910aadSHong Zhang     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
83078910aadSHong Zhang     for (i=0; i<mbs; i++) {
83178910aadSHong Zhang       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
83278910aadSHong Zhang     }
83378910aadSHong Zhang     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
83478910aadSHong Zhang     ierr = ISDestroy(iperm);CHKERRQ(ierr);
83578910aadSHong Zhang   }
83678910aadSHong Zhang 
83778910aadSHong Zhang   /* initialization */
83878910aadSHong Zhang   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
83978910aadSHong Zhang   ui[0] = 0;
84078910aadSHong Zhang 
84178910aadSHong Zhang   /* jl: linked list for storing indices of the pivot rows
84278910aadSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
84378910aadSHong Zhang   ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
84478910aadSHong Zhang   il     = jl + mbs;
84578910aadSHong Zhang   cols   = il + mbs;
84678910aadSHong Zhang   ui_ptr = (PetscInt**)(cols + mbs);
84778910aadSHong Zhang   for (i=0; i<mbs; i++){
84878910aadSHong Zhang     jl[i] = mbs; il[i] = 0;
84978910aadSHong Zhang   }
85078910aadSHong Zhang 
85178910aadSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
85278910aadSHong Zhang   nlnk = mbs + 1;
85378910aadSHong Zhang   ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
85478910aadSHong Zhang 
85578910aadSHong Zhang   /* initial FreeSpace size is fill*(ai[mbs]+1) */
85678910aadSHong Zhang   ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr);
85778910aadSHong Zhang   current_space = free_space;
85878910aadSHong Zhang 
85978910aadSHong Zhang   for (k=0; k<mbs; k++){  /* for each active row k */
86078910aadSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
86178910aadSHong Zhang     nzk   = 0;
86278910aadSHong Zhang     ncols = ai[rip[k]+1] - ai[rip[k]];
86378910aadSHong Zhang     ncols_upper = 0;
86478910aadSHong Zhang     for (j=0; j<ncols; j++){
86578910aadSHong Zhang       i = rip[*(aj + ai[rip[k]] + j)];
86678910aadSHong Zhang       if (i >= k){ /* only take upper triangular entry */
86778910aadSHong Zhang         cols[ncols_upper] = i;
86878910aadSHong Zhang         ncols_upper++;
86978910aadSHong Zhang       }
87078910aadSHong Zhang     }
87178910aadSHong Zhang     ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
87278910aadSHong Zhang     nzk += nlnk;
87378910aadSHong Zhang 
87478910aadSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
87578910aadSHong Zhang     prow = jl[k]; /* 1st pivot row */
87678910aadSHong Zhang 
87778910aadSHong Zhang     while (prow < k){
87878910aadSHong Zhang       nextprow = jl[prow];
87978910aadSHong Zhang       /* merge prow into k-th row */
88078910aadSHong Zhang       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
88178910aadSHong Zhang       jmax = ui[prow+1];
88278910aadSHong Zhang       ncols = jmax-jmin;
88378910aadSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
88478910aadSHong Zhang       ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr);
88578910aadSHong Zhang       nzk += nlnk;
88678910aadSHong Zhang 
88778910aadSHong Zhang       /* update il and jl for prow */
88878910aadSHong Zhang       if (jmin < jmax){
88978910aadSHong Zhang         il[prow] = jmin;
89078910aadSHong Zhang         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
89178910aadSHong Zhang       }
89278910aadSHong Zhang       prow = nextprow;
89378910aadSHong Zhang     }
89478910aadSHong Zhang 
89578910aadSHong Zhang     /* if free space is not available, make more free space */
89678910aadSHong Zhang     if (current_space->local_remaining<nzk) {
89778910aadSHong Zhang       i = mbs - k + 1; /* num of unfactored rows */
89878910aadSHong Zhang       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
89978910aadSHong Zhang       ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
90078910aadSHong Zhang       reallocs++;
90178910aadSHong Zhang     }
90278910aadSHong Zhang 
90378910aadSHong Zhang     /* copy data into free space, then initialize lnk */
90478910aadSHong Zhang     ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
90578910aadSHong Zhang 
90678910aadSHong Zhang     /* add the k-th row into il and jl */
90778910aadSHong Zhang     if (nzk-1 > 0){
90878910aadSHong Zhang       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
90978910aadSHong Zhang       jl[k] = jl[i]; jl[i] = k;
91078910aadSHong Zhang       il[k] = ui[k] + 1;
91178910aadSHong Zhang     }
91278910aadSHong Zhang     ui_ptr[k] = current_space->array;
91378910aadSHong Zhang     current_space->array           += nzk;
91478910aadSHong Zhang     current_space->local_used      += nzk;
91578910aadSHong Zhang     current_space->local_remaining -= nzk;
91678910aadSHong Zhang 
91778910aadSHong Zhang     ui[k+1] = ui[k] + nzk;
91878910aadSHong Zhang   }
91978910aadSHong Zhang 
92078910aadSHong Zhang   if (ai[mbs] != 0) {
92178910aadSHong Zhang     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
92278910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
92378910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
92478910aadSHong Zhang     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
92578910aadSHong Zhang   } else {
92678910aadSHong Zhang      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
92778910aadSHong Zhang   }
92878910aadSHong Zhang 
92978910aadSHong Zhang   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
93078910aadSHong Zhang   ierr = PetscFree(jl);CHKERRQ(ierr);
93178910aadSHong Zhang 
93278910aadSHong Zhang   /* destroy list of free space and other temporary array(s) */
93378910aadSHong Zhang   ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
93478910aadSHong Zhang   ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
93578910aadSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
93678910aadSHong Zhang 
93778910aadSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
93878910aadSHong Zhang   ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr);
93978910aadSHong Zhang   B    = *fact;
94078910aadSHong Zhang   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
94178910aadSHong Zhang   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr);
94278910aadSHong Zhang 
94378910aadSHong Zhang   b = (Mat_SeqSBAIJ*)B->data;
94478910aadSHong Zhang   ierr = PetscFree(b->imax);CHKERRQ(ierr);
94578910aadSHong Zhang   b->singlemalloc = PETSC_FALSE;
94678910aadSHong Zhang   /* the next line frees the default space generated by the Create() */
94778910aadSHong Zhang   ierr = PetscFree(b->a);CHKERRQ(ierr);
94878910aadSHong Zhang   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
94978910aadSHong Zhang   ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
95078910aadSHong Zhang   b->j    = uj;
95178910aadSHong Zhang   b->i    = ui;
95278910aadSHong Zhang   b->diag = 0;
95378910aadSHong Zhang   b->ilen = 0;
95478910aadSHong Zhang   b->imax = 0;
95578910aadSHong Zhang   b->row  = perm;
95678910aadSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
95778910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
95878910aadSHong Zhang   b->icol = perm;
95978910aadSHong Zhang   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
96078910aadSHong Zhang   ierr    = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
96178910aadSHong Zhang   PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
96278910aadSHong Zhang   b->maxnz = b->nz = ui[mbs];
96378910aadSHong Zhang 
96478910aadSHong Zhang   B->factor                 = FACTOR_CHOLESKY;
96578910aadSHong Zhang   B->info.factor_mallocs    = reallocs;
96678910aadSHong Zhang   B->info.fill_ratio_given  = fill;
96778910aadSHong Zhang   if (ai[mbs] != 0) {
96878910aadSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
96978910aadSHong Zhang   } else {
97078910aadSHong Zhang     B->info.fill_ratio_needed = 0.0;
97178910aadSHong Zhang   }
97278910aadSHong Zhang   if (perm_identity){
973*6ad2eaddSHong Zhang     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
974*6ad2eaddSHong Zhang     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
975*6ad2eaddSHong Zhang     ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr);
976*6ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
97778910aadSHong Zhang   } else {
978*6ad2eaddSHong Zhang     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
97978910aadSHong Zhang   }
980c05c3958SHong Zhang   PetscFunctionReturn(0);
981c05c3958SHong Zhang }
982