15d517e7eSBarry Smith /* 2ec3a8b7bSBarry Smith Factorization code for BAIJ format. 35d517e7eSBarry Smith */ 470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 54078e994SBarry Smith #include "src/inline/ilu.h" 6ec3a8b7bSBarry Smith 76d84be18SBarry Smith /* ------------------------------------------------------------*/ 86d84be18SBarry Smith /* 94eeb42bcSBarry Smith Version for when blocks are 2 by 2 104eeb42bcSBarry Smith */ 114a2ae208SSatish Balay #undef __FUNCT__ 124a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 13dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 144eeb42bcSBarry Smith { 154eeb42bcSBarry Smith Mat C = *B; 164eeb42bcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 177cdf2dbbSSatish Balay IS isrow = b->row,isicol = b->icol; 186849ba73SBarry Smith PetscErrorCode ierr; 19690b6cddSBarry Smith PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 20690b6cddSBarry Smith PetscInt *ajtmpold,*ajtmp,nz,row; 21690b6cddSBarry Smith PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 22329f5518SBarry Smith MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 232515f8d2SSatish Balay MatScalar p1,p2,p3,p4; 243f1db9ecSBarry Smith MatScalar *ba = b->a,*aa = a->a; 254eeb42bcSBarry Smith 263a40ed3dSBarry Smith PetscFunctionBegin; 274eeb42bcSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 284eeb42bcSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 29b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 304eeb42bcSBarry Smith 314eeb42bcSBarry Smith for (i=0; i<n; i++) { 324078e994SBarry Smith nz = bi[i+1] - bi[i]; 334078e994SBarry Smith ajtmp = bj + bi[i]; 344eeb42bcSBarry Smith for (j=0; j<nz; j++) { 354eeb42bcSBarry Smith x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 364eeb42bcSBarry Smith } 374eeb42bcSBarry Smith /* load in initial (unfactored row) */ 384eeb42bcSBarry Smith idx = r[i]; 394078e994SBarry Smith nz = ai[idx+1] - ai[idx]; 404078e994SBarry Smith ajtmpold = aj + ai[idx]; 414078e994SBarry Smith v = aa + 4*ai[idx]; 424eeb42bcSBarry Smith for (j=0; j<nz; j++) { 434eeb42bcSBarry Smith x = rtmp+4*ic[ajtmpold[j]]; 444eeb42bcSBarry Smith x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 454eeb42bcSBarry Smith v += 4; 464eeb42bcSBarry Smith } 474eeb42bcSBarry Smith row = *ajtmp++; 484eeb42bcSBarry Smith while (row < i) { 494eeb42bcSBarry Smith pc = rtmp + 4*row; 504eeb42bcSBarry Smith p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 5188685aaeSLois Curfman McInnes if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 524078e994SBarry Smith pv = ba + 4*diag_offset[row]; 534078e994SBarry Smith pj = bj + diag_offset[row] + 1; 544eeb42bcSBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 554eeb42bcSBarry Smith pc[0] = m1 = p1*x1 + p3*x2; 564eeb42bcSBarry Smith pc[1] = m2 = p2*x1 + p4*x2; 574eeb42bcSBarry Smith pc[2] = m3 = p1*x3 + p3*x4; 584eeb42bcSBarry Smith pc[3] = m4 = p2*x3 + p4*x4; 594078e994SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 604eeb42bcSBarry Smith pv += 4; 614eeb42bcSBarry Smith for (j=0; j<nz; j++) { 624eeb42bcSBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 634eeb42bcSBarry Smith x = rtmp + 4*pj[j]; 644eeb42bcSBarry Smith x[0] -= m1*x1 + m3*x2; 654eeb42bcSBarry Smith x[1] -= m2*x1 + m4*x2; 664eeb42bcSBarry Smith x[2] -= m1*x3 + m3*x4; 674eeb42bcSBarry Smith x[3] -= m2*x3 + m4*x4; 684eeb42bcSBarry Smith pv += 4; 694eeb42bcSBarry Smith } 70b0a32e0cSBarry Smith PetscLogFlops(16*nz+12); 714eeb42bcSBarry Smith } 724eeb42bcSBarry Smith row = *ajtmp++; 734eeb42bcSBarry Smith } 744eeb42bcSBarry Smith /* finished row so stick it into b->a */ 754078e994SBarry Smith pv = ba + 4*bi[i]; 764078e994SBarry Smith pj = bj + bi[i]; 774078e994SBarry Smith nz = bi[i+1] - bi[i]; 784eeb42bcSBarry Smith for (j=0; j<nz; j++) { 794eeb42bcSBarry Smith x = rtmp+4*pj[j]; 804eeb42bcSBarry Smith pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 814eeb42bcSBarry Smith pv += 4; 824eeb42bcSBarry Smith } 834eeb42bcSBarry Smith /* invert diagonal block */ 844078e994SBarry Smith w = ba + 4*diag_offset[i]; 854cd38560SBarry Smith ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 864eeb42bcSBarry Smith } 874eeb42bcSBarry Smith 88606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 894eeb42bcSBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 904eeb42bcSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 914eeb42bcSBarry Smith C->factor = FACTOR_LU; 924eeb42bcSBarry Smith C->assembled = PETSC_TRUE; 93b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 943a40ed3dSBarry Smith PetscFunctionReturn(0); 954eeb42bcSBarry Smith } 964cd38560SBarry Smith /* 974cd38560SBarry Smith Version for when blocks are 2 by 2 Using natural ordering 984cd38560SBarry Smith */ 994a2ae208SSatish Balay #undef __FUNCT__ 1004a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 101dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 1024cd38560SBarry Smith { 1034cd38560SBarry Smith Mat C = *B; 1044cd38560SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 105dfbe8321SBarry Smith PetscErrorCode ierr; 106690b6cddSBarry Smith PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 107690b6cddSBarry Smith PetscInt *ajtmpold,*ajtmp,nz,row; 108690b6cddSBarry Smith PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 109329f5518SBarry Smith MatScalar *pv,*v,*rtmp,*pc,*w,*x; 1102515f8d2SSatish Balay MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 1114cd38560SBarry Smith MatScalar *ba = b->a,*aa = a->a; 1124cd38560SBarry Smith 1134cd38560SBarry Smith PetscFunctionBegin; 114b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1154cd38560SBarry Smith 1164cd38560SBarry Smith for (i=0; i<n; i++) { 1174cd38560SBarry Smith nz = bi[i+1] - bi[i]; 1184cd38560SBarry Smith ajtmp = bj + bi[i]; 1194cd38560SBarry Smith for (j=0; j<nz; j++) { 1204cd38560SBarry Smith x = rtmp+4*ajtmp[j]; 1214cd38560SBarry Smith x[0] = x[1] = x[2] = x[3] = 0.0; 1224cd38560SBarry Smith } 1234cd38560SBarry Smith /* load in initial (unfactored row) */ 1244cd38560SBarry Smith nz = ai[i+1] - ai[i]; 1254cd38560SBarry Smith ajtmpold = aj + ai[i]; 1264cd38560SBarry Smith v = aa + 4*ai[i]; 1274cd38560SBarry Smith for (j=0; j<nz; j++) { 1284cd38560SBarry Smith x = rtmp+4*ajtmpold[j]; 1294cd38560SBarry Smith x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 1304cd38560SBarry Smith v += 4; 1314cd38560SBarry Smith } 1324cd38560SBarry Smith row = *ajtmp++; 1334cd38560SBarry Smith while (row < i) { 1344cd38560SBarry Smith pc = rtmp + 4*row; 1354cd38560SBarry Smith p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 1364cd38560SBarry Smith if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 1374cd38560SBarry Smith pv = ba + 4*diag_offset[row]; 1384cd38560SBarry Smith pj = bj + diag_offset[row] + 1; 1394cd38560SBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 1404cd38560SBarry Smith pc[0] = m1 = p1*x1 + p3*x2; 1414cd38560SBarry Smith pc[1] = m2 = p2*x1 + p4*x2; 1424cd38560SBarry Smith pc[2] = m3 = p1*x3 + p3*x4; 1434cd38560SBarry Smith pc[3] = m4 = p2*x3 + p4*x4; 1444cd38560SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 1454cd38560SBarry Smith pv += 4; 1464cd38560SBarry Smith for (j=0; j<nz; j++) { 1474cd38560SBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 1484cd38560SBarry Smith x = rtmp + 4*pj[j]; 1494cd38560SBarry Smith x[0] -= m1*x1 + m3*x2; 1504cd38560SBarry Smith x[1] -= m2*x1 + m4*x2; 1514cd38560SBarry Smith x[2] -= m1*x3 + m3*x4; 1524cd38560SBarry Smith x[3] -= m2*x3 + m4*x4; 1534cd38560SBarry Smith pv += 4; 1544cd38560SBarry Smith } 155b0a32e0cSBarry Smith PetscLogFlops(16*nz+12); 1564cd38560SBarry Smith } 1574cd38560SBarry Smith row = *ajtmp++; 1584cd38560SBarry Smith } 1594cd38560SBarry Smith /* finished row so stick it into b->a */ 1604cd38560SBarry Smith pv = ba + 4*bi[i]; 1614cd38560SBarry Smith pj = bj + bi[i]; 1624cd38560SBarry Smith nz = bi[i+1] - bi[i]; 1634cd38560SBarry Smith for (j=0; j<nz; j++) { 1644cd38560SBarry Smith x = rtmp+4*pj[j]; 1654cd38560SBarry Smith pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 1664cd38560SBarry Smith pv += 4; 1674cd38560SBarry Smith } 1684cd38560SBarry Smith /* invert diagonal block */ 1694cd38560SBarry Smith w = ba + 4*diag_offset[i]; 1704cd38560SBarry Smith ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 1714cd38560SBarry Smith /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 1724cd38560SBarry Smith } 1734cd38560SBarry Smith 174606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 1754cd38560SBarry Smith C->factor = FACTOR_LU; 1764cd38560SBarry Smith C->assembled = PETSC_TRUE; 177b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1784cd38560SBarry Smith PetscFunctionReturn(0); 1794cd38560SBarry Smith } 1807fc0212eSBarry Smith 1817fc0212eSBarry Smith /* ----------------------------------------------------------- */ 1827fc0212eSBarry Smith /* 1837fc0212eSBarry Smith Version for when blocks are 1 by 1. 1847fc0212eSBarry Smith */ 1854a2ae208SSatish Balay #undef __FUNCT__ 1864a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 187dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 1887fc0212eSBarry Smith { 1897fc0212eSBarry Smith Mat C = *B; 1907fc0212eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 1917cdf2dbbSSatish Balay IS isrow = b->row,isicol = b->icol; 1926849ba73SBarry Smith PetscErrorCode ierr; 193690b6cddSBarry Smith PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 194690b6cddSBarry Smith PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 195690b6cddSBarry Smith PetscInt *diag_offset = b->diag,diag,*pj; 196329f5518SBarry Smith MatScalar *pv,*v,*rtmp,multiplier,*pc; 1973f1db9ecSBarry Smith MatScalar *ba = b->a,*aa = a->a; 1987fc0212eSBarry Smith 1993a40ed3dSBarry Smith PetscFunctionBegin; 2007fc0212eSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2017fc0212eSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 202b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2037fc0212eSBarry Smith 2047fc0212eSBarry Smith for (i=0; i<n; i++) { 2054078e994SBarry Smith nz = bi[i+1] - bi[i]; 2064078e994SBarry Smith ajtmp = bj + bi[i]; 2077fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 2087fc0212eSBarry Smith 2097fc0212eSBarry Smith /* load in initial (unfactored row) */ 2104078e994SBarry Smith nz = ai[r[i]+1] - ai[r[i]]; 2114078e994SBarry Smith ajtmpold = aj + ai[r[i]]; 2124078e994SBarry Smith v = aa + ai[r[i]]; 2137fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 2147fc0212eSBarry Smith 2157fc0212eSBarry Smith row = *ajtmp++; 2167fc0212eSBarry Smith while (row < i) { 2177fc0212eSBarry Smith pc = rtmp + row; 2187fc0212eSBarry Smith if (*pc != 0.0) { 2194078e994SBarry Smith pv = ba + diag_offset[row]; 2204078e994SBarry Smith pj = bj + diag_offset[row] + 1; 2217fc0212eSBarry Smith multiplier = *pc * *pv++; 2227fc0212eSBarry Smith *pc = multiplier; 2234078e994SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 2247fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 225b0a32e0cSBarry Smith PetscLogFlops(1+2*nz); 2267fc0212eSBarry Smith } 2277fc0212eSBarry Smith row = *ajtmp++; 2287fc0212eSBarry Smith } 2297fc0212eSBarry Smith /* finished row so stick it into b->a */ 2304078e994SBarry Smith pv = ba + bi[i]; 2314078e994SBarry Smith pj = bj + bi[i]; 2324078e994SBarry Smith nz = bi[i+1] - bi[i]; 2337fc0212eSBarry Smith for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 2344078e994SBarry Smith diag = diag_offset[i] - bi[i]; 2357fc0212eSBarry Smith /* check pivot entry for current row */ 236a73cf429SBarry Smith if (pv[diag] == 0.0) { 23729bbc08cSBarry Smith SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 2387fc0212eSBarry Smith } 2397fc0212eSBarry Smith pv[diag] = 1.0/pv[diag]; 2407fc0212eSBarry Smith } 2417fc0212eSBarry Smith 242606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 2437fc0212eSBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2447fc0212eSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2457fc0212eSBarry Smith C->factor = FACTOR_LU; 2467fc0212eSBarry Smith C->assembled = PETSC_TRUE; 247b0a32e0cSBarry Smith PetscLogFlops(C->n); 2483a40ed3dSBarry Smith PetscFunctionReturn(0); 2497fc0212eSBarry Smith } 2507fc0212eSBarry Smith 251273d9f13SBarry Smith 2525d517e7eSBarry Smith /* ----------------------------------------------------------- */ 2534a2ae208SSatish Balay #undef __FUNCT__ 2544a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqBAIJ" 255dfbe8321SBarry Smith PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 2565d517e7eSBarry Smith { 257dfbe8321SBarry Smith PetscErrorCode ierr; 2585d517e7eSBarry Smith Mat C; 2595d517e7eSBarry Smith 2603a40ed3dSBarry Smith PetscFunctionBegin; 261b9b97703SBarry Smith ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 2627fc0212eSBarry Smith ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 263273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 264b0a32e0cSBarry Smith PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 2653a40ed3dSBarry Smith PetscFunctionReturn(0); 2665d517e7eSBarry Smith } 2674cd38560SBarry Smith 268c05c3958SHong Zhang #include "src/mat/impls/sbaij/seq/sbaij.h" 269c05c3958SHong Zhang #undef __FUNCT__ 270c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N" 271c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,Mat *B) 272c05c3958SHong Zhang { 273*78910aadSHong Zhang Mat C = *B; 274*78910aadSHong Zhang Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 275*78910aadSHong Zhang Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 276*78910aadSHong Zhang IS ip=b->row; 277*78910aadSHong Zhang PetscErrorCode ierr; 278*78910aadSHong Zhang PetscInt *rip,i,j,mbs=a->mbs,bs=A->bs,*bi=b->i,*bj=b->j,*bcol; 279*78910aadSHong Zhang PetscInt *ai=a->i,*aj=a->j; 280*78910aadSHong Zhang PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 281*78910aadSHong Zhang MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 282*78910aadSHong Zhang PetscReal damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount; 283*78910aadSHong Zhang PetscTruth damp,chshift; 284*78910aadSHong Zhang PetscInt nshift=0,ndamp=0; 285*78910aadSHong Zhang 286c05c3958SHong Zhang PetscFunctionBegin; 287*78910aadSHong Zhang if (bs > 1) SETERRQ(PETSC_ERR_USER,"not done yet"); 288*78910aadSHong Zhang ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 289*78910aadSHong Zhang 290*78910aadSHong Zhang /* initialization */ 291*78910aadSHong Zhang nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 292*78910aadSHong Zhang ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 293*78910aadSHong Zhang jl = il + mbs; 294*78910aadSHong Zhang rtmp = (MatScalar*)(jl + mbs); 295*78910aadSHong Zhang 296*78910aadSHong Zhang shift_amount = 0; 297*78910aadSHong Zhang do { 298*78910aadSHong Zhang damp = PETSC_FALSE; 299*78910aadSHong Zhang chshift = PETSC_FALSE; 300*78910aadSHong Zhang for (i=0; i<mbs; i++) { 301*78910aadSHong Zhang rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 302*78910aadSHong Zhang } 303*78910aadSHong Zhang 304*78910aadSHong Zhang for (k = 0; k<mbs; k++){ 305*78910aadSHong Zhang bval = ba + bi[k]; 306*78910aadSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 307*78910aadSHong Zhang jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 308*78910aadSHong Zhang for (j = jmin; j < jmax; j++){ 309*78910aadSHong Zhang col = rip[aj[j]]; 310*78910aadSHong Zhang if (col >= k){ /* only take upper triangular entry */ 311*78910aadSHong Zhang rtmp[col] = aa[j]; 312*78910aadSHong Zhang *bval++ = 0.0; /* for in-place factorization */ 313*78910aadSHong Zhang } 314*78910aadSHong Zhang } 315*78910aadSHong Zhang /* damp the diagonal of the matrix */ 316*78910aadSHong Zhang if (ndamp||nshift) rtmp[k] += damping+shift_amount; 317*78910aadSHong Zhang 318*78910aadSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 319*78910aadSHong Zhang dk = rtmp[k]; 320*78910aadSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 321*78910aadSHong Zhang 322*78910aadSHong Zhang while (i < k){ 323*78910aadSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 324*78910aadSHong Zhang 325*78910aadSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 326*78910aadSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 327*78910aadSHong Zhang uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 328*78910aadSHong Zhang dk += uikdi*ba[ili]; 329*78910aadSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 330*78910aadSHong Zhang 331*78910aadSHong Zhang /* add multiple of row i to k-th row */ 332*78910aadSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 333*78910aadSHong Zhang if (jmin < jmax){ 334*78910aadSHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 335*78910aadSHong Zhang /* update il and jl for row i */ 336*78910aadSHong Zhang il[i] = jmin; 337*78910aadSHong Zhang j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 338*78910aadSHong Zhang } 339*78910aadSHong Zhang i = nexti; 340*78910aadSHong Zhang } 341*78910aadSHong Zhang 342*78910aadSHong Zhang if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 343*78910aadSHong Zhang /* calculate a shift that would make this row diagonally dominant */ 344*78910aadSHong Zhang PetscReal rs = PetscAbs(PetscRealPart(dk)); 345*78910aadSHong Zhang jmin = bi[k]+1; 346*78910aadSHong Zhang nz = bi[k+1] - jmin; 347*78910aadSHong Zhang if (nz){ 348*78910aadSHong Zhang bcol = bj + jmin; 349*78910aadSHong Zhang bval = ba + jmin; 350*78910aadSHong Zhang while (nz--){ 351*78910aadSHong Zhang rs += PetscAbsScalar(rtmp[*bcol++]); 352*78910aadSHong Zhang } 353*78910aadSHong Zhang } 354*78910aadSHong Zhang /* if this shift is less than the previous, just up the previous 355*78910aadSHong Zhang one by a bit */ 356*78910aadSHong Zhang shift_amount = PetscMax(rs,1.1*shift_amount); 357*78910aadSHong Zhang chshift = PETSC_TRUE; 358*78910aadSHong Zhang /* Unlike in the ILU case there is no exit condition on nshift: 359*78910aadSHong Zhang we increase the shift until it converges. There is no guarantee that 360*78910aadSHong Zhang this algorithm converges faster or slower, or is better or worse 361*78910aadSHong Zhang than the ILU algorithm. */ 362*78910aadSHong Zhang nshift++; 363*78910aadSHong Zhang break; 364*78910aadSHong Zhang } 365*78910aadSHong Zhang if (PetscRealPart(dk) < zeropivot){ 366*78910aadSHong Zhang if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 367*78910aadSHong Zhang if (damping > 0.0) { 368*78910aadSHong Zhang if (ndamp) damping *= 2.0; 369*78910aadSHong Zhang damp = PETSC_TRUE; 370*78910aadSHong Zhang ndamp++; 371*78910aadSHong Zhang break; 372*78910aadSHong Zhang } else if (PetscAbsScalar(dk) < zeropivot){ 373*78910aadSHong Zhang SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 374*78910aadSHong Zhang } else { 375*78910aadSHong Zhang PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 376*78910aadSHong Zhang } 377*78910aadSHong Zhang } 378*78910aadSHong Zhang 379*78910aadSHong Zhang /* copy data into U(k,:) */ 380*78910aadSHong Zhang ba[bi[k]] = 1.0/dk; /* U(k,k) */ 381*78910aadSHong Zhang jmin = bi[k]+1; jmax = bi[k+1]; 382*78910aadSHong Zhang if (jmin < jmax) { 383*78910aadSHong Zhang for (j=jmin; j<jmax; j++){ 384*78910aadSHong Zhang col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 385*78910aadSHong Zhang } 386*78910aadSHong Zhang /* add the k-th row into il and jl */ 387*78910aadSHong Zhang il[k] = jmin; 388*78910aadSHong Zhang i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 389*78910aadSHong Zhang } 390*78910aadSHong Zhang } 391*78910aadSHong Zhang } while (damp||chshift); 392*78910aadSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 393*78910aadSHong Zhang 394*78910aadSHong Zhang ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 395*78910aadSHong Zhang C->factor = FACTOR_CHOLESKY; 396*78910aadSHong Zhang C->assembled = PETSC_TRUE; 397*78910aadSHong Zhang C->preallocated = PETSC_TRUE; 398*78910aadSHong Zhang PetscLogFlops(C->m); 399*78910aadSHong Zhang if (ndamp) { 400*78910aadSHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 401*78910aadSHong Zhang } 402*78910aadSHong Zhang if (nshift) { 403*78910aadSHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ diagonal shifted %D shifts\n",nshift); 404*78910aadSHong Zhang } 405c05c3958SHong Zhang PetscFunctionReturn(0); 406c05c3958SHong Zhang } 4074cd38560SBarry Smith 408c05c3958SHong Zhang #undef __FUNCT__ 409c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering" 410c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,Mat *fact) 411c05c3958SHong Zhang { 412*78910aadSHong Zhang Mat C = *fact; 413*78910aadSHong Zhang Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 414*78910aadSHong Zhang Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 415*78910aadSHong Zhang PetscErrorCode ierr; 416*78910aadSHong Zhang PetscInt i,j,am=a->mbs,bs=A->bs; 417*78910aadSHong Zhang PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 418*78910aadSHong Zhang PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 419*78910aadSHong Zhang MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 420*78910aadSHong Zhang PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 421*78910aadSHong Zhang PetscTruth damp,chshift; 422*78910aadSHong Zhang PetscInt nshift=0; 423*78910aadSHong Zhang 424c05c3958SHong Zhang PetscFunctionBegin; 425*78910aadSHong Zhang if (bs > 1) SETERRQ(PETSC_ERR_USER,"not done yet"); 426*78910aadSHong Zhang /* initialization */ 427*78910aadSHong Zhang nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 428*78910aadSHong Zhang ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 429*78910aadSHong Zhang jl = il + am; 430*78910aadSHong Zhang rtmp = (MatScalar*)(jl + am); 431*78910aadSHong Zhang 432*78910aadSHong Zhang shift_amount = 0; 433*78910aadSHong Zhang do { 434*78910aadSHong Zhang damp = PETSC_FALSE; 435*78910aadSHong Zhang chshift = PETSC_FALSE; 436*78910aadSHong Zhang for (i=0; i<am; i++) { 437*78910aadSHong Zhang rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 438*78910aadSHong Zhang } 439*78910aadSHong Zhang 440*78910aadSHong Zhang for (k = 0; k<am; k++){ 441*78910aadSHong Zhang /* initialize k-th row with elements nonzero in row perm(k) of A */ 442*78910aadSHong Zhang nz = ai[k+1] - ai[k]; 443*78910aadSHong Zhang acol = aj + ai[k]; 444*78910aadSHong Zhang aval = aa + ai[k]; 445*78910aadSHong Zhang bval = ba + bi[k]; 446*78910aadSHong Zhang while (nz -- ){ 447*78910aadSHong Zhang if (*acol < k) { /* skip lower triangular entries */ 448*78910aadSHong Zhang acol++; aval++; 449*78910aadSHong Zhang } else { 450*78910aadSHong Zhang rtmp[*acol++] = *aval++; 451*78910aadSHong Zhang *bval++ = 0.0; /* for in-place factorization */ 452*78910aadSHong Zhang } 453*78910aadSHong Zhang } 454*78910aadSHong Zhang /* damp the diagonal of the matrix */ 455*78910aadSHong Zhang if (ndamp||nshift) rtmp[k] += damping+shift_amount; 456*78910aadSHong Zhang 457*78910aadSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 458*78910aadSHong Zhang dk = rtmp[k]; 459*78910aadSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 460*78910aadSHong Zhang 461*78910aadSHong Zhang while (i < k){ 462*78910aadSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 463*78910aadSHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 464*78910aadSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 465*78910aadSHong Zhang uikdi = - ba[ili]*ba[bi[i]]; 466*78910aadSHong Zhang dk += uikdi*ba[ili]; 467*78910aadSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 468*78910aadSHong Zhang 469*78910aadSHong Zhang /* add multiple of row i to k-th row ... */ 470*78910aadSHong Zhang jmin = ili + 1; 471*78910aadSHong Zhang nz = bi[i+1] - jmin; 472*78910aadSHong Zhang if (nz > 0){ 473*78910aadSHong Zhang bcol = bj + jmin; 474*78910aadSHong Zhang bval = ba + jmin; 475*78910aadSHong Zhang while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 476*78910aadSHong Zhang /* update il and jl for i-th row */ 477*78910aadSHong Zhang il[i] = jmin; 478*78910aadSHong Zhang j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 479*78910aadSHong Zhang } 480*78910aadSHong Zhang i = nexti; 481*78910aadSHong Zhang } 482*78910aadSHong Zhang 483*78910aadSHong Zhang if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 484*78910aadSHong Zhang /* calculate a shift that would make this row diagonally dominant */ 485*78910aadSHong Zhang PetscReal rs = PetscAbs(PetscRealPart(dk)); 486*78910aadSHong Zhang jmin = bi[k]+1; 487*78910aadSHong Zhang nz = bi[k+1] - jmin; 488*78910aadSHong Zhang if (nz){ 489*78910aadSHong Zhang bcol = bj + jmin; 490*78910aadSHong Zhang bval = ba + jmin; 491*78910aadSHong Zhang while (nz--){ 492*78910aadSHong Zhang rs += PetscAbsScalar(rtmp[*bcol++]); 493*78910aadSHong Zhang } 494*78910aadSHong Zhang } 495*78910aadSHong Zhang /* if this shift is less than the previous, just up the previous 496*78910aadSHong Zhang one by a bit */ 497*78910aadSHong Zhang shift_amount = PetscMax(rs,1.1*shift_amount); 498*78910aadSHong Zhang chshift = PETSC_TRUE; 499*78910aadSHong Zhang /* Unlike in the ILU case there is no exit condition on nshift: 500*78910aadSHong Zhang we increase the shift until it converges. There is no guarantee that 501*78910aadSHong Zhang this algorithm converges faster or slower, or is better or worse 502*78910aadSHong Zhang than the ILU algorithm. */ 503*78910aadSHong Zhang nshift++; 504*78910aadSHong Zhang break; 505*78910aadSHong Zhang } 506*78910aadSHong Zhang if (PetscRealPart(dk) < zeropivot){ 507*78910aadSHong Zhang if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 508*78910aadSHong Zhang if (damping > 0.0) { 509*78910aadSHong Zhang if (ndamp) damping *= 2.0; 510*78910aadSHong Zhang damp = PETSC_TRUE; 511*78910aadSHong Zhang ndamp++; 512*78910aadSHong Zhang break; 513*78910aadSHong Zhang } else if (PetscAbsScalar(dk) < zeropivot){ 514*78910aadSHong Zhang SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 515*78910aadSHong Zhang } else { 516*78910aadSHong Zhang PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 517*78910aadSHong Zhang } 518*78910aadSHong Zhang } 519*78910aadSHong Zhang 520*78910aadSHong Zhang /* copy data into U(k,:) */ 521*78910aadSHong Zhang ba[bi[k]] = 1.0/dk; 522*78910aadSHong Zhang jmin = bi[k]+1; 523*78910aadSHong Zhang nz = bi[k+1] - jmin; 524*78910aadSHong Zhang if (nz){ 525*78910aadSHong Zhang bcol = bj + jmin; 526*78910aadSHong Zhang bval = ba + jmin; 527*78910aadSHong Zhang while (nz--){ 528*78910aadSHong Zhang *bval++ = rtmp[*bcol]; 529*78910aadSHong Zhang rtmp[*bcol++] = 0.0; 530*78910aadSHong Zhang } 531*78910aadSHong Zhang /* add k-th row into il and jl */ 532*78910aadSHong Zhang il[k] = jmin; 533*78910aadSHong Zhang i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 534*78910aadSHong Zhang } 535*78910aadSHong Zhang } 536*78910aadSHong Zhang } while (damp||chshift); 537*78910aadSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 538*78910aadSHong Zhang 539*78910aadSHong Zhang C->factor = FACTOR_CHOLESKY; 540*78910aadSHong Zhang C->assembled = PETSC_TRUE; 541*78910aadSHong Zhang C->preallocated = PETSC_TRUE; 542*78910aadSHong Zhang PetscLogFlops(C->m); 543*78910aadSHong Zhang if (ndamp) { 544*78910aadSHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 545*78910aadSHong Zhang } 546*78910aadSHong Zhang if (nshift) { 547*78910aadSHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift); 548*78910aadSHong Zhang } 549c05c3958SHong Zhang PetscFunctionReturn(0); 550c05c3958SHong Zhang } 551c05c3958SHong Zhang 552c05c3958SHong Zhang #include "petscbt.h" 553c05c3958SHong Zhang #include "src/mat/utils/freespace.h" 554c05c3958SHong Zhang #undef __FUNCT__ 555c05c3958SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ" 556c05c3958SHong Zhang PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 557c05c3958SHong Zhang { 558*78910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 559*78910aadSHong Zhang Mat_SeqSBAIJ *b; 560*78910aadSHong Zhang Mat B; 561*78910aadSHong Zhang PetscErrorCode ierr; 562*78910aadSHong Zhang PetscTruth perm_identity; 563*78910aadSHong Zhang PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->bs,*ui; 564*78910aadSHong Zhang PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 565*78910aadSHong Zhang PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 566*78910aadSHong Zhang PetscReal fill=info->fill,levels=info->levels; 567*78910aadSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 568*78910aadSHong Zhang FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 569*78910aadSHong Zhang PetscBT lnkbt; 570*78910aadSHong Zhang 571c05c3958SHong Zhang PetscFunctionBegin; 572*78910aadSHong Zhang if (bs > 1) SETERRQ(PETSC_ERR_USER,"not done yet"); 573*78910aadSHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 574*78910aadSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 575*78910aadSHong Zhang 576*78910aadSHong Zhang /* special case that simply copies fill pattern */ 577*78910aadSHong Zhang if (!levels && perm_identity) { 578*78910aadSHong Zhang ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 579*78910aadSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 580*78910aadSHong Zhang for (i=0; i<am; i++) { 581*78910aadSHong Zhang ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 582*78910aadSHong Zhang } 583*78910aadSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 584*78910aadSHong Zhang B = *fact; 585*78910aadSHong Zhang ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 586*78910aadSHong Zhang ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 587*78910aadSHong Zhang 588*78910aadSHong Zhang b = (Mat_SeqSBAIJ*)B->data; 589*78910aadSHong Zhang uj = b->j; 590*78910aadSHong Zhang for (i=0; i<am; i++) { 591*78910aadSHong Zhang aj = a->j + a->diag[i]; 592*78910aadSHong Zhang for (j=0; j<ui[i]; j++){ 593*78910aadSHong Zhang *uj++ = *aj++; 594*78910aadSHong Zhang } 595*78910aadSHong Zhang b->ilen[i] = ui[i]; 596*78910aadSHong Zhang } 597*78910aadSHong Zhang ierr = PetscFree(ui);CHKERRQ(ierr); 598*78910aadSHong Zhang ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 599*78910aadSHong Zhang ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 600*78910aadSHong Zhang 601*78910aadSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 602*78910aadSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 603*78910aadSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 604*78910aadSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 605*78910aadSHong Zhang PetscFunctionReturn(0); 606*78910aadSHong Zhang } 607*78910aadSHong Zhang 608*78910aadSHong Zhang /* initialization */ 609*78910aadSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 610*78910aadSHong Zhang ui[0] = 0; 611*78910aadSHong Zhang ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 612*78910aadSHong Zhang 613*78910aadSHong Zhang /* jl: linked list for storing indices of the pivot rows 614*78910aadSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 615*78910aadSHong Zhang ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 616*78910aadSHong Zhang il = jl + am; 617*78910aadSHong Zhang uj_ptr = (PetscInt**)(il + am); 618*78910aadSHong Zhang uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 619*78910aadSHong Zhang for (i=0; i<am; i++){ 620*78910aadSHong Zhang jl[i] = am; il[i] = 0; 621*78910aadSHong Zhang } 622*78910aadSHong Zhang 623*78910aadSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 624*78910aadSHong Zhang nlnk = am + 1; 625*78910aadSHong Zhang ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 626*78910aadSHong Zhang 627*78910aadSHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 628*78910aadSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 629*78910aadSHong Zhang current_space = free_space; 630*78910aadSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 631*78910aadSHong Zhang current_space_lvl = free_space_lvl; 632*78910aadSHong Zhang 633*78910aadSHong Zhang for (k=0; k<am; k++){ /* for each active row k */ 634*78910aadSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 635*78910aadSHong Zhang nzk = 0; 636*78910aadSHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 637*78910aadSHong Zhang ncols_upper = 0; 638*78910aadSHong Zhang cols = cols_lvl + am; 639*78910aadSHong Zhang for (j=0; j<ncols; j++){ 640*78910aadSHong Zhang i = rip[*(aj + ai[rip[k]] + j)]; 641*78910aadSHong Zhang if (i >= k){ /* only take upper triangular entry */ 642*78910aadSHong Zhang cols[ncols_upper] = i; 643*78910aadSHong Zhang cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 644*78910aadSHong Zhang ncols_upper++; 645*78910aadSHong Zhang } 646*78910aadSHong Zhang } 647*78910aadSHong Zhang ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 648*78910aadSHong Zhang nzk += nlnk; 649*78910aadSHong Zhang 650*78910aadSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 651*78910aadSHong Zhang prow = jl[k]; /* 1st pivot row */ 652*78910aadSHong Zhang 653*78910aadSHong Zhang while (prow < k){ 654*78910aadSHong Zhang nextprow = jl[prow]; 655*78910aadSHong Zhang 656*78910aadSHong Zhang /* merge prow into k-th row */ 657*78910aadSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 658*78910aadSHong Zhang jmax = ui[prow+1]; 659*78910aadSHong Zhang ncols = jmax-jmin; 660*78910aadSHong Zhang i = jmin - ui[prow]; 661*78910aadSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 662*78910aadSHong Zhang for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 663*78910aadSHong Zhang ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 664*78910aadSHong Zhang nzk += nlnk; 665*78910aadSHong Zhang 666*78910aadSHong Zhang /* update il and jl for prow */ 667*78910aadSHong Zhang if (jmin < jmax){ 668*78910aadSHong Zhang il[prow] = jmin; 669*78910aadSHong Zhang j = *cols; jl[prow] = jl[j]; jl[j] = prow; 670*78910aadSHong Zhang } 671*78910aadSHong Zhang prow = nextprow; 672*78910aadSHong Zhang } 673*78910aadSHong Zhang 674*78910aadSHong Zhang /* if free space is not available, make more free space */ 675*78910aadSHong Zhang if (current_space->local_remaining<nzk) { 676*78910aadSHong Zhang i = am - k + 1; /* num of unfactored rows */ 677*78910aadSHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 678*78910aadSHong Zhang ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 679*78910aadSHong Zhang ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 680*78910aadSHong Zhang reallocs++; 681*78910aadSHong Zhang } 682*78910aadSHong Zhang 683*78910aadSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 684*78910aadSHong Zhang ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 685*78910aadSHong Zhang 686*78910aadSHong Zhang /* add the k-th row into il and jl */ 687*78910aadSHong Zhang if (nzk-1 > 0){ 688*78910aadSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 689*78910aadSHong Zhang jl[k] = jl[i]; jl[i] = k; 690*78910aadSHong Zhang il[k] = ui[k] + 1; 691*78910aadSHong Zhang } 692*78910aadSHong Zhang uj_ptr[k] = current_space->array; 693*78910aadSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 694*78910aadSHong Zhang 695*78910aadSHong Zhang current_space->array += nzk; 696*78910aadSHong Zhang current_space->local_used += nzk; 697*78910aadSHong Zhang current_space->local_remaining -= nzk; 698*78910aadSHong Zhang 699*78910aadSHong Zhang current_space_lvl->array += nzk; 700*78910aadSHong Zhang current_space_lvl->local_used += nzk; 701*78910aadSHong Zhang current_space_lvl->local_remaining -= nzk; 702*78910aadSHong Zhang 703*78910aadSHong Zhang ui[k+1] = ui[k] + nzk; 704*78910aadSHong Zhang } 705*78910aadSHong Zhang 706*78910aadSHong Zhang if (ai[am] != 0) { 707*78910aadSHong Zhang PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 708*78910aadSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 709*78910aadSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 710*78910aadSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 711*78910aadSHong Zhang } else { 712*78910aadSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Empty matrix.\n"); 713*78910aadSHong Zhang } 714*78910aadSHong Zhang 715*78910aadSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 716*78910aadSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 717*78910aadSHong Zhang ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 718*78910aadSHong Zhang 719*78910aadSHong Zhang /* destroy list of free space and other temporary array(s) */ 720*78910aadSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 721*78910aadSHong Zhang ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 722*78910aadSHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 723*78910aadSHong Zhang ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 724*78910aadSHong Zhang 725*78910aadSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 726*78910aadSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 727*78910aadSHong Zhang B = *fact; 728*78910aadSHong Zhang ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 729*78910aadSHong Zhang ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 730*78910aadSHong Zhang 731*78910aadSHong Zhang b = (Mat_SeqSBAIJ*)B->data; 732*78910aadSHong Zhang ierr = PetscFree(b->imax);CHKERRQ(ierr); 733*78910aadSHong Zhang b->singlemalloc = PETSC_FALSE; 734*78910aadSHong Zhang /* the next line frees the default space generated by the Create() */ 735*78910aadSHong Zhang ierr = PetscFree(b->a);CHKERRQ(ierr); 736*78910aadSHong Zhang ierr = PetscFree(b->ilen);CHKERRQ(ierr); 737*78910aadSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 738*78910aadSHong Zhang b->j = uj; 739*78910aadSHong Zhang b->i = ui; 740*78910aadSHong Zhang b->diag = 0; 741*78910aadSHong Zhang b->ilen = 0; 742*78910aadSHong Zhang b->imax = 0; 743*78910aadSHong Zhang b->row = perm; 744*78910aadSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 745*78910aadSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 746*78910aadSHong Zhang b->icol = perm; 747*78910aadSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 748*78910aadSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 749*78910aadSHong Zhang PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 750*78910aadSHong Zhang b->maxnz = b->nz = ui[am]; 751*78910aadSHong Zhang 752*78910aadSHong Zhang B->factor = FACTOR_CHOLESKY; 753*78910aadSHong Zhang B->info.factor_mallocs = reallocs; 754*78910aadSHong Zhang B->info.fill_ratio_given = fill; 755*78910aadSHong Zhang if (ai[am] != 0) { 756*78910aadSHong Zhang B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 757*78910aadSHong Zhang } else { 758*78910aadSHong Zhang B->info.fill_ratio_needed = 0.0; 759*78910aadSHong Zhang } 760*78910aadSHong Zhang if (perm_identity){ 761*78910aadSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 762*78910aadSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 763*78910aadSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 764*78910aadSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 765*78910aadSHong Zhang } else { 766*78910aadSHong Zhang (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 767*78910aadSHong Zhang } 768c05c3958SHong Zhang PetscFunctionReturn(0); 769c05c3958SHong Zhang } 770c05c3958SHong Zhang 771c05c3958SHong Zhang #undef __FUNCT__ 772c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ" 773c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 774c05c3958SHong Zhang { 775*78910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 776*78910aadSHong Zhang Mat_SeqSBAIJ *b; 777*78910aadSHong Zhang Mat B; 778*78910aadSHong Zhang PetscErrorCode ierr; 779*78910aadSHong Zhang PetscTruth perm_identity; 780*78910aadSHong Zhang PetscReal fill = info->fill; 781*78910aadSHong Zhang PetscInt *rip,*riip,i,mbs=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 782*78910aadSHong Zhang PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 783*78910aadSHong Zhang PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 784*78910aadSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 785*78910aadSHong Zhang PetscBT lnkbt; 786*78910aadSHong Zhang IS iperm; 787*78910aadSHong Zhang 788c05c3958SHong Zhang PetscFunctionBegin; 789*78910aadSHong Zhang /* check whether perm is the identity mapping */ 790*78910aadSHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 791*78910aadSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 792*78910aadSHong Zhang 793*78910aadSHong Zhang if (!perm_identity){ 794*78910aadSHong Zhang /* check if perm is symmetric! */ 795*78910aadSHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 796*78910aadSHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 797*78910aadSHong Zhang for (i=0; i<mbs; i++) { 798*78910aadSHong Zhang if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 799*78910aadSHong Zhang } 800*78910aadSHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 801*78910aadSHong Zhang ierr = ISDestroy(iperm);CHKERRQ(ierr); 802*78910aadSHong Zhang } 803*78910aadSHong Zhang 804*78910aadSHong Zhang /* initialization */ 805*78910aadSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 806*78910aadSHong Zhang ui[0] = 0; 807*78910aadSHong Zhang 808*78910aadSHong Zhang /* jl: linked list for storing indices of the pivot rows 809*78910aadSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 810*78910aadSHong Zhang ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 811*78910aadSHong Zhang il = jl + mbs; 812*78910aadSHong Zhang cols = il + mbs; 813*78910aadSHong Zhang ui_ptr = (PetscInt**)(cols + mbs); 814*78910aadSHong Zhang for (i=0; i<mbs; i++){ 815*78910aadSHong Zhang jl[i] = mbs; il[i] = 0; 816*78910aadSHong Zhang } 817*78910aadSHong Zhang 818*78910aadSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 819*78910aadSHong Zhang nlnk = mbs + 1; 820*78910aadSHong Zhang ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 821*78910aadSHong Zhang 822*78910aadSHong Zhang /* initial FreeSpace size is fill*(ai[mbs]+1) */ 823*78910aadSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 824*78910aadSHong Zhang current_space = free_space; 825*78910aadSHong Zhang 826*78910aadSHong Zhang for (k=0; k<mbs; k++){ /* for each active row k */ 827*78910aadSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 828*78910aadSHong Zhang nzk = 0; 829*78910aadSHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 830*78910aadSHong Zhang ncols_upper = 0; 831*78910aadSHong Zhang for (j=0; j<ncols; j++){ 832*78910aadSHong Zhang i = rip[*(aj + ai[rip[k]] + j)]; 833*78910aadSHong Zhang if (i >= k){ /* only take upper triangular entry */ 834*78910aadSHong Zhang cols[ncols_upper] = i; 835*78910aadSHong Zhang ncols_upper++; 836*78910aadSHong Zhang } 837*78910aadSHong Zhang } 838*78910aadSHong Zhang ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 839*78910aadSHong Zhang nzk += nlnk; 840*78910aadSHong Zhang 841*78910aadSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 842*78910aadSHong Zhang prow = jl[k]; /* 1st pivot row */ 843*78910aadSHong Zhang 844*78910aadSHong Zhang while (prow < k){ 845*78910aadSHong Zhang nextprow = jl[prow]; 846*78910aadSHong Zhang /* merge prow into k-th row */ 847*78910aadSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 848*78910aadSHong Zhang jmax = ui[prow+1]; 849*78910aadSHong Zhang ncols = jmax-jmin; 850*78910aadSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 851*78910aadSHong Zhang ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 852*78910aadSHong Zhang nzk += nlnk; 853*78910aadSHong Zhang 854*78910aadSHong Zhang /* update il and jl for prow */ 855*78910aadSHong Zhang if (jmin < jmax){ 856*78910aadSHong Zhang il[prow] = jmin; 857*78910aadSHong Zhang j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 858*78910aadSHong Zhang } 859*78910aadSHong Zhang prow = nextprow; 860*78910aadSHong Zhang } 861*78910aadSHong Zhang 862*78910aadSHong Zhang /* if free space is not available, make more free space */ 863*78910aadSHong Zhang if (current_space->local_remaining<nzk) { 864*78910aadSHong Zhang i = mbs - k + 1; /* num of unfactored rows */ 865*78910aadSHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 866*78910aadSHong Zhang ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 867*78910aadSHong Zhang reallocs++; 868*78910aadSHong Zhang } 869*78910aadSHong Zhang 870*78910aadSHong Zhang /* copy data into free space, then initialize lnk */ 871*78910aadSHong Zhang ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 872*78910aadSHong Zhang 873*78910aadSHong Zhang /* add the k-th row into il and jl */ 874*78910aadSHong Zhang if (nzk-1 > 0){ 875*78910aadSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 876*78910aadSHong Zhang jl[k] = jl[i]; jl[i] = k; 877*78910aadSHong Zhang il[k] = ui[k] + 1; 878*78910aadSHong Zhang } 879*78910aadSHong Zhang ui_ptr[k] = current_space->array; 880*78910aadSHong Zhang current_space->array += nzk; 881*78910aadSHong Zhang current_space->local_used += nzk; 882*78910aadSHong Zhang current_space->local_remaining -= nzk; 883*78910aadSHong Zhang 884*78910aadSHong Zhang ui[k+1] = ui[k] + nzk; 885*78910aadSHong Zhang } 886*78910aadSHong Zhang 887*78910aadSHong Zhang if (ai[mbs] != 0) { 888*78910aadSHong Zhang PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 889*78910aadSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 890*78910aadSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 891*78910aadSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 892*78910aadSHong Zhang } else { 893*78910aadSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 894*78910aadSHong Zhang } 895*78910aadSHong Zhang 896*78910aadSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 897*78910aadSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 898*78910aadSHong Zhang 899*78910aadSHong Zhang /* destroy list of free space and other temporary array(s) */ 900*78910aadSHong Zhang ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 901*78910aadSHong Zhang ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 902*78910aadSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 903*78910aadSHong Zhang 904*78910aadSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 905*78910aadSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 906*78910aadSHong Zhang B = *fact; 907*78910aadSHong Zhang ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 908*78910aadSHong Zhang ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 909*78910aadSHong Zhang 910*78910aadSHong Zhang b = (Mat_SeqSBAIJ*)B->data; 911*78910aadSHong Zhang ierr = PetscFree(b->imax);CHKERRQ(ierr); 912*78910aadSHong Zhang b->singlemalloc = PETSC_FALSE; 913*78910aadSHong Zhang /* the next line frees the default space generated by the Create() */ 914*78910aadSHong Zhang ierr = PetscFree(b->a);CHKERRQ(ierr); 915*78910aadSHong Zhang ierr = PetscFree(b->ilen);CHKERRQ(ierr); 916*78910aadSHong Zhang ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 917*78910aadSHong Zhang b->j = uj; 918*78910aadSHong Zhang b->i = ui; 919*78910aadSHong Zhang b->diag = 0; 920*78910aadSHong Zhang b->ilen = 0; 921*78910aadSHong Zhang b->imax = 0; 922*78910aadSHong Zhang b->row = perm; 923*78910aadSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 924*78910aadSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 925*78910aadSHong Zhang b->icol = perm; 926*78910aadSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 927*78910aadSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 928*78910aadSHong Zhang PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 929*78910aadSHong Zhang b->maxnz = b->nz = ui[mbs]; 930*78910aadSHong Zhang 931*78910aadSHong Zhang B->factor = FACTOR_CHOLESKY; 932*78910aadSHong Zhang B->info.factor_mallocs = reallocs; 933*78910aadSHong Zhang B->info.fill_ratio_given = fill; 934*78910aadSHong Zhang if (ai[mbs] != 0) { 935*78910aadSHong Zhang B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 936*78910aadSHong Zhang } else { 937*78910aadSHong Zhang B->info.fill_ratio_needed = 0.0; 938*78910aadSHong Zhang } 939*78910aadSHong Zhang if (perm_identity){ 940*78910aadSHong Zhang (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 941*78910aadSHong Zhang (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 942*78910aadSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)(*fact),"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 943*78910aadSHong Zhang (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 944*78910aadSHong Zhang } else { 945*78910aadSHong Zhang (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 946*78910aadSHong Zhang } 947c05c3958SHong Zhang PetscFunctionReturn(0); 948c05c3958SHong Zhang } 949