15d517e7eSBarry Smith /* 2ec3a8b7bSBarry Smith Factorization code for BAIJ format. 35d517e7eSBarry Smith */ 470f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 54078e994SBarry Smith #include "src/inline/ilu.h" 6ec3a8b7bSBarry Smith 76d84be18SBarry Smith /* ------------------------------------------------------------*/ 86d84be18SBarry Smith /* 94eeb42bcSBarry Smith Version for when blocks are 2 by 2 104eeb42bcSBarry Smith */ 114a2ae208SSatish Balay #undef __FUNCT__ 124a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 13dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 144eeb42bcSBarry Smith { 154eeb42bcSBarry Smith Mat C = *B; 164eeb42bcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 177cdf2dbbSSatish Balay IS isrow = b->row,isicol = b->icol; 186849ba73SBarry Smith PetscErrorCode ierr; 19690b6cddSBarry Smith PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 20690b6cddSBarry Smith PetscInt *ajtmpold,*ajtmp,nz,row; 21690b6cddSBarry Smith PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 22329f5518SBarry Smith MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 232515f8d2SSatish Balay MatScalar p1,p2,p3,p4; 243f1db9ecSBarry Smith MatScalar *ba = b->a,*aa = a->a; 254eeb42bcSBarry Smith 263a40ed3dSBarry Smith PetscFunctionBegin; 274eeb42bcSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 284eeb42bcSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 29b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 304eeb42bcSBarry Smith 314eeb42bcSBarry Smith for (i=0; i<n; i++) { 324078e994SBarry Smith nz = bi[i+1] - bi[i]; 334078e994SBarry Smith ajtmp = bj + bi[i]; 344eeb42bcSBarry Smith for (j=0; j<nz; j++) { 354eeb42bcSBarry Smith x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 364eeb42bcSBarry Smith } 374eeb42bcSBarry Smith /* load in initial (unfactored row) */ 384eeb42bcSBarry Smith idx = r[i]; 394078e994SBarry Smith nz = ai[idx+1] - ai[idx]; 404078e994SBarry Smith ajtmpold = aj + ai[idx]; 414078e994SBarry Smith v = aa + 4*ai[idx]; 424eeb42bcSBarry Smith for (j=0; j<nz; j++) { 434eeb42bcSBarry Smith x = rtmp+4*ic[ajtmpold[j]]; 444eeb42bcSBarry Smith x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 454eeb42bcSBarry Smith v += 4; 464eeb42bcSBarry Smith } 474eeb42bcSBarry Smith row = *ajtmp++; 484eeb42bcSBarry Smith while (row < i) { 494eeb42bcSBarry Smith pc = rtmp + 4*row; 504eeb42bcSBarry Smith p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 5188685aaeSLois Curfman McInnes if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 524078e994SBarry Smith pv = ba + 4*diag_offset[row]; 534078e994SBarry Smith pj = bj + diag_offset[row] + 1; 544eeb42bcSBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 554eeb42bcSBarry Smith pc[0] = m1 = p1*x1 + p3*x2; 564eeb42bcSBarry Smith pc[1] = m2 = p2*x1 + p4*x2; 574eeb42bcSBarry Smith pc[2] = m3 = p1*x3 + p3*x4; 584eeb42bcSBarry Smith pc[3] = m4 = p2*x3 + p4*x4; 594078e994SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 604eeb42bcSBarry Smith pv += 4; 614eeb42bcSBarry Smith for (j=0; j<nz; j++) { 624eeb42bcSBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 634eeb42bcSBarry Smith x = rtmp + 4*pj[j]; 644eeb42bcSBarry Smith x[0] -= m1*x1 + m3*x2; 654eeb42bcSBarry Smith x[1] -= m2*x1 + m4*x2; 664eeb42bcSBarry Smith x[2] -= m1*x3 + m3*x4; 674eeb42bcSBarry Smith x[3] -= m2*x3 + m4*x4; 684eeb42bcSBarry Smith pv += 4; 694eeb42bcSBarry Smith } 70b0a32e0cSBarry Smith PetscLogFlops(16*nz+12); 714eeb42bcSBarry Smith } 724eeb42bcSBarry Smith row = *ajtmp++; 734eeb42bcSBarry Smith } 744eeb42bcSBarry Smith /* finished row so stick it into b->a */ 754078e994SBarry Smith pv = ba + 4*bi[i]; 764078e994SBarry Smith pj = bj + bi[i]; 774078e994SBarry Smith nz = bi[i+1] - bi[i]; 784eeb42bcSBarry Smith for (j=0; j<nz; j++) { 794eeb42bcSBarry Smith x = rtmp+4*pj[j]; 804eeb42bcSBarry Smith pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 814eeb42bcSBarry Smith pv += 4; 824eeb42bcSBarry Smith } 834eeb42bcSBarry Smith /* invert diagonal block */ 844078e994SBarry Smith w = ba + 4*diag_offset[i]; 854cd38560SBarry Smith ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 864eeb42bcSBarry Smith } 874eeb42bcSBarry Smith 88606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 894eeb42bcSBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 904eeb42bcSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 914eeb42bcSBarry Smith C->factor = FACTOR_LU; 924eeb42bcSBarry Smith C->assembled = PETSC_TRUE; 93b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 943a40ed3dSBarry Smith PetscFunctionReturn(0); 954eeb42bcSBarry Smith } 964cd38560SBarry Smith /* 974cd38560SBarry Smith Version for when blocks are 2 by 2 Using natural ordering 984cd38560SBarry Smith */ 994a2ae208SSatish Balay #undef __FUNCT__ 1004a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 101dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 1024cd38560SBarry Smith { 1034cd38560SBarry Smith Mat C = *B; 1044cd38560SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 105dfbe8321SBarry Smith PetscErrorCode ierr; 106690b6cddSBarry Smith PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 107690b6cddSBarry Smith PetscInt *ajtmpold,*ajtmp,nz,row; 108690b6cddSBarry Smith PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 109329f5518SBarry Smith MatScalar *pv,*v,*rtmp,*pc,*w,*x; 1102515f8d2SSatish Balay MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 1114cd38560SBarry Smith MatScalar *ba = b->a,*aa = a->a; 1124cd38560SBarry Smith 1134cd38560SBarry Smith PetscFunctionBegin; 114b0a32e0cSBarry Smith ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1154cd38560SBarry Smith 1164cd38560SBarry Smith for (i=0; i<n; i++) { 1174cd38560SBarry Smith nz = bi[i+1] - bi[i]; 1184cd38560SBarry Smith ajtmp = bj + bi[i]; 1194cd38560SBarry Smith for (j=0; j<nz; j++) { 1204cd38560SBarry Smith x = rtmp+4*ajtmp[j]; 1214cd38560SBarry Smith x[0] = x[1] = x[2] = x[3] = 0.0; 1224cd38560SBarry Smith } 1234cd38560SBarry Smith /* load in initial (unfactored row) */ 1244cd38560SBarry Smith nz = ai[i+1] - ai[i]; 1254cd38560SBarry Smith ajtmpold = aj + ai[i]; 1264cd38560SBarry Smith v = aa + 4*ai[i]; 1274cd38560SBarry Smith for (j=0; j<nz; j++) { 1284cd38560SBarry Smith x = rtmp+4*ajtmpold[j]; 1294cd38560SBarry Smith x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 1304cd38560SBarry Smith v += 4; 1314cd38560SBarry Smith } 1324cd38560SBarry Smith row = *ajtmp++; 1334cd38560SBarry Smith while (row < i) { 1344cd38560SBarry Smith pc = rtmp + 4*row; 1354cd38560SBarry Smith p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 1364cd38560SBarry Smith if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 1374cd38560SBarry Smith pv = ba + 4*diag_offset[row]; 1384cd38560SBarry Smith pj = bj + diag_offset[row] + 1; 1394cd38560SBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 1404cd38560SBarry Smith pc[0] = m1 = p1*x1 + p3*x2; 1414cd38560SBarry Smith pc[1] = m2 = p2*x1 + p4*x2; 1424cd38560SBarry Smith pc[2] = m3 = p1*x3 + p3*x4; 1434cd38560SBarry Smith pc[3] = m4 = p2*x3 + p4*x4; 1444cd38560SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 1454cd38560SBarry Smith pv += 4; 1464cd38560SBarry Smith for (j=0; j<nz; j++) { 1474cd38560SBarry Smith x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 1484cd38560SBarry Smith x = rtmp + 4*pj[j]; 1494cd38560SBarry Smith x[0] -= m1*x1 + m3*x2; 1504cd38560SBarry Smith x[1] -= m2*x1 + m4*x2; 1514cd38560SBarry Smith x[2] -= m1*x3 + m3*x4; 1524cd38560SBarry Smith x[3] -= m2*x3 + m4*x4; 1534cd38560SBarry Smith pv += 4; 1544cd38560SBarry Smith } 155b0a32e0cSBarry Smith PetscLogFlops(16*nz+12); 1564cd38560SBarry Smith } 1574cd38560SBarry Smith row = *ajtmp++; 1584cd38560SBarry Smith } 1594cd38560SBarry Smith /* finished row so stick it into b->a */ 1604cd38560SBarry Smith pv = ba + 4*bi[i]; 1614cd38560SBarry Smith pj = bj + bi[i]; 1624cd38560SBarry Smith nz = bi[i+1] - bi[i]; 1634cd38560SBarry Smith for (j=0; j<nz; j++) { 1644cd38560SBarry Smith x = rtmp+4*pj[j]; 1654cd38560SBarry Smith pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 1664cd38560SBarry Smith pv += 4; 1674cd38560SBarry Smith } 1684cd38560SBarry Smith /* invert diagonal block */ 1694cd38560SBarry Smith w = ba + 4*diag_offset[i]; 1704cd38560SBarry Smith ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 1714cd38560SBarry Smith /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 1724cd38560SBarry Smith } 1734cd38560SBarry Smith 174606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 1754cd38560SBarry Smith C->factor = FACTOR_LU; 1764cd38560SBarry Smith C->assembled = PETSC_TRUE; 177b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1784cd38560SBarry Smith PetscFunctionReturn(0); 1794cd38560SBarry Smith } 1807fc0212eSBarry Smith 1817fc0212eSBarry Smith /* ----------------------------------------------------------- */ 1827fc0212eSBarry Smith /* 1837fc0212eSBarry Smith Version for when blocks are 1 by 1. 1847fc0212eSBarry Smith */ 1854a2ae208SSatish Balay #undef __FUNCT__ 1864a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 187dfbe8321SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 1887fc0212eSBarry Smith { 1897fc0212eSBarry Smith Mat C = *B; 1907fc0212eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 1917cdf2dbbSSatish Balay IS isrow = b->row,isicol = b->icol; 1926849ba73SBarry Smith PetscErrorCode ierr; 193690b6cddSBarry Smith PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 194690b6cddSBarry Smith PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 195690b6cddSBarry Smith PetscInt *diag_offset = b->diag,diag,*pj; 196329f5518SBarry Smith MatScalar *pv,*v,*rtmp,multiplier,*pc; 1973f1db9ecSBarry Smith MatScalar *ba = b->a,*aa = a->a; 1987fc0212eSBarry Smith 1993a40ed3dSBarry Smith PetscFunctionBegin; 2007fc0212eSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2017fc0212eSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 202b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2037fc0212eSBarry Smith 2047fc0212eSBarry Smith for (i=0; i<n; i++) { 2054078e994SBarry Smith nz = bi[i+1] - bi[i]; 2064078e994SBarry Smith ajtmp = bj + bi[i]; 2077fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 2087fc0212eSBarry Smith 2097fc0212eSBarry Smith /* load in initial (unfactored row) */ 2104078e994SBarry Smith nz = ai[r[i]+1] - ai[r[i]]; 2114078e994SBarry Smith ajtmpold = aj + ai[r[i]]; 2124078e994SBarry Smith v = aa + ai[r[i]]; 2137fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 2147fc0212eSBarry Smith 2157fc0212eSBarry Smith row = *ajtmp++; 2167fc0212eSBarry Smith while (row < i) { 2177fc0212eSBarry Smith pc = rtmp + row; 2187fc0212eSBarry Smith if (*pc != 0.0) { 2194078e994SBarry Smith pv = ba + diag_offset[row]; 2204078e994SBarry Smith pj = bj + diag_offset[row] + 1; 2217fc0212eSBarry Smith multiplier = *pc * *pv++; 2227fc0212eSBarry Smith *pc = multiplier; 2234078e994SBarry Smith nz = bi[row+1] - diag_offset[row] - 1; 2247fc0212eSBarry Smith for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 225b0a32e0cSBarry Smith PetscLogFlops(1+2*nz); 2267fc0212eSBarry Smith } 2277fc0212eSBarry Smith row = *ajtmp++; 2287fc0212eSBarry Smith } 2297fc0212eSBarry Smith /* finished row so stick it into b->a */ 2304078e994SBarry Smith pv = ba + bi[i]; 2314078e994SBarry Smith pj = bj + bi[i]; 2324078e994SBarry Smith nz = bi[i+1] - bi[i]; 2337fc0212eSBarry Smith for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 2344078e994SBarry Smith diag = diag_offset[i] - bi[i]; 2357fc0212eSBarry Smith /* check pivot entry for current row */ 236a73cf429SBarry Smith if (pv[diag] == 0.0) { 23729bbc08cSBarry Smith SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 2387fc0212eSBarry Smith } 2397fc0212eSBarry Smith pv[diag] = 1.0/pv[diag]; 2407fc0212eSBarry Smith } 2417fc0212eSBarry Smith 242606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 2437fc0212eSBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2447fc0212eSBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2457fc0212eSBarry Smith C->factor = FACTOR_LU; 2467fc0212eSBarry Smith C->assembled = PETSC_TRUE; 247b0a32e0cSBarry Smith PetscLogFlops(C->n); 2483a40ed3dSBarry Smith PetscFunctionReturn(0); 2497fc0212eSBarry Smith } 2507fc0212eSBarry Smith 251273d9f13SBarry Smith 2525d517e7eSBarry Smith /* ----------------------------------------------------------- */ 2534a2ae208SSatish Balay #undef __FUNCT__ 2544a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqBAIJ" 255dfbe8321SBarry Smith PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 2565d517e7eSBarry Smith { 257dfbe8321SBarry Smith PetscErrorCode ierr; 2585d517e7eSBarry Smith Mat C; 2595d517e7eSBarry Smith 2603a40ed3dSBarry Smith PetscFunctionBegin; 261b9b97703SBarry Smith ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 2627fc0212eSBarry Smith ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 263273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 264b0a32e0cSBarry Smith PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 2653a40ed3dSBarry Smith PetscFunctionReturn(0); 2665d517e7eSBarry Smith } 2674cd38560SBarry Smith 268c05c3958SHong Zhang #include "src/mat/impls/sbaij/seq/sbaij.h" 269c05c3958SHong Zhang #undef __FUNCT__ 270c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N" 271c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,Mat *B) 272c05c3958SHong Zhang { 27378910aadSHong Zhang Mat C = *B; 27478910aadSHong Zhang Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 27578910aadSHong Zhang Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 27678910aadSHong Zhang IS ip=b->row; 277*6ad2eaddSHong Zhang PetscErrorCode ierr,(*f)(Mat,Mat*); 27878910aadSHong Zhang PetscInt *rip,i,j,mbs=a->mbs,bs=A->bs,*bi=b->i,*bj=b->j,*bcol; 27978910aadSHong Zhang PetscInt *ai=a->i,*aj=a->j; 28078910aadSHong Zhang PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 28178910aadSHong Zhang MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 28278910aadSHong Zhang PetscReal damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount; 28378910aadSHong Zhang PetscTruth damp,chshift; 28478910aadSHong Zhang PetscInt nshift=0,ndamp=0; 28578910aadSHong Zhang 286c05c3958SHong Zhang PetscFunctionBegin; 287*6ad2eaddSHong Zhang if (bs > 1) { 288*6ad2eaddSHong Zhang if (!a->sbaijMat){ 289*6ad2eaddSHong Zhang ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 290*6ad2eaddSHong Zhang } 291*6ad2eaddSHong Zhang ierr = PetscObjectQueryFunction((PetscObject)C,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr); 292*6ad2eaddSHong Zhang ierr = (*f)(a->sbaijMat,B);CHKERRQ(ierr); 293*6ad2eaddSHong Zhang ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 294*6ad2eaddSHong Zhang a->sbaijMat = PETSC_NULL; 295*6ad2eaddSHong Zhang PetscFunctionReturn(0); 296*6ad2eaddSHong Zhang } 29778910aadSHong Zhang 29878910aadSHong Zhang /* initialization */ 299*6ad2eaddSHong Zhang ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 30078910aadSHong Zhang nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 30178910aadSHong Zhang ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 30278910aadSHong Zhang jl = il + mbs; 30378910aadSHong Zhang rtmp = (MatScalar*)(jl + mbs); 30478910aadSHong Zhang 30578910aadSHong Zhang shift_amount = 0; 30678910aadSHong Zhang do { 30778910aadSHong Zhang damp = PETSC_FALSE; 30878910aadSHong Zhang chshift = PETSC_FALSE; 30978910aadSHong Zhang for (i=0; i<mbs; i++) { 31078910aadSHong Zhang rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 31178910aadSHong Zhang } 31278910aadSHong Zhang 31378910aadSHong Zhang for (k = 0; k<mbs; k++){ 31478910aadSHong Zhang bval = ba + bi[k]; 31578910aadSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 31678910aadSHong Zhang jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 31778910aadSHong Zhang for (j = jmin; j < jmax; j++){ 31878910aadSHong Zhang col = rip[aj[j]]; 31978910aadSHong Zhang if (col >= k){ /* only take upper triangular entry */ 32078910aadSHong Zhang rtmp[col] = aa[j]; 32178910aadSHong Zhang *bval++ = 0.0; /* for in-place factorization */ 32278910aadSHong Zhang } 32378910aadSHong Zhang } 32478910aadSHong Zhang /* damp the diagonal of the matrix */ 32578910aadSHong Zhang if (ndamp||nshift) rtmp[k] += damping+shift_amount; 32678910aadSHong Zhang 32778910aadSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 32878910aadSHong Zhang dk = rtmp[k]; 32978910aadSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 33078910aadSHong Zhang 33178910aadSHong Zhang while (i < k){ 33278910aadSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 33378910aadSHong Zhang 33478910aadSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 33578910aadSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 33678910aadSHong Zhang uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 33778910aadSHong Zhang dk += uikdi*ba[ili]; 33878910aadSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 33978910aadSHong Zhang 34078910aadSHong Zhang /* add multiple of row i to k-th row */ 34178910aadSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 34278910aadSHong Zhang if (jmin < jmax){ 34378910aadSHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 34478910aadSHong Zhang /* update il and jl for row i */ 34578910aadSHong Zhang il[i] = jmin; 34678910aadSHong Zhang j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 34778910aadSHong Zhang } 34878910aadSHong Zhang i = nexti; 34978910aadSHong Zhang } 35078910aadSHong Zhang 35178910aadSHong Zhang if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 35278910aadSHong Zhang /* calculate a shift that would make this row diagonally dominant */ 35378910aadSHong Zhang PetscReal rs = PetscAbs(PetscRealPart(dk)); 35478910aadSHong Zhang jmin = bi[k]+1; 35578910aadSHong Zhang nz = bi[k+1] - jmin; 35678910aadSHong Zhang if (nz){ 35778910aadSHong Zhang bcol = bj + jmin; 35878910aadSHong Zhang bval = ba + jmin; 35978910aadSHong Zhang while (nz--){ 36078910aadSHong Zhang rs += PetscAbsScalar(rtmp[*bcol++]); 36178910aadSHong Zhang } 36278910aadSHong Zhang } 36378910aadSHong Zhang /* if this shift is less than the previous, just up the previous 36478910aadSHong Zhang one by a bit */ 36578910aadSHong Zhang shift_amount = PetscMax(rs,1.1*shift_amount); 36678910aadSHong Zhang chshift = PETSC_TRUE; 36778910aadSHong Zhang /* Unlike in the ILU case there is no exit condition on nshift: 36878910aadSHong Zhang we increase the shift until it converges. There is no guarantee that 36978910aadSHong Zhang this algorithm converges faster or slower, or is better or worse 37078910aadSHong Zhang than the ILU algorithm. */ 37178910aadSHong Zhang nshift++; 37278910aadSHong Zhang break; 37378910aadSHong Zhang } 37478910aadSHong Zhang if (PetscRealPart(dk) < zeropivot){ 37578910aadSHong Zhang if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 37678910aadSHong Zhang if (damping > 0.0) { 37778910aadSHong Zhang if (ndamp) damping *= 2.0; 37878910aadSHong Zhang damp = PETSC_TRUE; 37978910aadSHong Zhang ndamp++; 38078910aadSHong Zhang break; 38178910aadSHong Zhang } else if (PetscAbsScalar(dk) < zeropivot){ 38278910aadSHong Zhang SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 38378910aadSHong Zhang } else { 38478910aadSHong Zhang PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 38578910aadSHong Zhang } 38678910aadSHong Zhang } 38778910aadSHong Zhang 38878910aadSHong Zhang /* copy data into U(k,:) */ 38978910aadSHong Zhang ba[bi[k]] = 1.0/dk; /* U(k,k) */ 39078910aadSHong Zhang jmin = bi[k]+1; jmax = bi[k+1]; 39178910aadSHong Zhang if (jmin < jmax) { 39278910aadSHong Zhang for (j=jmin; j<jmax; j++){ 39378910aadSHong Zhang col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 39478910aadSHong Zhang } 39578910aadSHong Zhang /* add the k-th row into il and jl */ 39678910aadSHong Zhang il[k] = jmin; 39778910aadSHong Zhang i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 39878910aadSHong Zhang } 39978910aadSHong Zhang } 40078910aadSHong Zhang } while (damp||chshift); 40178910aadSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 40278910aadSHong Zhang 40378910aadSHong Zhang ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 40478910aadSHong Zhang C->factor = FACTOR_CHOLESKY; 40578910aadSHong Zhang C->assembled = PETSC_TRUE; 40678910aadSHong Zhang C->preallocated = PETSC_TRUE; 40778910aadSHong Zhang PetscLogFlops(C->m); 40878910aadSHong Zhang if (ndamp) { 40978910aadSHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 41078910aadSHong Zhang } 41178910aadSHong Zhang if (nshift) { 41278910aadSHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ diagonal shifted %D shifts\n",nshift); 41378910aadSHong Zhang } 414c05c3958SHong Zhang PetscFunctionReturn(0); 415c05c3958SHong Zhang } 4164cd38560SBarry Smith 417c05c3958SHong Zhang #undef __FUNCT__ 418c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering" 419c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,Mat *fact) 420c05c3958SHong Zhang { 42178910aadSHong Zhang Mat C = *fact; 42278910aadSHong Zhang Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 42378910aadSHong Zhang Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 42478910aadSHong Zhang PetscErrorCode ierr; 42578910aadSHong Zhang PetscInt i,j,am=a->mbs,bs=A->bs; 42678910aadSHong Zhang PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 42778910aadSHong Zhang PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 42878910aadSHong Zhang MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 42978910aadSHong Zhang PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 43078910aadSHong Zhang PetscTruth damp,chshift; 43178910aadSHong Zhang PetscInt nshift=0; 43278910aadSHong Zhang 433c05c3958SHong Zhang PetscFunctionBegin; 434*6ad2eaddSHong Zhang if (bs > 1) { 435*6ad2eaddSHong Zhang SETERRQ(PETSC_ERR_USER,"not done yet"); 436*6ad2eaddSHong Zhang } 437*6ad2eaddSHong Zhang 43878910aadSHong Zhang /* initialization */ 43978910aadSHong Zhang nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 44078910aadSHong Zhang ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 44178910aadSHong Zhang jl = il + am; 44278910aadSHong Zhang rtmp = (MatScalar*)(jl + am); 44378910aadSHong Zhang 44478910aadSHong Zhang shift_amount = 0; 44578910aadSHong Zhang do { 44678910aadSHong Zhang damp = PETSC_FALSE; 44778910aadSHong Zhang chshift = PETSC_FALSE; 44878910aadSHong Zhang for (i=0; i<am; i++) { 44978910aadSHong Zhang rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 45078910aadSHong Zhang } 45178910aadSHong Zhang 45278910aadSHong Zhang for (k = 0; k<am; k++){ 45378910aadSHong Zhang /* initialize k-th row with elements nonzero in row perm(k) of A */ 45478910aadSHong Zhang nz = ai[k+1] - ai[k]; 45578910aadSHong Zhang acol = aj + ai[k]; 45678910aadSHong Zhang aval = aa + ai[k]; 45778910aadSHong Zhang bval = ba + bi[k]; 45878910aadSHong Zhang while (nz -- ){ 45978910aadSHong Zhang if (*acol < k) { /* skip lower triangular entries */ 46078910aadSHong Zhang acol++; aval++; 46178910aadSHong Zhang } else { 46278910aadSHong Zhang rtmp[*acol++] = *aval++; 46378910aadSHong Zhang *bval++ = 0.0; /* for in-place factorization */ 46478910aadSHong Zhang } 46578910aadSHong Zhang } 46678910aadSHong Zhang /* damp the diagonal of the matrix */ 46778910aadSHong Zhang if (ndamp||nshift) rtmp[k] += damping+shift_amount; 46878910aadSHong Zhang 46978910aadSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 47078910aadSHong Zhang dk = rtmp[k]; 47178910aadSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 47278910aadSHong Zhang 47378910aadSHong Zhang while (i < k){ 47478910aadSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 47578910aadSHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 47678910aadSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 47778910aadSHong Zhang uikdi = - ba[ili]*ba[bi[i]]; 47878910aadSHong Zhang dk += uikdi*ba[ili]; 47978910aadSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 48078910aadSHong Zhang 48178910aadSHong Zhang /* add multiple of row i to k-th row ... */ 48278910aadSHong Zhang jmin = ili + 1; 48378910aadSHong Zhang nz = bi[i+1] - jmin; 48478910aadSHong Zhang if (nz > 0){ 48578910aadSHong Zhang bcol = bj + jmin; 48678910aadSHong Zhang bval = ba + jmin; 48778910aadSHong Zhang while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 48878910aadSHong Zhang /* update il and jl for i-th row */ 48978910aadSHong Zhang il[i] = jmin; 49078910aadSHong Zhang j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 49178910aadSHong Zhang } 49278910aadSHong Zhang i = nexti; 49378910aadSHong Zhang } 49478910aadSHong Zhang 49578910aadSHong Zhang if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 49678910aadSHong Zhang /* calculate a shift that would make this row diagonally dominant */ 49778910aadSHong Zhang PetscReal rs = PetscAbs(PetscRealPart(dk)); 49878910aadSHong Zhang jmin = bi[k]+1; 49978910aadSHong Zhang nz = bi[k+1] - jmin; 50078910aadSHong Zhang if (nz){ 50178910aadSHong Zhang bcol = bj + jmin; 50278910aadSHong Zhang bval = ba + jmin; 50378910aadSHong Zhang while (nz--){ 50478910aadSHong Zhang rs += PetscAbsScalar(rtmp[*bcol++]); 50578910aadSHong Zhang } 50678910aadSHong Zhang } 50778910aadSHong Zhang /* if this shift is less than the previous, just up the previous 50878910aadSHong Zhang one by a bit */ 50978910aadSHong Zhang shift_amount = PetscMax(rs,1.1*shift_amount); 51078910aadSHong Zhang chshift = PETSC_TRUE; 51178910aadSHong Zhang /* Unlike in the ILU case there is no exit condition on nshift: 51278910aadSHong Zhang we increase the shift until it converges. There is no guarantee that 51378910aadSHong Zhang this algorithm converges faster or slower, or is better or worse 51478910aadSHong Zhang than the ILU algorithm. */ 51578910aadSHong Zhang nshift++; 51678910aadSHong Zhang break; 51778910aadSHong Zhang } 51878910aadSHong Zhang if (PetscRealPart(dk) < zeropivot){ 51978910aadSHong Zhang if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 52078910aadSHong Zhang if (damping > 0.0) { 52178910aadSHong Zhang if (ndamp) damping *= 2.0; 52278910aadSHong Zhang damp = PETSC_TRUE; 52378910aadSHong Zhang ndamp++; 52478910aadSHong Zhang break; 52578910aadSHong Zhang } else if (PetscAbsScalar(dk) < zeropivot){ 52678910aadSHong Zhang SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 52778910aadSHong Zhang } else { 52878910aadSHong Zhang PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 52978910aadSHong Zhang } 53078910aadSHong Zhang } 53178910aadSHong Zhang 53278910aadSHong Zhang /* copy data into U(k,:) */ 53378910aadSHong Zhang ba[bi[k]] = 1.0/dk; 53478910aadSHong Zhang jmin = bi[k]+1; 53578910aadSHong Zhang nz = bi[k+1] - jmin; 53678910aadSHong Zhang if (nz){ 53778910aadSHong Zhang bcol = bj + jmin; 53878910aadSHong Zhang bval = ba + jmin; 53978910aadSHong Zhang while (nz--){ 54078910aadSHong Zhang *bval++ = rtmp[*bcol]; 54178910aadSHong Zhang rtmp[*bcol++] = 0.0; 54278910aadSHong Zhang } 54378910aadSHong Zhang /* add k-th row into il and jl */ 54478910aadSHong Zhang il[k] = jmin; 54578910aadSHong Zhang i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 54678910aadSHong Zhang } 54778910aadSHong Zhang } 54878910aadSHong Zhang } while (damp||chshift); 54978910aadSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 55078910aadSHong Zhang 55178910aadSHong Zhang C->factor = FACTOR_CHOLESKY; 55278910aadSHong Zhang C->assembled = PETSC_TRUE; 55378910aadSHong Zhang C->preallocated = PETSC_TRUE; 55478910aadSHong Zhang PetscLogFlops(C->m); 55578910aadSHong Zhang if (ndamp) { 55678910aadSHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 55778910aadSHong Zhang } 55878910aadSHong Zhang if (nshift) { 55978910aadSHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift); 56078910aadSHong Zhang } 561c05c3958SHong Zhang PetscFunctionReturn(0); 562c05c3958SHong Zhang } 563c05c3958SHong Zhang 564c05c3958SHong Zhang #include "petscbt.h" 565c05c3958SHong Zhang #include "src/mat/utils/freespace.h" 566c05c3958SHong Zhang #undef __FUNCT__ 567c05c3958SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ" 568c05c3958SHong Zhang PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 569c05c3958SHong Zhang { 57078910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 57178910aadSHong Zhang Mat_SeqSBAIJ *b; 57278910aadSHong Zhang Mat B; 57378910aadSHong Zhang PetscErrorCode ierr; 57478910aadSHong Zhang PetscTruth perm_identity; 57578910aadSHong Zhang PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->bs,*ui; 57678910aadSHong Zhang PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 57778910aadSHong Zhang PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 57878910aadSHong Zhang PetscReal fill=info->fill,levels=info->levels; 57978910aadSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 58078910aadSHong Zhang FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 58178910aadSHong Zhang PetscBT lnkbt; 58278910aadSHong Zhang 583c05c3958SHong Zhang PetscFunctionBegin; 584*6ad2eaddSHong Zhang if (bs > 1){ 585*6ad2eaddSHong Zhang if (!a->sbaijMat){ 586*6ad2eaddSHong Zhang ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 587*6ad2eaddSHong Zhang } 588*6ad2eaddSHong Zhang ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 589*6ad2eaddSHong Zhang B = *fact; 590*6ad2eaddSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 591*6ad2eaddSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 592*6ad2eaddSHong Zhang PetscFunctionReturn(0); 593*6ad2eaddSHong Zhang } 594*6ad2eaddSHong Zhang 59578910aadSHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 59678910aadSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 59778910aadSHong Zhang 59878910aadSHong Zhang /* special case that simply copies fill pattern */ 59978910aadSHong Zhang if (!levels && perm_identity) { 60078910aadSHong Zhang ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 60178910aadSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 60278910aadSHong Zhang for (i=0; i<am; i++) { 60378910aadSHong Zhang ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 60478910aadSHong Zhang } 60578910aadSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 60678910aadSHong Zhang B = *fact; 60778910aadSHong Zhang ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 60878910aadSHong Zhang ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 60978910aadSHong Zhang 61078910aadSHong Zhang b = (Mat_SeqSBAIJ*)B->data; 61178910aadSHong Zhang uj = b->j; 61278910aadSHong Zhang for (i=0; i<am; i++) { 61378910aadSHong Zhang aj = a->j + a->diag[i]; 61478910aadSHong Zhang for (j=0; j<ui[i]; j++){ 61578910aadSHong Zhang *uj++ = *aj++; 61678910aadSHong Zhang } 61778910aadSHong Zhang b->ilen[i] = ui[i]; 61878910aadSHong Zhang } 61978910aadSHong Zhang ierr = PetscFree(ui);CHKERRQ(ierr); 62078910aadSHong Zhang ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 62178910aadSHong Zhang ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 62278910aadSHong Zhang 62378910aadSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 62478910aadSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 62578910aadSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 62678910aadSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 62778910aadSHong Zhang PetscFunctionReturn(0); 62878910aadSHong Zhang } 62978910aadSHong Zhang 63078910aadSHong Zhang /* initialization */ 63178910aadSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 63278910aadSHong Zhang ui[0] = 0; 63378910aadSHong Zhang ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 63478910aadSHong Zhang 63578910aadSHong Zhang /* jl: linked list for storing indices of the pivot rows 63678910aadSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 63778910aadSHong Zhang ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 63878910aadSHong Zhang il = jl + am; 63978910aadSHong Zhang uj_ptr = (PetscInt**)(il + am); 64078910aadSHong Zhang uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 64178910aadSHong Zhang for (i=0; i<am; i++){ 64278910aadSHong Zhang jl[i] = am; il[i] = 0; 64378910aadSHong Zhang } 64478910aadSHong Zhang 64578910aadSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 64678910aadSHong Zhang nlnk = am + 1; 64778910aadSHong Zhang ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 64878910aadSHong Zhang 64978910aadSHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 65078910aadSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 65178910aadSHong Zhang current_space = free_space; 65278910aadSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 65378910aadSHong Zhang current_space_lvl = free_space_lvl; 65478910aadSHong Zhang 65578910aadSHong Zhang for (k=0; k<am; k++){ /* for each active row k */ 65678910aadSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 65778910aadSHong Zhang nzk = 0; 65878910aadSHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 65978910aadSHong Zhang ncols_upper = 0; 66078910aadSHong Zhang cols = cols_lvl + am; 66178910aadSHong Zhang for (j=0; j<ncols; j++){ 66278910aadSHong Zhang i = rip[*(aj + ai[rip[k]] + j)]; 66378910aadSHong Zhang if (i >= k){ /* only take upper triangular entry */ 66478910aadSHong Zhang cols[ncols_upper] = i; 66578910aadSHong Zhang cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 66678910aadSHong Zhang ncols_upper++; 66778910aadSHong Zhang } 66878910aadSHong Zhang } 66978910aadSHong Zhang ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 67078910aadSHong Zhang nzk += nlnk; 67178910aadSHong Zhang 67278910aadSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 67378910aadSHong Zhang prow = jl[k]; /* 1st pivot row */ 67478910aadSHong Zhang 67578910aadSHong Zhang while (prow < k){ 67678910aadSHong Zhang nextprow = jl[prow]; 67778910aadSHong Zhang 67878910aadSHong Zhang /* merge prow into k-th row */ 67978910aadSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 68078910aadSHong Zhang jmax = ui[prow+1]; 68178910aadSHong Zhang ncols = jmax-jmin; 68278910aadSHong Zhang i = jmin - ui[prow]; 68378910aadSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 68478910aadSHong Zhang for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 68578910aadSHong Zhang ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 68678910aadSHong Zhang nzk += nlnk; 68778910aadSHong Zhang 68878910aadSHong Zhang /* update il and jl for prow */ 68978910aadSHong Zhang if (jmin < jmax){ 69078910aadSHong Zhang il[prow] = jmin; 69178910aadSHong Zhang j = *cols; jl[prow] = jl[j]; jl[j] = prow; 69278910aadSHong Zhang } 69378910aadSHong Zhang prow = nextprow; 69478910aadSHong Zhang } 69578910aadSHong Zhang 69678910aadSHong Zhang /* if free space is not available, make more free space */ 69778910aadSHong Zhang if (current_space->local_remaining<nzk) { 69878910aadSHong Zhang i = am - k + 1; /* num of unfactored rows */ 69978910aadSHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 70078910aadSHong Zhang ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 70178910aadSHong Zhang ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 70278910aadSHong Zhang reallocs++; 70378910aadSHong Zhang } 70478910aadSHong Zhang 70578910aadSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 70678910aadSHong Zhang ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 70778910aadSHong Zhang 70878910aadSHong Zhang /* add the k-th row into il and jl */ 70978910aadSHong Zhang if (nzk-1 > 0){ 71078910aadSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 71178910aadSHong Zhang jl[k] = jl[i]; jl[i] = k; 71278910aadSHong Zhang il[k] = ui[k] + 1; 71378910aadSHong Zhang } 71478910aadSHong Zhang uj_ptr[k] = current_space->array; 71578910aadSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 71678910aadSHong Zhang 71778910aadSHong Zhang current_space->array += nzk; 71878910aadSHong Zhang current_space->local_used += nzk; 71978910aadSHong Zhang current_space->local_remaining -= nzk; 72078910aadSHong Zhang 72178910aadSHong Zhang current_space_lvl->array += nzk; 72278910aadSHong Zhang current_space_lvl->local_used += nzk; 72378910aadSHong Zhang current_space_lvl->local_remaining -= nzk; 72478910aadSHong Zhang 72578910aadSHong Zhang ui[k+1] = ui[k] + nzk; 72678910aadSHong Zhang } 72778910aadSHong Zhang 72878910aadSHong Zhang if (ai[am] != 0) { 72978910aadSHong Zhang PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 73078910aadSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 73178910aadSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 73278910aadSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 73378910aadSHong Zhang } else { 73478910aadSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Empty matrix.\n"); 73578910aadSHong Zhang } 73678910aadSHong Zhang 73778910aadSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 73878910aadSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 73978910aadSHong Zhang ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 74078910aadSHong Zhang 74178910aadSHong Zhang /* destroy list of free space and other temporary array(s) */ 74278910aadSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 74378910aadSHong Zhang ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 74478910aadSHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 74578910aadSHong Zhang ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 74678910aadSHong Zhang 74778910aadSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 74878910aadSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 74978910aadSHong Zhang B = *fact; 75078910aadSHong Zhang ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 75178910aadSHong Zhang ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 75278910aadSHong Zhang 75378910aadSHong Zhang b = (Mat_SeqSBAIJ*)B->data; 75478910aadSHong Zhang ierr = PetscFree(b->imax);CHKERRQ(ierr); 75578910aadSHong Zhang b->singlemalloc = PETSC_FALSE; 75678910aadSHong Zhang /* the next line frees the default space generated by the Create() */ 75778910aadSHong Zhang ierr = PetscFree(b->a);CHKERRQ(ierr); 75878910aadSHong Zhang ierr = PetscFree(b->ilen);CHKERRQ(ierr); 75978910aadSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 76078910aadSHong Zhang b->j = uj; 76178910aadSHong Zhang b->i = ui; 76278910aadSHong Zhang b->diag = 0; 76378910aadSHong Zhang b->ilen = 0; 76478910aadSHong Zhang b->imax = 0; 76578910aadSHong Zhang b->row = perm; 76678910aadSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 76778910aadSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 76878910aadSHong Zhang b->icol = perm; 76978910aadSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 77078910aadSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 77178910aadSHong Zhang PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 77278910aadSHong Zhang b->maxnz = b->nz = ui[am]; 77378910aadSHong Zhang 77478910aadSHong Zhang B->factor = FACTOR_CHOLESKY; 77578910aadSHong Zhang B->info.factor_mallocs = reallocs; 77678910aadSHong Zhang B->info.fill_ratio_given = fill; 77778910aadSHong Zhang if (ai[am] != 0) { 77878910aadSHong Zhang B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 77978910aadSHong Zhang } else { 78078910aadSHong Zhang B->info.fill_ratio_needed = 0.0; 78178910aadSHong Zhang } 78278910aadSHong Zhang if (perm_identity){ 78378910aadSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 78478910aadSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 78578910aadSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 78678910aadSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 78778910aadSHong Zhang } else { 78878910aadSHong Zhang (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 78978910aadSHong Zhang } 790c05c3958SHong Zhang PetscFunctionReturn(0); 791c05c3958SHong Zhang } 792c05c3958SHong Zhang 793c05c3958SHong Zhang #undef __FUNCT__ 794c05c3958SHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ" 795c05c3958SHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 796c05c3958SHong Zhang { 79778910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 79878910aadSHong Zhang Mat_SeqSBAIJ *b; 79978910aadSHong Zhang Mat B; 80078910aadSHong Zhang PetscErrorCode ierr; 80178910aadSHong Zhang PetscTruth perm_identity; 80278910aadSHong Zhang PetscReal fill = info->fill; 80378910aadSHong Zhang PetscInt *rip,*riip,i,mbs=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 80478910aadSHong Zhang PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 80578910aadSHong Zhang PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 80678910aadSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 80778910aadSHong Zhang PetscBT lnkbt; 80878910aadSHong Zhang IS iperm; 80978910aadSHong Zhang 810c05c3958SHong Zhang PetscFunctionBegin; 811*6ad2eaddSHong Zhang if (bs > 1) { /* convert to seqsbaij */ 812*6ad2eaddSHong Zhang if (!a->sbaijMat){ 813*6ad2eaddSHong Zhang ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 814*6ad2eaddSHong Zhang } 815*6ad2eaddSHong Zhang ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 816*6ad2eaddSHong Zhang B = *fact; 817*6ad2eaddSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 818*6ad2eaddSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 819*6ad2eaddSHong Zhang PetscFunctionReturn(0); 820*6ad2eaddSHong Zhang } 821*6ad2eaddSHong Zhang 82278910aadSHong Zhang /* check whether perm is the identity mapping */ 82378910aadSHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 82478910aadSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 82578910aadSHong Zhang 82678910aadSHong Zhang if (!perm_identity){ 82778910aadSHong Zhang /* check if perm is symmetric! */ 82878910aadSHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 82978910aadSHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 83078910aadSHong Zhang for (i=0; i<mbs; i++) { 83178910aadSHong Zhang if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 83278910aadSHong Zhang } 83378910aadSHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 83478910aadSHong Zhang ierr = ISDestroy(iperm);CHKERRQ(ierr); 83578910aadSHong Zhang } 83678910aadSHong Zhang 83778910aadSHong Zhang /* initialization */ 83878910aadSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 83978910aadSHong Zhang ui[0] = 0; 84078910aadSHong Zhang 84178910aadSHong Zhang /* jl: linked list for storing indices of the pivot rows 84278910aadSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 84378910aadSHong Zhang ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 84478910aadSHong Zhang il = jl + mbs; 84578910aadSHong Zhang cols = il + mbs; 84678910aadSHong Zhang ui_ptr = (PetscInt**)(cols + mbs); 84778910aadSHong Zhang for (i=0; i<mbs; i++){ 84878910aadSHong Zhang jl[i] = mbs; il[i] = 0; 84978910aadSHong Zhang } 85078910aadSHong Zhang 85178910aadSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 85278910aadSHong Zhang nlnk = mbs + 1; 85378910aadSHong Zhang ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 85478910aadSHong Zhang 85578910aadSHong Zhang /* initial FreeSpace size is fill*(ai[mbs]+1) */ 85678910aadSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 85778910aadSHong Zhang current_space = free_space; 85878910aadSHong Zhang 85978910aadSHong Zhang for (k=0; k<mbs; k++){ /* for each active row k */ 86078910aadSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 86178910aadSHong Zhang nzk = 0; 86278910aadSHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 86378910aadSHong Zhang ncols_upper = 0; 86478910aadSHong Zhang for (j=0; j<ncols; j++){ 86578910aadSHong Zhang i = rip[*(aj + ai[rip[k]] + j)]; 86678910aadSHong Zhang if (i >= k){ /* only take upper triangular entry */ 86778910aadSHong Zhang cols[ncols_upper] = i; 86878910aadSHong Zhang ncols_upper++; 86978910aadSHong Zhang } 87078910aadSHong Zhang } 87178910aadSHong Zhang ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 87278910aadSHong Zhang nzk += nlnk; 87378910aadSHong Zhang 87478910aadSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 87578910aadSHong Zhang prow = jl[k]; /* 1st pivot row */ 87678910aadSHong Zhang 87778910aadSHong Zhang while (prow < k){ 87878910aadSHong Zhang nextprow = jl[prow]; 87978910aadSHong Zhang /* merge prow into k-th row */ 88078910aadSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 88178910aadSHong Zhang jmax = ui[prow+1]; 88278910aadSHong Zhang ncols = jmax-jmin; 88378910aadSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 88478910aadSHong Zhang ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 88578910aadSHong Zhang nzk += nlnk; 88678910aadSHong Zhang 88778910aadSHong Zhang /* update il and jl for prow */ 88878910aadSHong Zhang if (jmin < jmax){ 88978910aadSHong Zhang il[prow] = jmin; 89078910aadSHong Zhang j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 89178910aadSHong Zhang } 89278910aadSHong Zhang prow = nextprow; 89378910aadSHong Zhang } 89478910aadSHong Zhang 89578910aadSHong Zhang /* if free space is not available, make more free space */ 89678910aadSHong Zhang if (current_space->local_remaining<nzk) { 89778910aadSHong Zhang i = mbs - k + 1; /* num of unfactored rows */ 89878910aadSHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 89978910aadSHong Zhang ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 90078910aadSHong Zhang reallocs++; 90178910aadSHong Zhang } 90278910aadSHong Zhang 90378910aadSHong Zhang /* copy data into free space, then initialize lnk */ 90478910aadSHong Zhang ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 90578910aadSHong Zhang 90678910aadSHong Zhang /* add the k-th row into il and jl */ 90778910aadSHong Zhang if (nzk-1 > 0){ 90878910aadSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 90978910aadSHong Zhang jl[k] = jl[i]; jl[i] = k; 91078910aadSHong Zhang il[k] = ui[k] + 1; 91178910aadSHong Zhang } 91278910aadSHong Zhang ui_ptr[k] = current_space->array; 91378910aadSHong Zhang current_space->array += nzk; 91478910aadSHong Zhang current_space->local_used += nzk; 91578910aadSHong Zhang current_space->local_remaining -= nzk; 91678910aadSHong Zhang 91778910aadSHong Zhang ui[k+1] = ui[k] + nzk; 91878910aadSHong Zhang } 91978910aadSHong Zhang 92078910aadSHong Zhang if (ai[mbs] != 0) { 92178910aadSHong Zhang PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 92278910aadSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 92378910aadSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 92478910aadSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 92578910aadSHong Zhang } else { 92678910aadSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 92778910aadSHong Zhang } 92878910aadSHong Zhang 92978910aadSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 93078910aadSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 93178910aadSHong Zhang 93278910aadSHong Zhang /* destroy list of free space and other temporary array(s) */ 93378910aadSHong Zhang ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 93478910aadSHong Zhang ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 93578910aadSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 93678910aadSHong Zhang 93778910aadSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 93878910aadSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 93978910aadSHong Zhang B = *fact; 94078910aadSHong Zhang ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 94178910aadSHong Zhang ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 94278910aadSHong Zhang 94378910aadSHong Zhang b = (Mat_SeqSBAIJ*)B->data; 94478910aadSHong Zhang ierr = PetscFree(b->imax);CHKERRQ(ierr); 94578910aadSHong Zhang b->singlemalloc = PETSC_FALSE; 94678910aadSHong Zhang /* the next line frees the default space generated by the Create() */ 94778910aadSHong Zhang ierr = PetscFree(b->a);CHKERRQ(ierr); 94878910aadSHong Zhang ierr = PetscFree(b->ilen);CHKERRQ(ierr); 94978910aadSHong Zhang ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 95078910aadSHong Zhang b->j = uj; 95178910aadSHong Zhang b->i = ui; 95278910aadSHong Zhang b->diag = 0; 95378910aadSHong Zhang b->ilen = 0; 95478910aadSHong Zhang b->imax = 0; 95578910aadSHong Zhang b->row = perm; 95678910aadSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 95778910aadSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 95878910aadSHong Zhang b->icol = perm; 95978910aadSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 96078910aadSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 96178910aadSHong Zhang PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 96278910aadSHong Zhang b->maxnz = b->nz = ui[mbs]; 96378910aadSHong Zhang 96478910aadSHong Zhang B->factor = FACTOR_CHOLESKY; 96578910aadSHong Zhang B->info.factor_mallocs = reallocs; 96678910aadSHong Zhang B->info.fill_ratio_given = fill; 96778910aadSHong Zhang if (ai[mbs] != 0) { 96878910aadSHong Zhang B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 96978910aadSHong Zhang } else { 97078910aadSHong Zhang B->info.fill_ratio_needed = 0.0; 97178910aadSHong Zhang } 97278910aadSHong Zhang if (perm_identity){ 973*6ad2eaddSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 974*6ad2eaddSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 975*6ad2eaddSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 976*6ad2eaddSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 97778910aadSHong Zhang } else { 978*6ad2eaddSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 97978910aadSHong Zhang } 980c05c3958SHong Zhang PetscFunctionReturn(0); 981c05c3958SHong Zhang } 982