149b5e25fSSatish Balay 249b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 33a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 449b5e25fSSatish Balay #include "src/inline/ilu.h" 549a6740bSHong Zhang #include "include/petscis.h" 649b5e25fSSatish Balay 78dc9108eSHong Zhang #if !defined(PETSC_USE_COMPLEX) 85f9f512dSHong Zhang /* 95f9f512dSHong Zhang input: 10c037c3f7SHong Zhang F -- numeric factor 115f9f512dSHong Zhang output: 12c037c3f7SHong Zhang nneg, nzero, npos: matrix inertia 135f9f512dSHong Zhang */ 145f9f512dSHong Zhang 155f9f512dSHong Zhang #undef __FUNCT__ 165f9f512dSHong Zhang #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 1713f74950SBarry Smith PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos) 185f9f512dSHong Zhang { 19638f5ce0SDinesh Kaushik Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 203e0d88b5SBarry Smith PetscScalar *dd = fact_ptr->a; 2113f74950SBarry Smith PetscInt mbs=fact_ptr->mbs,bs=F->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i; 225f9f512dSHong Zhang 235f9f512dSHong Zhang PetscFunctionBegin; 2477431f27SBarry Smith if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs); 25eeeff2ecSHong Zhang nneig_tmp = 0; npos_tmp = 0; 26eeeff2ecSHong Zhang for (i=0; i<mbs; i++){ 27eeeff2ecSHong Zhang if (PetscRealPart(dd[*fi]) > 0.0){ 28eeeff2ecSHong Zhang npos_tmp++; 29eeeff2ecSHong Zhang } else if (PetscRealPart(dd[*fi]) < 0.0){ 30eeeff2ecSHong Zhang nneig_tmp++; 315f9f512dSHong Zhang } 32eeeff2ecSHong Zhang fi++; 333e0d88b5SBarry Smith } 34eeeff2ecSHong Zhang if (nneig) *nneig = nneig_tmp; 35eeeff2ecSHong Zhang if (npos) *npos = npos_tmp; 36eeeff2ecSHong Zhang if (nzero) *nzero = mbs - nneig_tmp - npos_tmp; 37eeeff2ecSHong Zhang 385f9f512dSHong Zhang PetscFunctionReturn(0); 395f9f512dSHong Zhang } 408dc9108eSHong Zhang #endif /* !defined(PETSC_USE_COMPLEX) */ 415f9f512dSHong Zhang 425f9f512dSHong Zhang /* 435f9f512dSHong Zhang Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 44*10c27e3dSHong Zhang Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 455f9f512dSHong Zhang */ 46*10c27e3dSHong Zhang #undef __FUNCT__ 47*10c27e3dSHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_MSR" 48*10c27e3dSHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 49*10c27e3dSHong Zhang { 50*10c27e3dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 51*10c27e3dSHong Zhang PetscErrorCode ierr; 52*10c27e3dSHong Zhang PetscInt *rip,i,mbs = a->mbs,*ai,*aj; 53*10c27e3dSHong Zhang PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 54*10c27e3dSHong Zhang PetscInt m,reallocs = 0,prow; 55*10c27e3dSHong Zhang PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 56*10c27e3dSHong Zhang PetscReal f = info->fill; 57*10c27e3dSHong Zhang PetscTruth perm_identity; 58*10c27e3dSHong Zhang 59*10c27e3dSHong Zhang PetscFunctionBegin; 60*10c27e3dSHong Zhang /* check whether perm is the identity mapping */ 61*10c27e3dSHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 62*10c27e3dSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 63*10c27e3dSHong Zhang 64*10c27e3dSHong Zhang if (perm_identity){ /* without permutation */ 65*10c27e3dSHong Zhang a->permute = PETSC_FALSE; 66*10c27e3dSHong Zhang ai = a->i; aj = a->j; 67*10c27e3dSHong Zhang } else { /* non-trivial permutation */ 68*10c27e3dSHong Zhang a->permute = PETSC_TRUE; 69*10c27e3dSHong Zhang ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 70*10c27e3dSHong Zhang ai = a->inew; aj = a->jnew; 71*10c27e3dSHong Zhang } 72*10c27e3dSHong Zhang 73*10c27e3dSHong Zhang /* initialization */ 74*10c27e3dSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 75*10c27e3dSHong Zhang umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1; 76*10c27e3dSHong Zhang ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 77*10c27e3dSHong Zhang iu[0] = mbs+1; 78*10c27e3dSHong Zhang juidx = mbs + 1; /* index for ju */ 79*10c27e3dSHong Zhang ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 80*10c27e3dSHong Zhang q = jl + mbs; /* linked list for col index */ 81*10c27e3dSHong Zhang for (i=0; i<mbs; i++){ 82*10c27e3dSHong Zhang jl[i] = mbs; 83*10c27e3dSHong Zhang q[i] = 0; 84*10c27e3dSHong Zhang } 85*10c27e3dSHong Zhang 86*10c27e3dSHong Zhang /* for each row k */ 87*10c27e3dSHong Zhang for (k=0; k<mbs; k++){ 88*10c27e3dSHong Zhang for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 89*10c27e3dSHong Zhang nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 90*10c27e3dSHong Zhang q[k] = mbs; 91*10c27e3dSHong Zhang /* initialize nonzero structure of k-th row to row rip[k] of A */ 92*10c27e3dSHong Zhang jmin = ai[rip[k]] +1; /* exclude diag[k] */ 93*10c27e3dSHong Zhang jmax = ai[rip[k]+1]; 94*10c27e3dSHong Zhang for (j=jmin; j<jmax; j++){ 95*10c27e3dSHong Zhang vj = rip[aj[j]]; /* col. value */ 96*10c27e3dSHong Zhang if(vj > k){ 97*10c27e3dSHong Zhang qm = k; 98*10c27e3dSHong Zhang do { 99*10c27e3dSHong Zhang m = qm; qm = q[m]; 100*10c27e3dSHong Zhang } while(qm < vj); 101*10c27e3dSHong Zhang if (qm == vj) { 102*10c27e3dSHong Zhang SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 103*10c27e3dSHong Zhang } 104*10c27e3dSHong Zhang nzk++; 105*10c27e3dSHong Zhang q[m] = vj; 106*10c27e3dSHong Zhang q[vj] = qm; 107*10c27e3dSHong Zhang } /* if(vj > k) */ 108*10c27e3dSHong Zhang } /* for (j=jmin; j<jmax; j++) */ 109*10c27e3dSHong Zhang 110*10c27e3dSHong Zhang /* modify nonzero structure of k-th row by computing fill-in 111*10c27e3dSHong Zhang for each row i to be merged in */ 112*10c27e3dSHong Zhang prow = k; 113*10c27e3dSHong Zhang prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 114*10c27e3dSHong Zhang 115*10c27e3dSHong Zhang while (prow < k){ 116*10c27e3dSHong Zhang /* merge row prow into k-th row */ 117*10c27e3dSHong Zhang jmin = iu[prow] + 1; jmax = iu[prow+1]; 118*10c27e3dSHong Zhang qm = k; 119*10c27e3dSHong Zhang for (j=jmin; j<jmax; j++){ 120*10c27e3dSHong Zhang vj = ju[j]; 121*10c27e3dSHong Zhang do { 122*10c27e3dSHong Zhang m = qm; qm = q[m]; 123*10c27e3dSHong Zhang } while (qm < vj); 124*10c27e3dSHong Zhang if (qm != vj){ 125*10c27e3dSHong Zhang nzk++; q[m] = vj; q[vj] = qm; qm = vj; 126*10c27e3dSHong Zhang } 127*10c27e3dSHong Zhang } 128*10c27e3dSHong Zhang prow = jl[prow]; /* next pivot row */ 129*10c27e3dSHong Zhang } 130*10c27e3dSHong Zhang 131*10c27e3dSHong Zhang /* add k to row list for first nonzero element in k-th row */ 132*10c27e3dSHong Zhang if (nzk > 0){ 133*10c27e3dSHong Zhang i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 134*10c27e3dSHong Zhang jl[k] = jl[i]; jl[i] = k; 135*10c27e3dSHong Zhang } 136*10c27e3dSHong Zhang iu[k+1] = iu[k] + nzk; 137*10c27e3dSHong Zhang 138*10c27e3dSHong Zhang /* allocate more space to ju if needed */ 139*10c27e3dSHong Zhang if (iu[k+1] > umax) { 140*10c27e3dSHong Zhang /* estimate how much additional space we will need */ 141*10c27e3dSHong Zhang /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 142*10c27e3dSHong Zhang /* just double the memory each time */ 143*10c27e3dSHong Zhang maxadd = umax; 144*10c27e3dSHong Zhang if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 145*10c27e3dSHong Zhang umax += maxadd; 146*10c27e3dSHong Zhang 147*10c27e3dSHong Zhang /* allocate a longer ju */ 148*10c27e3dSHong Zhang ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 149*10c27e3dSHong Zhang ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 150*10c27e3dSHong Zhang ierr = PetscFree(ju);CHKERRQ(ierr); 151*10c27e3dSHong Zhang ju = jutmp; 152*10c27e3dSHong Zhang reallocs++; /* count how many times we realloc */ 153*10c27e3dSHong Zhang } 154*10c27e3dSHong Zhang 155*10c27e3dSHong Zhang /* save nonzero structure of k-th row in ju */ 156*10c27e3dSHong Zhang i=k; 157*10c27e3dSHong Zhang while (nzk --) { 158*10c27e3dSHong Zhang i = q[i]; 159*10c27e3dSHong Zhang ju[juidx++] = i; 160*10c27e3dSHong Zhang } 161*10c27e3dSHong Zhang } 162*10c27e3dSHong Zhang 163*10c27e3dSHong Zhang if (ai[mbs] != 0) { 164*10c27e3dSHong Zhang PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 165*10c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 166*10c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 167*10c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 168*10c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 169*10c27e3dSHong Zhang } else { 170*10c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 171*10c27e3dSHong Zhang } 172*10c27e3dSHong Zhang 173*10c27e3dSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 174*10c27e3dSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 175*10c27e3dSHong Zhang 176*10c27e3dSHong Zhang /* put together the new matrix */ 177*10c27e3dSHong Zhang ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 178*10c27e3dSHong Zhang ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 179*10c27e3dSHong Zhang ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 180*10c27e3dSHong Zhang 181*10c27e3dSHong Zhang /* PetscLogObjectParent(*B,iperm); */ 182*10c27e3dSHong Zhang b = (Mat_SeqSBAIJ*)(*B)->data; 183*10c27e3dSHong Zhang ierr = PetscFree(b->imax);CHKERRQ(ierr); 184*10c27e3dSHong Zhang b->singlemalloc = PETSC_FALSE; 185*10c27e3dSHong Zhang /* the next line frees the default space generated by the Create() */ 186*10c27e3dSHong Zhang ierr = PetscFree(b->a);CHKERRQ(ierr); 187*10c27e3dSHong Zhang ierr = PetscFree(b->ilen);CHKERRQ(ierr); 188*10c27e3dSHong Zhang ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 189*10c27e3dSHong Zhang b->j = ju; 190*10c27e3dSHong Zhang b->i = iu; 191*10c27e3dSHong Zhang b->diag = 0; 192*10c27e3dSHong Zhang b->ilen = 0; 193*10c27e3dSHong Zhang b->imax = 0; 194*10c27e3dSHong Zhang b->row = perm; 195*10c27e3dSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 196*10c27e3dSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 197*10c27e3dSHong Zhang b->icol = perm; 198*10c27e3dSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 199*10c27e3dSHong Zhang ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 200*10c27e3dSHong Zhang /* In b structure: Free imax, ilen, old a, old j. 201*10c27e3dSHong Zhang Allocate idnew, solve_work, new a, new j */ 202*10c27e3dSHong Zhang PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 203*10c27e3dSHong Zhang b->maxnz = b->nz = iu[mbs]; 204*10c27e3dSHong Zhang 205*10c27e3dSHong Zhang (*B)->factor = FACTOR_CHOLESKY; 206*10c27e3dSHong Zhang (*B)->info.factor_mallocs = reallocs; 207*10c27e3dSHong Zhang (*B)->info.fill_ratio_given = f; 208*10c27e3dSHong Zhang if (ai[mbs] != 0) { 209*10c27e3dSHong Zhang (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 210*10c27e3dSHong Zhang } else { 211*10c27e3dSHong Zhang (*B)->info.fill_ratio_needed = 0.0; 212*10c27e3dSHong Zhang } 213*10c27e3dSHong Zhang 214*10c27e3dSHong Zhang if (perm_identity){ 215*10c27e3dSHong Zhang switch (bs) { 216*10c27e3dSHong Zhang case 1: 217*10c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 218*10c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 219*10c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 220*10c27e3dSHong Zhang break; 221*10c27e3dSHong Zhang case 2: 222*10c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 223*10c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 224*10c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 225*10c27e3dSHong Zhang break; 226*10c27e3dSHong Zhang case 3: 227*10c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 228*10c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 229*10c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 230*10c27e3dSHong Zhang break; 231*10c27e3dSHong Zhang case 4: 232*10c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 233*10c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 234*10c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 235*10c27e3dSHong Zhang break; 236*10c27e3dSHong Zhang case 5: 237*10c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 238*10c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 239*10c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 240*10c27e3dSHong Zhang break; 241*10c27e3dSHong Zhang case 6: 242*10c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 243*10c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 244*10c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 245*10c27e3dSHong Zhang break; 246*10c27e3dSHong Zhang case 7: 247*10c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 248*10c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 249*10c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 250*10c27e3dSHong Zhang break; 251*10c27e3dSHong Zhang default: 252*10c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 253*10c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 254*10c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 255*10c27e3dSHong Zhang break; 256*10c27e3dSHong Zhang } 257*10c27e3dSHong Zhang } 258*10c27e3dSHong Zhang PetscFunctionReturn(0); 259*10c27e3dSHong Zhang } 260*10c27e3dSHong Zhang /* 261*10c27e3dSHong Zhang Symbolic U^T*D*U factorization for SBAIJ format. 262*10c27e3dSHong Zhang */ 2634a2ae208SSatish Balay #undef __FUNCT__ 2644a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 265dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 26649b5e25fSSatish Balay { 26749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 2686849ba73SBarry Smith PetscErrorCode ierr; 26913f74950SBarry Smith PetscInt *rip,i,mbs = a->mbs,*ai,*aj; 27013f74950SBarry Smith PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 27113f74950SBarry Smith PetscInt m,reallocs = 0,prow; 27213f74950SBarry Smith PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 27313f74950SBarry Smith PetscInt *il,ili,nextprow; 27415e8a5b3SHong Zhang PetscReal f = info->fill; 275671cb588SHong Zhang PetscTruth perm_identity; 27649b5e25fSSatish Balay 27749b5e25fSSatish Balay PetscFunctionBegin; 278*10c27e3dSHong Zhang /* 279*10c27e3dSHong Zhang This code originally uses Modified Sparse Row (MSR) storage 280*10c27e3dSHong Zhang (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 281*10c27e3dSHong Zhang Then it is rewritten so the factor B takes seqsbaij format. However the associated 282*10c27e3dSHong Zhang MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 283*10c27e3dSHong Zhang thus the original code in MSR format is still used for these cases. 284*10c27e3dSHong Zhang The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 285*10c27e3dSHong Zhang MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 286*10c27e3dSHong Zhang */ 287*10c27e3dSHong Zhang 288cb718733SHong Zhang /* check whether perm is the identity mapping */ 289671cb588SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 290*10c27e3dSHong Zhang if (bs>1 || !perm_identity){ 291*10c27e3dSHong Zhang a->permute = PETSC_TRUE; 292*10c27e3dSHong Zhang ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,B);CHKERRQ(ierr); 293*10c27e3dSHong Zhang PetscFunctionReturn(0); 294*10c27e3dSHong Zhang } 295064503c5SHong Zhang 296*10c27e3dSHong Zhang /* At present, the code below works only for perm_identity=PETSC_TRUE */ 297*10c27e3dSHong Zhang printf(" MatCholeskyFactorSymbolic_SeqSBAIJ\n"); 298*10c27e3dSHong Zhang a->permute = PETSC_FALSE; 299064503c5SHong Zhang 300064503c5SHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 301064503c5SHong Zhang ai = a->i; aj = a->j; 302064503c5SHong Zhang 303064503c5SHong Zhang /* initialization */ 30413f74950SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 30513f74950SBarry Smith umax = (PetscInt)(f*ai[mbs] + 1); 30613f74950SBarry Smith ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 307064503c5SHong Zhang iu[0] = 0; 308064503c5SHong Zhang juidx = 0; /* index for ju */ 30913f74950SBarry Smith ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 310064503c5SHong Zhang q = jl + mbs; /* linked list for col index of active row */ 311064503c5SHong Zhang il = q + mbs; 312064503c5SHong Zhang for (i=0; i<mbs; i++){ 313064503c5SHong Zhang jl[i] = mbs; 314064503c5SHong Zhang q[i] = 0; 315064503c5SHong Zhang il[i] = 0; 316064503c5SHong Zhang } 317064503c5SHong Zhang 318064503c5SHong Zhang /* for each row k */ 319064503c5SHong Zhang for (k=0; k<mbs; k++){ 320064503c5SHong Zhang nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 321064503c5SHong Zhang q[k] = mbs; 322064503c5SHong Zhang /* initialize nonzero structure of k-th row to row rip[k] of A */ 323064503c5SHong Zhang jmin = ai[rip[k]] +1; /* exclude diag[k] */ 324064503c5SHong Zhang jmax = ai[rip[k]+1]; 325064503c5SHong Zhang for (j=jmin; j<jmax; j++){ 326064503c5SHong Zhang vj = rip[aj[j]]; /* col. value */ 327064503c5SHong Zhang if(vj > k){ 328064503c5SHong Zhang qm = k; 329064503c5SHong Zhang do { 330064503c5SHong Zhang m = qm; qm = q[m]; 331064503c5SHong Zhang } while(qm < vj); 332064503c5SHong Zhang if (qm == vj) { 333e005ede5SBarry Smith SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 334064503c5SHong Zhang } 335064503c5SHong Zhang nzk++; 336064503c5SHong Zhang q[m] = vj; 337064503c5SHong Zhang q[vj] = qm; 338064503c5SHong Zhang } /* if(vj > k) */ 339064503c5SHong Zhang } /* for (j=jmin; j<jmax; j++) */ 340064503c5SHong Zhang 341064503c5SHong Zhang /* modify nonzero structure of k-th row by computing fill-in 342064503c5SHong Zhang for each row i to be merged in */ 343064503c5SHong Zhang prow = k; 344064503c5SHong Zhang prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 345064503c5SHong Zhang 346064503c5SHong Zhang while (prow < k){ 347064503c5SHong Zhang nextprow = jl[prow]; 348064503c5SHong Zhang 349064503c5SHong Zhang /* merge row prow into k-th row */ 350064503c5SHong Zhang ili = il[prow]; 351064503c5SHong Zhang jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 352064503c5SHong Zhang jmax = iu[prow+1]; 353064503c5SHong Zhang qm = k; 354064503c5SHong Zhang for (j=jmin; j<jmax; j++){ 355064503c5SHong Zhang vj = ju[j]; 356064503c5SHong Zhang do { 357064503c5SHong Zhang m = qm; qm = q[m]; 358064503c5SHong Zhang } while (qm < vj); 359064503c5SHong Zhang if (qm != vj){ /* a fill */ 360064503c5SHong Zhang nzk++; q[m] = vj; q[vj] = qm; qm = vj; 361064503c5SHong Zhang } 362064503c5SHong Zhang } /* end of for (j=jmin; j<jmax; j++) */ 363064503c5SHong Zhang if (jmin < jmax){ 364064503c5SHong Zhang il[prow] = jmin; 365064503c5SHong Zhang j = ju[jmin]; 366064503c5SHong Zhang jl[prow] = jl[j]; jl[j] = prow; /* update jl */ 367064503c5SHong Zhang } 368064503c5SHong Zhang prow = nextprow; 369064503c5SHong Zhang } 370064503c5SHong Zhang 371064503c5SHong Zhang /* update il and jl */ 372064503c5SHong Zhang if (nzk > 0){ 373064503c5SHong Zhang i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 374064503c5SHong Zhang jl[k] = jl[i]; jl[i] = k; 375064503c5SHong Zhang il[k] = iu[k] + 1; 376064503c5SHong Zhang } 377064503c5SHong Zhang iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 378064503c5SHong Zhang 379064503c5SHong Zhang /* allocate more space to ju if needed */ 380064503c5SHong Zhang if (iu[k+1] > umax) { 381064503c5SHong Zhang /* estimate how much additional space we will need */ 382064503c5SHong Zhang /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 383064503c5SHong Zhang /* just double the memory each time */ 384064503c5SHong Zhang maxadd = umax; 385064503c5SHong Zhang if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 386064503c5SHong Zhang umax += maxadd; 387064503c5SHong Zhang 388064503c5SHong Zhang /* allocate a longer ju */ 38913f74950SBarry Smith ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 39013f74950SBarry Smith ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 391064503c5SHong Zhang ierr = PetscFree(ju);CHKERRQ(ierr); 392064503c5SHong Zhang ju = jutmp; 393418422e8SSatish Balay reallocs++; /* count how many times we realloc */ 394064503c5SHong Zhang } 395064503c5SHong Zhang 396064503c5SHong Zhang /* save nonzero structure of k-th row in ju */ 397064503c5SHong Zhang ju[juidx++] = k; /* diag[k] */ 398064503c5SHong Zhang i = k; 399064503c5SHong Zhang while (nzk --) { 400064503c5SHong Zhang i = q[i]; 401064503c5SHong Zhang ju[juidx++] = i; 402064503c5SHong Zhang } 403064503c5SHong Zhang } 404064503c5SHong Zhang 405064503c5SHong Zhang if (ai[mbs] != 0) { 406064503c5SHong Zhang PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 407418422e8SSatish Balay PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 408064503c5SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 409064503c5SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 410064503c5SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 411064503c5SHong Zhang } else { 412064503c5SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 413064503c5SHong Zhang } 414064503c5SHong Zhang 415064503c5SHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 416064503c5SHong Zhang /* ierr = PetscFree(q);CHKERRQ(ierr); */ 417064503c5SHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 418064503c5SHong Zhang 419064503c5SHong Zhang /* put together the new matrix */ 420be5d1d56SKris Buschelman ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 421be5d1d56SKris Buschelman ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 422be5d1d56SKris Buschelman ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 423be5d1d56SKris Buschelman 424064503c5SHong Zhang /* PetscLogObjectParent(*B,iperm); */ 425064503c5SHong Zhang b = (Mat_SeqSBAIJ*)(*B)->data; 426064503c5SHong Zhang ierr = PetscFree(b->imax);CHKERRQ(ierr); 427064503c5SHong Zhang b->singlemalloc = PETSC_FALSE; 428064503c5SHong Zhang /* the next line frees the default space generated by the Create() */ 429064503c5SHong Zhang ierr = PetscFree(b->a);CHKERRQ(ierr); 430064503c5SHong Zhang ierr = PetscFree(b->ilen);CHKERRQ(ierr); 431064503c5SHong Zhang ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 432064503c5SHong Zhang b->j = ju; 433064503c5SHong Zhang b->i = iu; 434064503c5SHong Zhang b->diag = 0; 435064503c5SHong Zhang b->ilen = 0; 436064503c5SHong Zhang b->imax = 0; 437064503c5SHong Zhang b->row = perm; 43815e8a5b3SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 439064503c5SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 440064503c5SHong Zhang b->icol = perm; 441064503c5SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 442064503c5SHong Zhang ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 443064503c5SHong Zhang /* In b structure: Free imax, ilen, old a, old j. 444064503c5SHong Zhang Allocate idnew, solve_work, new a, new j */ 44513f74950SBarry Smith PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 4466c6c5352SBarry Smith b->maxnz = b->nz = iu[mbs]; 447064503c5SHong Zhang 448064503c5SHong Zhang (*B)->factor = FACTOR_CHOLESKY; 449418422e8SSatish Balay (*B)->info.factor_mallocs = reallocs; 450064503c5SHong Zhang (*B)->info.fill_ratio_given = f; 451064503c5SHong Zhang if (ai[mbs] != 0) { 452064503c5SHong Zhang (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 453064503c5SHong Zhang } else { 454064503c5SHong Zhang (*B)->info.fill_ratio_needed = 0.0; 455064503c5SHong Zhang } 456064503c5SHong Zhang 457b45a75daSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 458b45a75daSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 459e005ede5SBarry Smith (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 460064503c5SHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 461064503c5SHong Zhang PetscFunctionReturn(0); 462064503c5SHong Zhang } 4634a2ae208SSatish Balay #undef __FUNCT__ 4644a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 465dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 46649b5e25fSSatish Balay { 46749b5e25fSSatish Balay Mat C = *B; 4684c16a6a6SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 4694c16a6a6SHong Zhang IS perm = b->row; 4706849ba73SBarry Smith PetscErrorCode ierr; 47113f74950SBarry Smith PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 47213f74950SBarry Smith PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 47313f74950SBarry Smith PetscInt bs=A->bs,bs2 = a->bs2; 4744c16a6a6SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 4754c16a6a6SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 47628de702eSHong Zhang MatScalar *work; 47713f74950SBarry Smith PetscInt *pivots; 4784c16a6a6SHong Zhang 4794c16a6a6SHong Zhang PetscFunctionBegin; 4804c16a6a6SHong Zhang /* initialization */ 48182502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 4824c16a6a6SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 48313f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 48428de702eSHong Zhang jl = il + mbs; 4854c16a6a6SHong Zhang for (i=0; i<mbs; i++) { 4864c16a6a6SHong Zhang jl[i] = mbs; il[0] = 0; 4874c16a6a6SHong Zhang } 488b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 48928de702eSHong Zhang uik = dk + bs2; 49028de702eSHong Zhang work = uik + bs2; 49113f74950SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 4924c16a6a6SHong Zhang 4934c16a6a6SHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 4944c16a6a6SHong Zhang 4954c16a6a6SHong Zhang /* check permutation */ 4964c16a6a6SHong Zhang if (!a->permute){ 4974c16a6a6SHong Zhang ai = a->i; aj = a->j; aa = a->a; 4984c16a6a6SHong Zhang } else { 4994c16a6a6SHong Zhang ai = a->inew; aj = a->jnew; 50082502324SSatish Balay ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 5014c16a6a6SHong Zhang ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 50213f74950SBarry Smith ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 50313f74950SBarry Smith ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 5044c16a6a6SHong Zhang 5054c16a6a6SHong Zhang for (i=0; i<mbs; i++){ 5064c16a6a6SHong Zhang jmin = ai[i]; jmax = ai[i+1]; 5074c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 5084c16a6a6SHong Zhang while (a2anew[j] != j){ 5094c16a6a6SHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 5104c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 5114c16a6a6SHong Zhang dk[k1] = aa[k*bs2+k1]; 5124c16a6a6SHong Zhang aa[k*bs2+k1] = aa[j*bs2+k1]; 5134c16a6a6SHong Zhang aa[j*bs2+k1] = dk[k1]; 5144c16a6a6SHong Zhang } 5154c16a6a6SHong Zhang } 5164c16a6a6SHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 5174c16a6a6SHong Zhang if (i > aj[j]){ 5184c16a6a6SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 5194c16a6a6SHong Zhang ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 5204c16a6a6SHong Zhang for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 5214c16a6a6SHong Zhang for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 5224c16a6a6SHong Zhang for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 5234c16a6a6SHong Zhang } 5244c16a6a6SHong Zhang } 5254c16a6a6SHong Zhang } 5264c16a6a6SHong Zhang } 527323b833fSBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 5284c16a6a6SHong Zhang } 5294c16a6a6SHong Zhang 5304c16a6a6SHong Zhang /* for each row k */ 5314c16a6a6SHong Zhang for (k = 0; k<mbs; k++){ 5324c16a6a6SHong Zhang 5334c16a6a6SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 5344c16a6a6SHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 535057f5ba7SHong Zhang 5364c16a6a6SHong Zhang ap = aa + jmin*bs2; 5374c16a6a6SHong Zhang for (j = jmin; j < jmax; j++){ 5384c16a6a6SHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 5394c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 5404c16a6a6SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 5414c16a6a6SHong Zhang } 5424c16a6a6SHong Zhang 5434c16a6a6SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 5444c16a6a6SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 5454c16a6a6SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 5464c16a6a6SHong Zhang 547057f5ba7SHong Zhang while (i < k){ 5484c16a6a6SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 5494c16a6a6SHong Zhang 5504c16a6a6SHong Zhang /* compute multiplier */ 5514c16a6a6SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 5524c16a6a6SHong Zhang 5534c16a6a6SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 5544c16a6a6SHong Zhang diag = ba + i*bs2; 5554c16a6a6SHong Zhang u = ba + ili*bs2; 5564c16a6a6SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 5574c16a6a6SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 5584c16a6a6SHong Zhang 5594c16a6a6SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 5604c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 5614c16a6a6SHong Zhang 5624c16a6a6SHong Zhang /* update -U(i,k) */ 5634c16a6a6SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 5644c16a6a6SHong Zhang 5654c16a6a6SHong Zhang /* add multiple of row i to k-th row ... */ 5664c16a6a6SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 5674c16a6a6SHong Zhang if (jmin < jmax){ 5684c16a6a6SHong Zhang for (j=jmin; j<jmax; j++) { 5694c16a6a6SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 5704c16a6a6SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 5714c16a6a6SHong Zhang u = ba + j*bs2; 5724c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 5734c16a6a6SHong Zhang } 5744c16a6a6SHong Zhang 5754c16a6a6SHong Zhang /* ... add i to row list for next nonzero entry */ 5764c16a6a6SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 5774c16a6a6SHong Zhang j = bj[jmin]; 5784c16a6a6SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 5794c16a6a6SHong Zhang } 5804c16a6a6SHong Zhang i = nexti; 5814c16a6a6SHong Zhang } 5824c16a6a6SHong Zhang 5834c16a6a6SHong Zhang /* save nonzero entries in k-th row of U ... */ 5844c16a6a6SHong Zhang 5854c16a6a6SHong Zhang /* invert diagonal block */ 5864c16a6a6SHong Zhang diag = ba+k*bs2; 5874c16a6a6SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 588d230e6fdSBarry Smith ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 5894c16a6a6SHong Zhang 5904c16a6a6SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 5914c16a6a6SHong Zhang if (jmin < jmax) { 5924c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 5934c16a6a6SHong Zhang vj = bj[j]; /* block col. index of U */ 5944c16a6a6SHong Zhang u = ba + j*bs2; 5954c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 5964c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 5974c16a6a6SHong Zhang *u++ = *rtmp_ptr; 5984c16a6a6SHong Zhang *rtmp_ptr++ = 0.0; 5994c16a6a6SHong Zhang } 6004c16a6a6SHong Zhang } 6014c16a6a6SHong Zhang 6024c16a6a6SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 6034c16a6a6SHong Zhang il[k] = jmin; 6044c16a6a6SHong Zhang i = bj[jmin]; 6054c16a6a6SHong Zhang jl[k] = jl[i]; jl[i] = k; 6064c16a6a6SHong Zhang } 6074c16a6a6SHong Zhang } 6084c16a6a6SHong Zhang 6094c16a6a6SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 6104c16a6a6SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 6114c16a6a6SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 6124c16a6a6SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 6134c16a6a6SHong Zhang if (a->permute){ 6144c16a6a6SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 6154c16a6a6SHong Zhang } 6164c16a6a6SHong Zhang 6174c16a6a6SHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 6184c16a6a6SHong Zhang C->factor = FACTOR_CHOLESKY; 6194c16a6a6SHong Zhang C->assembled = PETSC_TRUE; 6204c16a6a6SHong Zhang C->preallocated = PETSC_TRUE; 621b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 6224c16a6a6SHong Zhang PetscFunctionReturn(0); 6234c16a6a6SHong Zhang } 624d4edadd4SHong Zhang 6254a2ae208SSatish Balay #undef __FUNCT__ 6264a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 627dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 628671cb588SHong Zhang { 629671cb588SHong Zhang Mat C = *B; 630671cb588SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 631dfbe8321SBarry Smith PetscErrorCode ierr; 63213f74950SBarry Smith PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 63313f74950SBarry Smith PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 63413f74950SBarry Smith PetscInt bs=A->bs,bs2 = a->bs2; 635671cb588SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 636671cb588SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 63728de702eSHong Zhang MatScalar *work; 63813f74950SBarry Smith PetscInt *pivots; 639671cb588SHong Zhang 640671cb588SHong Zhang PetscFunctionBegin; 641671cb588SHong Zhang /* initialization */ 642671cb588SHong Zhang 64382502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 644671cb588SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 64513f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 64628de702eSHong Zhang jl = il + mbs; 647671cb588SHong Zhang for (i=0; i<mbs; i++) { 648671cb588SHong Zhang jl[i] = mbs; il[0] = 0; 649671cb588SHong Zhang } 650b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 65128de702eSHong Zhang uik = dk + bs2; 65228de702eSHong Zhang work = uik + bs2; 65313f74950SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 654671cb588SHong Zhang 655671cb588SHong Zhang ai = a->i; aj = a->j; aa = a->a; 656671cb588SHong Zhang 657671cb588SHong Zhang /* for each row k */ 658671cb588SHong Zhang for (k = 0; k<mbs; k++){ 659671cb588SHong Zhang 660671cb588SHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 661671cb588SHong Zhang jmin = ai[k]; jmax = ai[k+1]; 662671cb588SHong Zhang ap = aa + jmin*bs2; 663671cb588SHong Zhang for (j = jmin; j < jmax; j++){ 664671cb588SHong Zhang vj = aj[j]; /* block col. index */ 665671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 666671cb588SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 667671cb588SHong Zhang } 668671cb588SHong Zhang 669671cb588SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 670671cb588SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 671671cb588SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 672671cb588SHong Zhang 673057f5ba7SHong Zhang while (i < k){ 674671cb588SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 675671cb588SHong Zhang 676671cb588SHong Zhang /* compute multiplier */ 677671cb588SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 678671cb588SHong Zhang 679671cb588SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 680671cb588SHong Zhang diag = ba + i*bs2; 681671cb588SHong Zhang u = ba + ili*bs2; 682671cb588SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 683671cb588SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 684671cb588SHong Zhang 685671cb588SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 686671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 687671cb588SHong Zhang 688671cb588SHong Zhang /* update -U(i,k) */ 689671cb588SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 690671cb588SHong Zhang 691671cb588SHong Zhang /* add multiple of row i to k-th row ... */ 692671cb588SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 693671cb588SHong Zhang if (jmin < jmax){ 694671cb588SHong Zhang for (j=jmin; j<jmax; j++) { 695671cb588SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 696671cb588SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 697671cb588SHong Zhang u = ba + j*bs2; 698671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 699671cb588SHong Zhang } 700671cb588SHong Zhang 701671cb588SHong Zhang /* ... add i to row list for next nonzero entry */ 702671cb588SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 703671cb588SHong Zhang j = bj[jmin]; 704671cb588SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 705671cb588SHong Zhang } 706671cb588SHong Zhang i = nexti; 707671cb588SHong Zhang } 708671cb588SHong Zhang 709671cb588SHong Zhang /* save nonzero entries in k-th row of U ... */ 710671cb588SHong Zhang 711671cb588SHong Zhang /* invert diagonal block */ 712671cb588SHong Zhang diag = ba+k*bs2; 713671cb588SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 714d230e6fdSBarry Smith ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 715671cb588SHong Zhang 716671cb588SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 717671cb588SHong Zhang if (jmin < jmax) { 718671cb588SHong Zhang for (j=jmin; j<jmax; j++){ 719671cb588SHong Zhang vj = bj[j]; /* block col. index of U */ 720671cb588SHong Zhang u = ba + j*bs2; 721671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 722671cb588SHong Zhang for (k1=0; k1<bs2; k1++){ 723671cb588SHong Zhang *u++ = *rtmp_ptr; 724671cb588SHong Zhang *rtmp_ptr++ = 0.0; 725671cb588SHong Zhang } 726671cb588SHong Zhang } 727671cb588SHong Zhang 728671cb588SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 729671cb588SHong Zhang il[k] = jmin; 730671cb588SHong Zhang i = bj[jmin]; 731671cb588SHong Zhang jl[k] = jl[i]; jl[i] = k; 732671cb588SHong Zhang } 733671cb588SHong Zhang } 734671cb588SHong Zhang 735671cb588SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 736671cb588SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 737671cb588SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 738671cb588SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 739671cb588SHong Zhang 740671cb588SHong Zhang C->factor = FACTOR_CHOLESKY; 741671cb588SHong Zhang C->assembled = PETSC_TRUE; 742671cb588SHong Zhang C->preallocated = PETSC_TRUE; 743b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 744671cb588SHong Zhang PetscFunctionReturn(0); 745671cb588SHong Zhang } 746671cb588SHong Zhang 74749b5e25fSSatish Balay /* 748fcf159c0SHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 749cc0c071aSHong Zhang Version for blocks 2 by 2. 75049b5e25fSSatish Balay */ 7514a2ae208SSatish Balay #undef __FUNCT__ 7524a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 753dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 75449b5e25fSSatish Balay { 75549b5e25fSSatish Balay Mat C = *B; 75691602c66SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 757cc0c071aSHong Zhang IS perm = b->row; 7586849ba73SBarry Smith PetscErrorCode ierr; 75913f74950SBarry Smith PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 76013f74950SBarry Smith PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 761a1723e09SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 762cc0c071aSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 76349b5e25fSSatish Balay 76449b5e25fSSatish Balay PetscFunctionBegin; 76591602c66SHong Zhang /* initialization */ 76691602c66SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 76791602c66SHong Zhang window U(0:k, k:mbs-1). 76891602c66SHong Zhang jl: list of rows to be added to uneliminated rows 76991602c66SHong Zhang i>= k: jl(i) is the first row to be added to row i 77091602c66SHong Zhang i< k: jl(i) is the row following row i in some list of rows 77191602c66SHong Zhang jl(i) = mbs indicates the end of a list 77291602c66SHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 77391602c66SHong Zhang row i of U */ 77482502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 775cc0c071aSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 77613f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 77728de702eSHong Zhang jl = il + mbs; 77891602c66SHong Zhang for (i=0; i<mbs; i++) { 7793845f261SHong Zhang jl[i] = mbs; il[0] = 0; 78091602c66SHong Zhang } 78182502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 78228de702eSHong Zhang uik = dk + 4; 783cc0c071aSHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 784cc0c071aSHong Zhang 785cc0c071aSHong Zhang /* check permutation */ 786cc0c071aSHong Zhang if (!a->permute){ 787cc0c071aSHong Zhang ai = a->i; aj = a->j; aa = a->a; 788cc0c071aSHong Zhang } else { 789cc0c071aSHong Zhang ai = a->inew; aj = a->jnew; 79082502324SSatish Balay ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 791cc0c071aSHong Zhang ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 79213f74950SBarry Smith ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 79313f74950SBarry Smith ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 794cc0c071aSHong Zhang 795cc0c071aSHong Zhang for (i=0; i<mbs; i++){ 796cc0c071aSHong Zhang jmin = ai[i]; jmax = ai[i+1]; 797cc0c071aSHong Zhang for (j=jmin; j<jmax; j++){ 798cc0c071aSHong Zhang while (a2anew[j] != j){ 799cc0c071aSHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 800cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 801cc0c071aSHong Zhang dk[k1] = aa[k*4+k1]; 802cc0c071aSHong Zhang aa[k*4+k1] = aa[j*4+k1]; 803cc0c071aSHong Zhang aa[j*4+k1] = dk[k1]; 804cc0c071aSHong Zhang } 805cc0c071aSHong Zhang } 806cc0c071aSHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 807cc0c071aSHong Zhang if (i > aj[j]){ 808a1723e09SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 809cc0c071aSHong Zhang ap = aa + j*4; /* ptr to the beginning of the block */ 810cc0c071aSHong Zhang dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 811cc0c071aSHong Zhang ap[1] = ap[2]; 812cc0c071aSHong Zhang ap[2] = dk[1]; 813cc0c071aSHong Zhang } 814cc0c071aSHong Zhang } 815cc0c071aSHong Zhang } 816ac355199SBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 817cc0c071aSHong Zhang } 8183845f261SHong Zhang 81991602c66SHong Zhang /* for each row k */ 82091602c66SHong Zhang for (k = 0; k<mbs; k++){ 82191602c66SHong Zhang 82291602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 823cc0c071aSHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 824cc0c071aSHong Zhang ap = aa + jmin*4; 82591602c66SHong Zhang for (j = jmin; j < jmax; j++){ 826cc0c071aSHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 827cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 828cc0c071aSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 82991602c66SHong Zhang } 83091602c66SHong Zhang 83191602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 832cc0c071aSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 83391602c66SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 83491602c66SHong Zhang 835057f5ba7SHong Zhang while (i < k){ 83691602c66SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 83791602c66SHong Zhang 8383845f261SHong Zhang /* compute multiplier */ 83991602c66SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 8403845f261SHong Zhang 8413845f261SHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 842cc0c071aSHong Zhang diag = ba + i*4; 843cc0c071aSHong Zhang u = ba + ili*4; 844cc0c071aSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 845cc0c071aSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 846cc0c071aSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 847cc0c071aSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 8483845f261SHong Zhang 8493845f261SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 850cc0c071aSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 851cc0c071aSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 852cc0c071aSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 853cc0c071aSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 8543845f261SHong Zhang 8553845f261SHong Zhang /* update -U(i,k): ba[ili] = uik */ 856cc0c071aSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 85791602c66SHong Zhang 85891602c66SHong Zhang /* add multiple of row i to k-th row ... */ 85991602c66SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 86091602c66SHong Zhang if (jmin < jmax){ 8613845f261SHong Zhang for (j=jmin; j<jmax; j++) { 8623845f261SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 863cc0c071aSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 864cc0c071aSHong Zhang u = ba + j*4; 865cc0c071aSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 866cc0c071aSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 867cc0c071aSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 868cc0c071aSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 8693845f261SHong Zhang } 8703845f261SHong Zhang 87191602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 87291602c66SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 87391602c66SHong Zhang j = bj[jmin]; 87491602c66SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 87591602c66SHong Zhang } 876a1723e09SHong Zhang i = nexti; 87791602c66SHong Zhang } 878cc0c071aSHong Zhang 87991602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 8803845f261SHong Zhang 881cc0c071aSHong Zhang /* invert diagonal block */ 882cc0c071aSHong Zhang diag = ba+k*4; 883cc0c071aSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 8843845f261SHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 8853845f261SHong Zhang 88691602c66SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 88791602c66SHong Zhang if (jmin < jmax) { 88891602c66SHong Zhang for (j=jmin; j<jmax; j++){ 889cc0c071aSHong Zhang vj = bj[j]; /* block col. index of U */ 890cc0c071aSHong Zhang u = ba + j*4; 891cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 892cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 893cc0c071aSHong Zhang *u++ = *rtmp_ptr; 894cc0c071aSHong Zhang *rtmp_ptr++ = 0.0; 895cc0c071aSHong Zhang } 896cc0c071aSHong Zhang } 8973845f261SHong Zhang 89891602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 89991602c66SHong Zhang il[k] = jmin; 90091602c66SHong Zhang i = bj[jmin]; 90191602c66SHong Zhang jl[k] = jl[i]; jl[i] = k; 90291602c66SHong Zhang } 90391602c66SHong Zhang } 9043845f261SHong Zhang 90549b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 90691602c66SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 9073845f261SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 90891602c66SHong Zhang if (a->permute) { 90991602c66SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 91091602c66SHong Zhang } 911cc0c071aSHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 91291602c66SHong Zhang C->factor = FACTOR_CHOLESKY; 91349b5e25fSSatish Balay C->assembled = PETSC_TRUE; 9145c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 915b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 91649b5e25fSSatish Balay PetscFunctionReturn(0); 91749b5e25fSSatish Balay } 91891602c66SHong Zhang 91949b5e25fSSatish Balay /* 92049b5e25fSSatish Balay Version for when blocks are 2 by 2 Using natural ordering 92149b5e25fSSatish Balay */ 9224a2ae208SSatish Balay #undef __FUNCT__ 9234a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 924dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 92549b5e25fSSatish Balay { 92649b5e25fSSatish Balay Mat C = *B; 927ab27746eSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 928dfbe8321SBarry Smith PetscErrorCode ierr; 92913f74950SBarry Smith PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 93013f74950SBarry Smith PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 931ab27746eSHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 932ab27746eSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 93349b5e25fSSatish Balay 93449b5e25fSSatish Balay PetscFunctionBegin; 935ab27746eSHong Zhang /* initialization */ 936ab27746eSHong Zhang /* il and jl record the first nonzero element in each row of the accessing 937ab27746eSHong Zhang window U(0:k, k:mbs-1). 938ab27746eSHong Zhang jl: list of rows to be added to uneliminated rows 939ab27746eSHong Zhang i>= k: jl(i) is the first row to be added to row i 940ab27746eSHong Zhang i< k: jl(i) is the row following row i in some list of rows 941ab27746eSHong Zhang jl(i) = mbs indicates the end of a list 942ab27746eSHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 943ab27746eSHong Zhang row i of U */ 94482502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 945ab27746eSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 94613f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 94728de702eSHong Zhang jl = il + mbs; 948ab27746eSHong Zhang for (i=0; i<mbs; i++) { 949ab27746eSHong Zhang jl[i] = mbs; il[0] = 0; 95049b5e25fSSatish Balay } 95182502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 95228de702eSHong Zhang uik = dk + 4; 953ab27746eSHong Zhang 954ab27746eSHong Zhang ai = a->i; aj = a->j; aa = a->a; 955ab27746eSHong Zhang 956ab27746eSHong Zhang /* for each row k */ 957ab27746eSHong Zhang for (k = 0; k<mbs; k++){ 958ab27746eSHong Zhang 959ab27746eSHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 960ab27746eSHong Zhang jmin = ai[k]; jmax = ai[k+1]; 961ab27746eSHong Zhang ap = aa + jmin*4; 962ab27746eSHong Zhang for (j = jmin; j < jmax; j++){ 963ab27746eSHong Zhang vj = aj[j]; /* block col. index */ 964ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 965ab27746eSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 96649b5e25fSSatish Balay } 967ab27746eSHong Zhang 968ab27746eSHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 969ab27746eSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 970ab27746eSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 971ab27746eSHong Zhang 972057f5ba7SHong Zhang while (i < k){ 973ab27746eSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 974ab27746eSHong Zhang 975ab27746eSHong Zhang /* compute multiplier */ 976ab27746eSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 977ab27746eSHong Zhang 978ab27746eSHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 979ab27746eSHong Zhang diag = ba + i*4; 980ab27746eSHong Zhang u = ba + ili*4; 981ab27746eSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 982ab27746eSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 983ab27746eSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 984ab27746eSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 985ab27746eSHong Zhang 986ab27746eSHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 987ab27746eSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 988ab27746eSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 989ab27746eSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 990ab27746eSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 991ab27746eSHong Zhang 992ab27746eSHong Zhang /* update -U(i,k): ba[ili] = uik */ 993ab27746eSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 994ab27746eSHong Zhang 995ab27746eSHong Zhang /* add multiple of row i to k-th row ... */ 996ab27746eSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 997ab27746eSHong Zhang if (jmin < jmax){ 998ab27746eSHong Zhang for (j=jmin; j<jmax; j++) { 999ab27746eSHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1000ab27746eSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 1001ab27746eSHong Zhang u = ba + j*4; 1002ab27746eSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 1003ab27746eSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 1004ab27746eSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 1005ab27746eSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 100649b5e25fSSatish Balay } 1007ab27746eSHong Zhang 1008ab27746eSHong Zhang /* ... add i to row list for next nonzero entry */ 1009ab27746eSHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1010ab27746eSHong Zhang j = bj[jmin]; 1011ab27746eSHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 101249b5e25fSSatish Balay } 1013ab27746eSHong Zhang i = nexti; 101449b5e25fSSatish Balay } 1015ab27746eSHong Zhang 1016ab27746eSHong Zhang /* save nonzero entries in k-th row of U ... */ 1017ab27746eSHong Zhang 101849b5e25fSSatish Balay /* invert diagonal block */ 1019ab27746eSHong Zhang diag = ba+k*4; 1020ab27746eSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1021ab27746eSHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 1022ab27746eSHong Zhang 1023ab27746eSHong Zhang jmin = bi[k]; jmax = bi[k+1]; 1024ab27746eSHong Zhang if (jmin < jmax) { 1025ab27746eSHong Zhang for (j=jmin; j<jmax; j++){ 1026ab27746eSHong Zhang vj = bj[j]; /* block col. index of U */ 1027ab27746eSHong Zhang u = ba + j*4; 1028ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 1029ab27746eSHong Zhang for (k1=0; k1<4; k1++){ 1030ab27746eSHong Zhang *u++ = *rtmp_ptr; 1031ab27746eSHong Zhang *rtmp_ptr++ = 0.0; 1032ab27746eSHong Zhang } 1033ab27746eSHong Zhang } 1034ab27746eSHong Zhang 1035ab27746eSHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 1036ab27746eSHong Zhang il[k] = jmin; 1037ab27746eSHong Zhang i = bj[jmin]; 1038ab27746eSHong Zhang jl[k] = jl[i]; jl[i] = k; 1039ab27746eSHong Zhang } 104049b5e25fSSatish Balay } 104149b5e25fSSatish Balay 104249b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 1043ab27746eSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 1044ab27746eSHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 1045ab27746eSHong Zhang 1046ab27746eSHong Zhang C->factor = FACTOR_CHOLESKY; 104749b5e25fSSatish Balay C->assembled = PETSC_TRUE; 10485c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 1049b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 105049b5e25fSSatish Balay PetscFunctionReturn(0); 105149b5e25fSSatish Balay } 105249b5e25fSSatish Balay 105349b5e25fSSatish Balay /* 10545c0bcdfcSHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 105591602c66SHong Zhang Version for blocks are 1 by 1. 105649b5e25fSSatish Balay */ 10574a2ae208SSatish Balay #undef __FUNCT__ 10584a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 1059dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 106049b5e25fSSatish Balay { 106149b5e25fSSatish Balay Mat C = *B; 106249b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 106349b5e25fSSatish Balay IS ip = b->row; 10646849ba73SBarry Smith PetscErrorCode ierr; 106513f74950SBarry Smith PetscInt *rip,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 106613f74950SBarry Smith PetscInt *ai,*aj,*r; 106713f74950SBarry Smith PetscInt k,jmin,jmax,*jl,*il,vj,nexti,ili; 1068066653e3SSatish Balay MatScalar *rtmp; 10692d007305SHong Zhang MatScalar *ba = b->a,*aa,ak; 107049b5e25fSSatish Balay MatScalar dk,uikdi; 107149b5e25fSSatish Balay 107249b5e25fSSatish Balay PetscFunctionBegin; 107349b5e25fSSatish Balay ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1074cb718733SHong Zhang if (!a->permute){ 10752d007305SHong Zhang ai = a->i; aj = a->j; aa = a->a; 10762d007305SHong Zhang } else { 10772d007305SHong Zhang ai = a->inew; aj = a->jnew; 107882502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1079cb718733SHong Zhang ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 108013f74950SBarry Smith ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&r);CHKERRQ(ierr); 108113f74950SBarry Smith ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 10822d007305SHong Zhang 10832d007305SHong Zhang jmin = ai[0]; jmax = ai[mbs]; 10842d007305SHong Zhang for (j=jmin; j<jmax; j++){ 1085cb718733SHong Zhang while (r[j] != j){ 10862d007305SHong Zhang k = r[j]; r[j] = r[k]; r[k] = k; 10872d007305SHong Zhang ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 10882d007305SHong Zhang } 10892d007305SHong Zhang } 1090ac355199SBarry Smith ierr = PetscFree(r);CHKERRQ(ierr); 10912d007305SHong Zhang } 10922d007305SHong Zhang 109391602c66SHong Zhang /* initialization */ 109449b5e25fSSatish Balay /* il and jl record the first nonzero element in each row of the accessing 109549b5e25fSSatish Balay window U(0:k, k:mbs-1). 109649b5e25fSSatish Balay jl: list of rows to be added to uneliminated rows 109749b5e25fSSatish Balay i>= k: jl(i) is the first row to be added to row i 109849b5e25fSSatish Balay i< k: jl(i) is the row following row i in some list of rows 109949b5e25fSSatish Balay jl(i) = mbs indicates the end of a list 110049b5e25fSSatish Balay il(i): points to the first nonzero element in columns k,...,mbs-1 of 110149b5e25fSSatish Balay row i of U */ 110282502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 110313f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 110428de702eSHong Zhang jl = il + mbs; 110549b5e25fSSatish Balay for (i=0; i<mbs; i++) { 110649b5e25fSSatish Balay rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 110749b5e25fSSatish Balay } 110849b5e25fSSatish Balay 110991602c66SHong Zhang /* for each row k */ 111049b5e25fSSatish Balay for (k = 0; k<mbs; k++){ 111149b5e25fSSatish Balay 111291602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 111349b5e25fSSatish Balay jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1114057f5ba7SHong Zhang 111549b5e25fSSatish Balay for (j = jmin; j < jmax; j++){ 1116351d0355SHong Zhang vj = rip[aj[j]]; 1117ab27746eSHong Zhang rtmp[vj] = aa[j]; 111849b5e25fSSatish Balay } 111949b5e25fSSatish Balay 112091602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 112149b5e25fSSatish Balay dk = rtmp[k]; 112249b5e25fSSatish Balay i = jl[k]; /* first row to be added to k_th row */ 112349b5e25fSSatish Balay 1124057f5ba7SHong Zhang while (i < k){ 112549b5e25fSSatish Balay nexti = jl[i]; /* next row to be added to k_th row */ 112649b5e25fSSatish Balay 112791602c66SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 112849b5e25fSSatish Balay ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 112949b5e25fSSatish Balay uikdi = - ba[ili]*ba[i]; 113049b5e25fSSatish Balay dk += uikdi*ba[ili]; 1131658e7b3eSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 113249b5e25fSSatish Balay 113391602c66SHong Zhang /* add multiple of row i to k-th row ... */ 113449b5e25fSSatish Balay jmin = ili + 1; jmax = bi[i+1]; 113549b5e25fSSatish Balay if (jmin < jmax){ 113649b5e25fSSatish Balay for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 113791602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 113849b5e25fSSatish Balay il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 113949b5e25fSSatish Balay j = bj[jmin]; 114049b5e25fSSatish Balay jl[i] = jl[j]; jl[j] = i; /* update jl */ 114149b5e25fSSatish Balay } 1142ab27746eSHong Zhang i = nexti; 114349b5e25fSSatish Balay } 114449b5e25fSSatish Balay 114591602c66SHong Zhang /* check for zero pivot and save diagoanl element */ 11465b8514ebSBarry Smith if (dk == 0.0){ 114729bbc08cSBarry Smith SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1148671cb588SHong Zhang /* 11491223c01bSHong Zhang } else if (PetscRealPart(dk) < 0.0){ 11501223c01bSHong Zhang SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1151671cb588SHong Zhang */ 115249b5e25fSSatish Balay } 115349b5e25fSSatish Balay 115491602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 1155f3dd7b2dSKris Buschelman ba[k] = 1.0/dk; 115649b5e25fSSatish Balay jmin = bi[k]; jmax = bi[k+1]; 115749b5e25fSSatish Balay if (jmin < jmax) { 115849b5e25fSSatish Balay for (j=jmin; j<jmax; j++){ 1159cc0c071aSHong Zhang vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 116049b5e25fSSatish Balay } 116191602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 116249b5e25fSSatish Balay il[k] = jmin; 116349b5e25fSSatish Balay i = bj[jmin]; 116449b5e25fSSatish Balay jl[k] = jl[i]; jl[i] = k; 116549b5e25fSSatish Balay } 116649b5e25fSSatish Balay } 116749b5e25fSSatish Balay 116849b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 116949b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 1170cb718733SHong Zhang if (a->permute){ 1171cb718733SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 1172cb718733SHong Zhang } 117349b5e25fSSatish Balay 117449b5e25fSSatish Balay ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 11758be8c0c7SHong Zhang C->factor = FACTOR_CHOLESKY; 117649b5e25fSSatish Balay C->assembled = PETSC_TRUE; 11775c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 1178b0a32e0cSBarry Smith PetscLogFlops(b->mbs); 117949b5e25fSSatish Balay PetscFunctionReturn(0); 118049b5e25fSSatish Balay } 118149b5e25fSSatish Balay 1182671cb588SHong Zhang /* 1183671cb588SHong Zhang Version for when blocks are 1 by 1 Using natural ordering 1184671cb588SHong Zhang */ 11854a2ae208SSatish Balay #undef __FUNCT__ 11864a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1187dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 1188671cb588SHong Zhang { 1189671cb588SHong Zhang Mat C = *B; 1190671cb588SHong Zhang Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1191dfbe8321SBarry Smith PetscErrorCode ierr; 119213f74950SBarry Smith PetscInt i,j,mbs = a->mbs; 119313f74950SBarry Smith PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 119413f74950SBarry Smith PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 1195653a6975SHong Zhang MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 119621c26570Svictorle PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 119721c26570Svictorle PetscTruth damp,chshift; 119813f74950SBarry Smith PetscInt nshift=0; 1199653a6975SHong Zhang 1200653a6975SHong Zhang PetscFunctionBegin; 1201653a6975SHong Zhang /* initialization */ 1202653a6975SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 1203653a6975SHong Zhang window U(0:k, k:mbs-1). 1204653a6975SHong Zhang jl: list of rows to be added to uneliminated rows 1205653a6975SHong Zhang i>= k: jl(i) is the first row to be added to row i 1206653a6975SHong Zhang i< k: jl(i) is the row following row i in some list of rows 1207653a6975SHong Zhang jl(i) = mbs indicates the end of a list 1208653a6975SHong Zhang il(i): points to the first nonzero element in U(i,k:mbs-1) 1209653a6975SHong Zhang */ 1210653a6975SHong Zhang ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 121113f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 1212653a6975SHong Zhang jl = il + mbs; 1213b00f7748SHong Zhang 121421c26570Svictorle shift_amount = 0; 1215b00f7748SHong Zhang do { 1216b00f7748SHong Zhang damp = PETSC_FALSE; 121721c26570Svictorle chshift = PETSC_FALSE; 1218653a6975SHong Zhang for (i=0; i<mbs; i++) { 1219653a6975SHong Zhang rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1220653a6975SHong Zhang } 1221653a6975SHong Zhang 1222b00f7748SHong Zhang for (k = 0; k<mbs; k++){ /* row k */ 1223653a6975SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 1224653a6975SHong Zhang nz = ai[k+1] - ai[k]; 1225653a6975SHong Zhang acol = aj + ai[k]; 1226653a6975SHong Zhang aval = aa + ai[k]; 1227653a6975SHong Zhang bval = ba + bi[k]; 1228653a6975SHong Zhang while (nz -- ){ 1229653a6975SHong Zhang rtmp[*acol++] = *aval++; 1230653a6975SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 1231653a6975SHong Zhang } 1232b00f7748SHong Zhang /* damp the diagonal of the matrix */ 123321c26570Svictorle if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1234653a6975SHong Zhang 1235653a6975SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1236653a6975SHong Zhang dk = rtmp[k]; 1237653a6975SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 1238653a6975SHong Zhang 1239653a6975SHong Zhang while (i < k){ 1240653a6975SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 1241653a6975SHong Zhang 1242653a6975SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 1243653a6975SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1244653a6975SHong Zhang uikdi = - ba[ili]*ba[bi[i]]; 1245653a6975SHong Zhang dk += uikdi*ba[ili]; 1246653a6975SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 1247653a6975SHong Zhang 1248653a6975SHong Zhang /* add multiple of row i to k-th row ... */ 1249653a6975SHong Zhang jmin = ili + 1; 1250653a6975SHong Zhang nz = bi[i+1] - jmin; 1251653a6975SHong Zhang if (nz > 0){ 1252653a6975SHong Zhang bcol = bj + jmin; 1253653a6975SHong Zhang bval = ba + jmin; 1254653a6975SHong Zhang while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1255653a6975SHong Zhang /* ... add i to row list for next nonzero entry */ 1256653a6975SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1257653a6975SHong Zhang j = bj[jmin]; 1258653a6975SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 1259653a6975SHong Zhang } 1260653a6975SHong Zhang i = nexti; 1261653a6975SHong Zhang } 1262653a6975SHong Zhang 126321c26570Svictorle if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1264d530a6e7Svictorle /* calculate a shift that would make this row diagonally dominant */ 126529f69fd4SBarry Smith PetscReal rs = PetscAbs(PetscRealPart(dk)); 126621c26570Svictorle jmin = bi[k]+1; 126721c26570Svictorle nz = bi[k+1] - jmin; 126821c26570Svictorle if (nz){ 126921c26570Svictorle bcol = bj + jmin; 127021c26570Svictorle bval = ba + jmin; 127121c26570Svictorle while (nz--){ 127221c26570Svictorle rs += PetscAbsScalar(rtmp[*bcol++]); 127321c26570Svictorle } 127421c26570Svictorle } 1275d530a6e7Svictorle /* if this shift is less than the previous, just up the previous 1276d530a6e7Svictorle one by a bit */ 1277d530a6e7Svictorle shift_amount = PetscMax(rs,1.1*shift_amount); 127821c26570Svictorle chshift = PETSC_TRUE; 1279d530a6e7Svictorle /* Unlike in the ILU case there is no exit condition on nshift: 1280d530a6e7Svictorle we increase the shift until it converges. There is no guarantee that 1281d530a6e7Svictorle this algorithm converges faster or slower, or is better or worse 1282d530a6e7Svictorle than the ILU algorithm. */ 128321c26570Svictorle nshift++; 128421c26570Svictorle break; 128521c26570Svictorle } 1286ffa0b812SHong Zhang if (PetscRealPart(dk) < zeropivot){ 1287ffa0b812SHong Zhang if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1288b00f7748SHong Zhang if (damping > 0.0) { 1289b00f7748SHong Zhang if (ndamp) damping *= 2.0; 1290b00f7748SHong Zhang damp = PETSC_TRUE; 1291b00f7748SHong Zhang ndamp++; 1292b00f7748SHong Zhang break; 1293481c2c92SHong Zhang } else if (PetscAbsScalar(dk) < zeropivot){ 129477431f27SBarry Smith SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1295481c2c92SHong Zhang } else { 129677431f27SBarry Smith PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1297b00f7748SHong Zhang } 1298064503c5SHong Zhang } 1299653a6975SHong Zhang 1300653a6975SHong Zhang /* save nonzero entries in k-th row of U ... */ 130177431f27SBarry Smith /* printf("%D, dk: %g, 1/dk: %g\n",k,dk,1/dk); */ 1302653a6975SHong Zhang ba[bi[k]] = 1.0/dk; 1303653a6975SHong Zhang jmin = bi[k]+1; 1304653a6975SHong Zhang nz = bi[k+1] - jmin; 1305653a6975SHong Zhang if (nz){ 1306653a6975SHong Zhang bcol = bj + jmin; 1307653a6975SHong Zhang bval = ba + jmin; 1308653a6975SHong Zhang while (nz--){ 1309653a6975SHong Zhang *bval++ = rtmp[*bcol]; 1310653a6975SHong Zhang rtmp[*bcol++] = 0.0; 1311653a6975SHong Zhang } 1312653a6975SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 1313653a6975SHong Zhang il[k] = jmin; 1314653a6975SHong Zhang i = bj[jmin]; 1315653a6975SHong Zhang jl[k] = jl[i]; jl[i] = k; 1316653a6975SHong Zhang } 1317b00f7748SHong Zhang } /* end of for (k = 0; k<mbs; k++) */ 131821c26570Svictorle } while (damp||chshift); 1319653a6975SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 1320653a6975SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 1321653a6975SHong Zhang 1322653a6975SHong Zhang C->factor = FACTOR_CHOLESKY; 1323653a6975SHong Zhang C->assembled = PETSC_TRUE; 1324653a6975SHong Zhang C->preallocated = PETSC_TRUE; 1325653a6975SHong Zhang PetscLogFlops(b->mbs); 1326b00f7748SHong Zhang if (ndamp) { 132777431f27SBarry Smith PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 1328b00f7748SHong Zhang } 1329fb10cecfSBarry Smith if (nshift) { 133077431f27SBarry Smith PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift); 1331fb10cecfSBarry Smith } 1332fb10cecfSBarry Smith 1333653a6975SHong Zhang PetscFunctionReturn(0); 1334653a6975SHong Zhang } 1335653a6975SHong Zhang 1336653a6975SHong Zhang #undef __FUNCT__ 13374a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1338dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info) 133949b5e25fSSatish Balay { 1340dfbe8321SBarry Smith PetscErrorCode ierr; 134149b5e25fSSatish Balay Mat C; 134249b5e25fSSatish Balay 134349b5e25fSSatish Balay PetscFunctionBegin; 134415e8a5b3SHong Zhang ierr = MatCholeskyFactorSymbolic(A,perm,info,&C);CHKERRQ(ierr); 1345a4ada70bSHong Zhang ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 13464445ddedSHong Zhang ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 134749b5e25fSSatish Balay PetscFunctionReturn(0); 134849b5e25fSSatish Balay } 134949b5e25fSSatish Balay 135049b5e25fSSatish Balay 1351