149b5e25fSSatish Balay 249b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 33a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 449b5e25fSSatish Balay #include "src/inline/ilu.h" 549a6740bSHong Zhang #include "include/petscis.h" 649b5e25fSSatish Balay 78dc9108eSHong Zhang #if !defined(PETSC_USE_COMPLEX) 85f9f512dSHong Zhang /* 95f9f512dSHong Zhang input: 10c037c3f7SHong Zhang F -- numeric factor 115f9f512dSHong Zhang output: 12c037c3f7SHong Zhang nneg, nzero, npos: matrix inertia 135f9f512dSHong Zhang */ 145f9f512dSHong Zhang 155f9f512dSHong Zhang #undef __FUNCT__ 165f9f512dSHong Zhang #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 1713f74950SBarry Smith PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos) 185f9f512dSHong Zhang { 19638f5ce0SDinesh Kaushik Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 203e0d88b5SBarry Smith PetscScalar *dd = fact_ptr->a; 2113f74950SBarry Smith PetscInt mbs=fact_ptr->mbs,bs=F->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i; 225f9f512dSHong Zhang 235f9f512dSHong Zhang PetscFunctionBegin; 2477431f27SBarry Smith if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs); 25eeeff2ecSHong Zhang nneig_tmp = 0; npos_tmp = 0; 26eeeff2ecSHong Zhang for (i=0; i<mbs; i++){ 27eeeff2ecSHong Zhang if (PetscRealPart(dd[*fi]) > 0.0){ 28eeeff2ecSHong Zhang npos_tmp++; 29eeeff2ecSHong Zhang } else if (PetscRealPart(dd[*fi]) < 0.0){ 30eeeff2ecSHong Zhang nneig_tmp++; 315f9f512dSHong Zhang } 32eeeff2ecSHong Zhang fi++; 333e0d88b5SBarry Smith } 34eeeff2ecSHong Zhang if (nneig) *nneig = nneig_tmp; 35eeeff2ecSHong Zhang if (npos) *npos = npos_tmp; 36eeeff2ecSHong Zhang if (nzero) *nzero = mbs - nneig_tmp - npos_tmp; 37eeeff2ecSHong Zhang 385f9f512dSHong Zhang PetscFunctionReturn(0); 395f9f512dSHong Zhang } 408dc9108eSHong Zhang #endif /* !defined(PETSC_USE_COMPLEX) */ 415f9f512dSHong Zhang 425f9f512dSHong Zhang /* 435f9f512dSHong Zhang Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 4410c27e3dSHong Zhang Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 455f9f512dSHong Zhang */ 4610c27e3dSHong Zhang #undef __FUNCT__ 4710c27e3dSHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_MSR" 4810c27e3dSHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 4910c27e3dSHong Zhang { 5010c27e3dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 5110c27e3dSHong Zhang PetscErrorCode ierr; 5210c27e3dSHong Zhang PetscInt *rip,i,mbs = a->mbs,*ai,*aj; 5310c27e3dSHong Zhang PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 5410c27e3dSHong Zhang PetscInt m,reallocs = 0,prow; 5510c27e3dSHong Zhang PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 5610c27e3dSHong Zhang PetscReal f = info->fill; 5710c27e3dSHong Zhang PetscTruth perm_identity; 5810c27e3dSHong Zhang 5910c27e3dSHong Zhang PetscFunctionBegin; 6010c27e3dSHong Zhang /* check whether perm is the identity mapping */ 6110c27e3dSHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 6210c27e3dSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 6310c27e3dSHong Zhang 6410c27e3dSHong Zhang if (perm_identity){ /* without permutation */ 6510c27e3dSHong Zhang a->permute = PETSC_FALSE; 6610c27e3dSHong Zhang ai = a->i; aj = a->j; 6710c27e3dSHong Zhang } else { /* non-trivial permutation */ 6810c27e3dSHong Zhang a->permute = PETSC_TRUE; 6910c27e3dSHong Zhang ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 7010c27e3dSHong Zhang ai = a->inew; aj = a->jnew; 7110c27e3dSHong Zhang } 7210c27e3dSHong Zhang 7310c27e3dSHong Zhang /* initialization */ 7410c27e3dSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 7510c27e3dSHong Zhang umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1; 7610c27e3dSHong Zhang ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 7710c27e3dSHong Zhang iu[0] = mbs+1; 7810c27e3dSHong Zhang juidx = mbs + 1; /* index for ju */ 7910c27e3dSHong Zhang ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 8010c27e3dSHong Zhang q = jl + mbs; /* linked list for col index */ 8110c27e3dSHong Zhang for (i=0; i<mbs; i++){ 8210c27e3dSHong Zhang jl[i] = mbs; 8310c27e3dSHong Zhang q[i] = 0; 8410c27e3dSHong Zhang } 8510c27e3dSHong Zhang 8610c27e3dSHong Zhang /* for each row k */ 8710c27e3dSHong Zhang for (k=0; k<mbs; k++){ 8810c27e3dSHong Zhang for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 8910c27e3dSHong Zhang nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 9010c27e3dSHong Zhang q[k] = mbs; 9110c27e3dSHong Zhang /* initialize nonzero structure of k-th row to row rip[k] of A */ 9210c27e3dSHong Zhang jmin = ai[rip[k]] +1; /* exclude diag[k] */ 9310c27e3dSHong Zhang jmax = ai[rip[k]+1]; 9410c27e3dSHong Zhang for (j=jmin; j<jmax; j++){ 9510c27e3dSHong Zhang vj = rip[aj[j]]; /* col. value */ 9610c27e3dSHong Zhang if(vj > k){ 9710c27e3dSHong Zhang qm = k; 9810c27e3dSHong Zhang do { 9910c27e3dSHong Zhang m = qm; qm = q[m]; 10010c27e3dSHong Zhang } while(qm < vj); 10110c27e3dSHong Zhang if (qm == vj) { 10210c27e3dSHong Zhang SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 10310c27e3dSHong Zhang } 10410c27e3dSHong Zhang nzk++; 10510c27e3dSHong Zhang q[m] = vj; 10610c27e3dSHong Zhang q[vj] = qm; 10710c27e3dSHong Zhang } /* if(vj > k) */ 10810c27e3dSHong Zhang } /* for (j=jmin; j<jmax; j++) */ 10910c27e3dSHong Zhang 11010c27e3dSHong Zhang /* modify nonzero structure of k-th row by computing fill-in 11110c27e3dSHong Zhang for each row i to be merged in */ 11210c27e3dSHong Zhang prow = k; 11310c27e3dSHong Zhang prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 11410c27e3dSHong Zhang 11510c27e3dSHong Zhang while (prow < k){ 11610c27e3dSHong Zhang /* merge row prow into k-th row */ 11710c27e3dSHong Zhang jmin = iu[prow] + 1; jmax = iu[prow+1]; 11810c27e3dSHong Zhang qm = k; 11910c27e3dSHong Zhang for (j=jmin; j<jmax; j++){ 12010c27e3dSHong Zhang vj = ju[j]; 12110c27e3dSHong Zhang do { 12210c27e3dSHong Zhang m = qm; qm = q[m]; 12310c27e3dSHong Zhang } while (qm < vj); 12410c27e3dSHong Zhang if (qm != vj){ 12510c27e3dSHong Zhang nzk++; q[m] = vj; q[vj] = qm; qm = vj; 12610c27e3dSHong Zhang } 12710c27e3dSHong Zhang } 12810c27e3dSHong Zhang prow = jl[prow]; /* next pivot row */ 12910c27e3dSHong Zhang } 13010c27e3dSHong Zhang 13110c27e3dSHong Zhang /* add k to row list for first nonzero element in k-th row */ 13210c27e3dSHong Zhang if (nzk > 0){ 13310c27e3dSHong Zhang i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 13410c27e3dSHong Zhang jl[k] = jl[i]; jl[i] = k; 13510c27e3dSHong Zhang } 13610c27e3dSHong Zhang iu[k+1] = iu[k] + nzk; 13710c27e3dSHong Zhang 13810c27e3dSHong Zhang /* allocate more space to ju if needed */ 13910c27e3dSHong Zhang if (iu[k+1] > umax) { 14010c27e3dSHong Zhang /* estimate how much additional space we will need */ 14110c27e3dSHong Zhang /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 14210c27e3dSHong Zhang /* just double the memory each time */ 14310c27e3dSHong Zhang maxadd = umax; 14410c27e3dSHong Zhang if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 14510c27e3dSHong Zhang umax += maxadd; 14610c27e3dSHong Zhang 14710c27e3dSHong Zhang /* allocate a longer ju */ 14810c27e3dSHong Zhang ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 14910c27e3dSHong Zhang ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 15010c27e3dSHong Zhang ierr = PetscFree(ju);CHKERRQ(ierr); 15110c27e3dSHong Zhang ju = jutmp; 15210c27e3dSHong Zhang reallocs++; /* count how many times we realloc */ 15310c27e3dSHong Zhang } 15410c27e3dSHong Zhang 15510c27e3dSHong Zhang /* save nonzero structure of k-th row in ju */ 15610c27e3dSHong Zhang i=k; 15710c27e3dSHong Zhang while (nzk --) { 15810c27e3dSHong Zhang i = q[i]; 15910c27e3dSHong Zhang ju[juidx++] = i; 16010c27e3dSHong Zhang } 16110c27e3dSHong Zhang } 16210c27e3dSHong Zhang 16310c27e3dSHong Zhang if (ai[mbs] != 0) { 16410c27e3dSHong Zhang PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 16510c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 16610c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 16710c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 16810c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 16910c27e3dSHong Zhang } else { 17010c27e3dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 17110c27e3dSHong Zhang } 17210c27e3dSHong Zhang 17310c27e3dSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 17410c27e3dSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 17510c27e3dSHong Zhang 17610c27e3dSHong Zhang /* put together the new matrix */ 17710c27e3dSHong Zhang ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 17810c27e3dSHong Zhang ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 17910c27e3dSHong Zhang ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 18010c27e3dSHong Zhang 18110c27e3dSHong Zhang /* PetscLogObjectParent(*B,iperm); */ 18210c27e3dSHong Zhang b = (Mat_SeqSBAIJ*)(*B)->data; 18310c27e3dSHong Zhang ierr = PetscFree(b->imax);CHKERRQ(ierr); 18410c27e3dSHong Zhang b->singlemalloc = PETSC_FALSE; 18510c27e3dSHong Zhang /* the next line frees the default space generated by the Create() */ 18610c27e3dSHong Zhang ierr = PetscFree(b->a);CHKERRQ(ierr); 18710c27e3dSHong Zhang ierr = PetscFree(b->ilen);CHKERRQ(ierr); 18810c27e3dSHong Zhang ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 18910c27e3dSHong Zhang b->j = ju; 19010c27e3dSHong Zhang b->i = iu; 19110c27e3dSHong Zhang b->diag = 0; 19210c27e3dSHong Zhang b->ilen = 0; 19310c27e3dSHong Zhang b->imax = 0; 19410c27e3dSHong Zhang b->row = perm; 19510c27e3dSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 19610c27e3dSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 19710c27e3dSHong Zhang b->icol = perm; 19810c27e3dSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 19910c27e3dSHong Zhang ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 20010c27e3dSHong Zhang /* In b structure: Free imax, ilen, old a, old j. 20110c27e3dSHong Zhang Allocate idnew, solve_work, new a, new j */ 20210c27e3dSHong Zhang PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 20310c27e3dSHong Zhang b->maxnz = b->nz = iu[mbs]; 20410c27e3dSHong Zhang 20510c27e3dSHong Zhang (*B)->factor = FACTOR_CHOLESKY; 20610c27e3dSHong Zhang (*B)->info.factor_mallocs = reallocs; 20710c27e3dSHong Zhang (*B)->info.fill_ratio_given = f; 20810c27e3dSHong Zhang if (ai[mbs] != 0) { 20910c27e3dSHong Zhang (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 21010c27e3dSHong Zhang } else { 21110c27e3dSHong Zhang (*B)->info.fill_ratio_needed = 0.0; 21210c27e3dSHong Zhang } 21310c27e3dSHong Zhang 21410c27e3dSHong Zhang if (perm_identity){ 21510c27e3dSHong Zhang switch (bs) { 21610c27e3dSHong Zhang case 1: 21710c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 21810c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 21910c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 22010c27e3dSHong Zhang break; 22110c27e3dSHong Zhang case 2: 22210c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 22310c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 22410c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 22510c27e3dSHong Zhang break; 22610c27e3dSHong Zhang case 3: 22710c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 22810c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 22910c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 23010c27e3dSHong Zhang break; 23110c27e3dSHong Zhang case 4: 23210c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 23310c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 23410c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 23510c27e3dSHong Zhang break; 23610c27e3dSHong Zhang case 5: 23710c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 23810c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 23910c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 24010c27e3dSHong Zhang break; 24110c27e3dSHong Zhang case 6: 24210c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 24310c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 24410c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 24510c27e3dSHong Zhang break; 24610c27e3dSHong Zhang case 7: 24710c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 24810c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 24910c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 25010c27e3dSHong Zhang break; 25110c27e3dSHong Zhang default: 25210c27e3dSHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 25310c27e3dSHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 25410c27e3dSHong Zhang PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 25510c27e3dSHong Zhang break; 25610c27e3dSHong Zhang } 25710c27e3dSHong Zhang } 25810c27e3dSHong Zhang PetscFunctionReturn(0); 25910c27e3dSHong Zhang } 26010c27e3dSHong Zhang /* 26110c27e3dSHong Zhang Symbolic U^T*D*U factorization for SBAIJ format. 26210c27e3dSHong Zhang */ 26398a8e78dSHong Zhang #include "petscbt.h" 26498a8e78dSHong Zhang #include "src/mat/utils/freespace.h" 2654a2ae208SSatish Balay #undef __FUNCT__ 2664a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 26798a8e78dSHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 26849b5e25fSSatish Balay { 26998a8e78dSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 27098a8e78dSHong Zhang Mat_SeqSBAIJ *b; 27198a8e78dSHong Zhang Mat B; 2726849ba73SBarry Smith PetscErrorCode ierr; 273671cb588SHong Zhang PetscTruth perm_identity; 27498a8e78dSHong Zhang PetscReal fill = info->fill; 2757625dc9aSHong Zhang PetscInt *rip,i,mbs=a->mbs,bs=A->bs,*ai,*aj,reallocs=0,prow; 27698a8e78dSHong Zhang PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2777625dc9aSHong Zhang PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr; 27898a8e78dSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 27998a8e78dSHong Zhang PetscBT lnkbt; 28049b5e25fSSatish Balay 28149b5e25fSSatish Balay PetscFunctionBegin; 28210c27e3dSHong Zhang /* 28310c27e3dSHong Zhang This code originally uses Modified Sparse Row (MSR) storage 28410c27e3dSHong Zhang (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 28510c27e3dSHong Zhang Then it is rewritten so the factor B takes seqsbaij format. However the associated 28610c27e3dSHong Zhang MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 28710c27e3dSHong Zhang thus the original code in MSR format is still used for these cases. 28810c27e3dSHong Zhang The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 28910c27e3dSHong Zhang MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 29010c27e3dSHong Zhang */ 291fff829cfSHong Zhang if (bs > 1){ 29298a8e78dSHong Zhang ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 293fff829cfSHong Zhang PetscFunctionReturn(0); 294fff829cfSHong Zhang } 29510c27e3dSHong Zhang 29698a8e78dSHong Zhang /* check whether perm is the identity mapping */ 29798a8e78dSHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 29898a8e78dSHong Zhang 299fff829cfSHong Zhang if (perm_identity){ 300fff829cfSHong Zhang a->permute = PETSC_FALSE; 301fff829cfSHong Zhang ai = a->i; aj = a->j; 302fff829cfSHong Zhang } else { 303fff829cfSHong Zhang a->permute = PETSC_TRUE; 304fff829cfSHong Zhang ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 305fff829cfSHong Zhang ai = a->inew; aj = a->jnew; 306fff829cfSHong Zhang } 307fff829cfSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 308fff829cfSHong Zhang 309fff829cfSHong Zhang /* initialization */ 3107625dc9aSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 31198a8e78dSHong Zhang ui[0] = 0; 31298a8e78dSHong Zhang 31398a8e78dSHong Zhang /* jl: linked list for storing indices of the pivot rows 3147625dc9aSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 3157625dc9aSHong Zhang ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 3167625dc9aSHong Zhang il = jl + mbs; 3177625dc9aSHong Zhang cols = il + mbs; 3187625dc9aSHong Zhang ui_ptr = (PetscInt**)(cols + mbs); 3197625dc9aSHong Zhang 3207625dc9aSHong Zhang for (i=0; i<mbs; i++){ 3217625dc9aSHong Zhang jl[i] = mbs; il[i] = 0; 322fff829cfSHong Zhang } 323fff829cfSHong Zhang 32498a8e78dSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 3257625dc9aSHong Zhang nlnk = mbs + 1; 3267625dc9aSHong Zhang ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 327fff829cfSHong Zhang 3287625dc9aSHong Zhang /* initial FreeSpace size is fill*(ai[mbs]+1) */ 3297625dc9aSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 33098a8e78dSHong Zhang current_space = free_space; 33198a8e78dSHong Zhang 3327625dc9aSHong Zhang for (k=0; k<mbs; k++){ /* for each active row k */ 33398a8e78dSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 33498a8e78dSHong Zhang nzk = 0; 33598a8e78dSHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 3367625dc9aSHong Zhang for (j=0; j<ncols; j++){ 3377625dc9aSHong Zhang i = *(aj + ai[rip[k]] + j); 3387625dc9aSHong Zhang cols[j] = rip[i]; 3397625dc9aSHong Zhang } 3407625dc9aSHong Zhang ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 34198a8e78dSHong Zhang nzk += nlnk; 34298a8e78dSHong Zhang 34398a8e78dSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 34498a8e78dSHong Zhang prow = jl[k]; /* 1st pivot row */ 345fff829cfSHong Zhang 346fff829cfSHong Zhang while (prow < k){ 347fff829cfSHong Zhang nextprow = jl[prow]; 34898a8e78dSHong Zhang /* merge prow into k-th row */ 3497625dc9aSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 35098a8e78dSHong Zhang jmax = ui[prow+1]; 35198a8e78dSHong Zhang ncols = jmax-jmin; 3527625dc9aSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 3537625dc9aSHong Zhang ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 35498a8e78dSHong Zhang nzk += nlnk; 355fff829cfSHong Zhang 35698a8e78dSHong Zhang /* update il and jl for prow */ 357fff829cfSHong Zhang if (jmin < jmax){ 358fff829cfSHong Zhang il[prow] = jmin; 3597625dc9aSHong Zhang j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 360fff829cfSHong Zhang } 361fff829cfSHong Zhang prow = nextprow; 362fff829cfSHong Zhang } 363fff829cfSHong Zhang 36498a8e78dSHong Zhang /* if free space is not available, make more free space */ 36598a8e78dSHong Zhang if (current_space->local_remaining<nzk) { 3667625dc9aSHong Zhang i = mbs - k + 1; /* num of unfactored rows */ 36798a8e78dSHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 36898a8e78dSHong Zhang ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 36998a8e78dSHong Zhang reallocs++; 37098a8e78dSHong Zhang } 37198a8e78dSHong Zhang 37298a8e78dSHong Zhang /* copy data into free space, then initialize lnk */ 3737625dc9aSHong Zhang ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 37498a8e78dSHong Zhang 37598a8e78dSHong Zhang /* add the k-th row into il and jl */ 37698a8e78dSHong Zhang if (nzk-1 > 0){ 3777625dc9aSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 378fff829cfSHong Zhang jl[k] = jl[i]; jl[i] = k; 37998a8e78dSHong Zhang il[k] = ui[k] + 1; 380fff829cfSHong Zhang } 3817625dc9aSHong Zhang ui_ptr[k] = current_space->array; 38298a8e78dSHong Zhang current_space->array += nzk; 38398a8e78dSHong Zhang current_space->local_used += nzk; 38498a8e78dSHong Zhang current_space->local_remaining -= nzk; 385fff829cfSHong Zhang 38698a8e78dSHong Zhang ui[k+1] = ui[k] + nzk; 387fff829cfSHong Zhang } 388fff829cfSHong Zhang 3897625dc9aSHong Zhang if (ai[mbs] != 0) { 3907625dc9aSHong Zhang PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 39198a8e78dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 392fff829cfSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 39398a8e78dSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 394fff829cfSHong Zhang } else { 395fff829cfSHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 396fff829cfSHong Zhang } 397fff829cfSHong Zhang 398fff829cfSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 399fff829cfSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 400fff829cfSHong Zhang 40198a8e78dSHong Zhang /* destroy list of free space and other temporary array(s) */ 4027625dc9aSHong Zhang ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 40398a8e78dSHong Zhang ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 40498a8e78dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 405fff829cfSHong Zhang 40698a8e78dSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 4077625dc9aSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 40898a8e78dSHong Zhang B = *fact; 40998a8e78dSHong Zhang ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 41098a8e78dSHong Zhang ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 41198a8e78dSHong Zhang 41298a8e78dSHong Zhang b = (Mat_SeqSBAIJ*)B->data; 413fff829cfSHong Zhang ierr = PetscFree(b->imax);CHKERRQ(ierr); 414fff829cfSHong Zhang b->singlemalloc = PETSC_FALSE; 415fff829cfSHong Zhang /* the next line frees the default space generated by the Create() */ 416fff829cfSHong Zhang ierr = PetscFree(b->a);CHKERRQ(ierr); 417fff829cfSHong Zhang ierr = PetscFree(b->ilen);CHKERRQ(ierr); 4187625dc9aSHong Zhang ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 41998a8e78dSHong Zhang b->j = uj; 42098a8e78dSHong Zhang b->i = ui; 421fff829cfSHong Zhang b->diag = 0; 422fff829cfSHong Zhang b->ilen = 0; 423fff829cfSHong Zhang b->imax = 0; 424fff829cfSHong Zhang b->row = perm; 425fff829cfSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 426fff829cfSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 427fff829cfSHong Zhang b->icol = perm; 428fff829cfSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 4297625dc9aSHong Zhang ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 4307625dc9aSHong Zhang PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 4317625dc9aSHong Zhang b->maxnz = b->nz = ui[mbs]; 432fff829cfSHong Zhang 43398a8e78dSHong Zhang B->factor = FACTOR_CHOLESKY; 43498a8e78dSHong Zhang B->info.factor_mallocs = reallocs; 43598a8e78dSHong Zhang B->info.fill_ratio_given = fill; 4367625dc9aSHong Zhang if (ai[mbs] != 0) { 4377625dc9aSHong Zhang B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 438fff829cfSHong Zhang } else { 43998a8e78dSHong Zhang B->info.fill_ratio_needed = 0.0; 440fff829cfSHong Zhang } 441fff829cfSHong Zhang 442fff829cfSHong Zhang if (perm_identity){ 44398a8e78dSHong Zhang switch (bs) { 44498a8e78dSHong Zhang case 1: 44598a8e78dSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 44698a8e78dSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 447*7701de02SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 44898a8e78dSHong Zhang break; 44998a8e78dSHong Zhang case 2: 45098a8e78dSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 45198a8e78dSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 452*7701de02SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 45398a8e78dSHong Zhang break; 45498a8e78dSHong Zhang case 3: 45598a8e78dSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 45698a8e78dSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 457*7701de02SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 45898a8e78dSHong Zhang break; 45998a8e78dSHong Zhang case 4: 46098a8e78dSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 46198a8e78dSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 462*7701de02SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 46398a8e78dSHong Zhang break; 46498a8e78dSHong Zhang case 5: 46598a8e78dSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 46698a8e78dSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 467*7701de02SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 46898a8e78dSHong Zhang break; 46998a8e78dSHong Zhang case 6: 47098a8e78dSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 47198a8e78dSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 472*7701de02SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 47398a8e78dSHong Zhang break; 47498a8e78dSHong Zhang case 7: 47598a8e78dSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 47698a8e78dSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 477*7701de02SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 47898a8e78dSHong Zhang break; 47998a8e78dSHong Zhang default: 48098a8e78dSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 48198a8e78dSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 482*7701de02SHong Zhang PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 48398a8e78dSHong Zhang break; 48498a8e78dSHong Zhang } 48598a8e78dSHong Zhang } 486064503c5SHong Zhang PetscFunctionReturn(0); 487064503c5SHong Zhang } 4884a2ae208SSatish Balay #undef __FUNCT__ 4894a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 490dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 49149b5e25fSSatish Balay { 49249b5e25fSSatish Balay Mat C = *B; 4934c16a6a6SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 4944c16a6a6SHong Zhang IS perm = b->row; 4956849ba73SBarry Smith PetscErrorCode ierr; 49613f74950SBarry Smith PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 49713f74950SBarry Smith PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 49813f74950SBarry Smith PetscInt bs=A->bs,bs2 = a->bs2; 4994c16a6a6SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 5004c16a6a6SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 50128de702eSHong Zhang MatScalar *work; 50213f74950SBarry Smith PetscInt *pivots; 5034c16a6a6SHong Zhang 5044c16a6a6SHong Zhang PetscFunctionBegin; 5054c16a6a6SHong Zhang /* initialization */ 50682502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 5074c16a6a6SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 50813f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 50928de702eSHong Zhang jl = il + mbs; 5104c16a6a6SHong Zhang for (i=0; i<mbs; i++) { 5114c16a6a6SHong Zhang jl[i] = mbs; il[0] = 0; 5124c16a6a6SHong Zhang } 513b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 51428de702eSHong Zhang uik = dk + bs2; 51528de702eSHong Zhang work = uik + bs2; 51613f74950SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 5174c16a6a6SHong Zhang 5184c16a6a6SHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 5194c16a6a6SHong Zhang 5204c16a6a6SHong Zhang /* check permutation */ 5214c16a6a6SHong Zhang if (!a->permute){ 5224c16a6a6SHong Zhang ai = a->i; aj = a->j; aa = a->a; 5234c16a6a6SHong Zhang } else { 5244c16a6a6SHong Zhang ai = a->inew; aj = a->jnew; 52582502324SSatish Balay ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 5264c16a6a6SHong Zhang ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 52713f74950SBarry Smith ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 52813f74950SBarry Smith ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 5294c16a6a6SHong Zhang 5304c16a6a6SHong Zhang for (i=0; i<mbs; i++){ 5314c16a6a6SHong Zhang jmin = ai[i]; jmax = ai[i+1]; 5324c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 5334c16a6a6SHong Zhang while (a2anew[j] != j){ 5344c16a6a6SHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 5354c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 5364c16a6a6SHong Zhang dk[k1] = aa[k*bs2+k1]; 5374c16a6a6SHong Zhang aa[k*bs2+k1] = aa[j*bs2+k1]; 5384c16a6a6SHong Zhang aa[j*bs2+k1] = dk[k1]; 5394c16a6a6SHong Zhang } 5404c16a6a6SHong Zhang } 5414c16a6a6SHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 5424c16a6a6SHong Zhang if (i > aj[j]){ 5434c16a6a6SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 5444c16a6a6SHong Zhang ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 5454c16a6a6SHong Zhang for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 5464c16a6a6SHong Zhang for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 5474c16a6a6SHong Zhang for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 5484c16a6a6SHong Zhang } 5494c16a6a6SHong Zhang } 5504c16a6a6SHong Zhang } 5514c16a6a6SHong Zhang } 552323b833fSBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 5534c16a6a6SHong Zhang } 5544c16a6a6SHong Zhang 5554c16a6a6SHong Zhang /* for each row k */ 5564c16a6a6SHong Zhang for (k = 0; k<mbs; k++){ 5574c16a6a6SHong Zhang 5584c16a6a6SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 5594c16a6a6SHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 560057f5ba7SHong Zhang 5614c16a6a6SHong Zhang ap = aa + jmin*bs2; 5624c16a6a6SHong Zhang for (j = jmin; j < jmax; j++){ 5634c16a6a6SHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 5644c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 5654c16a6a6SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 5664c16a6a6SHong Zhang } 5674c16a6a6SHong Zhang 5684c16a6a6SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 5694c16a6a6SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 5704c16a6a6SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 5714c16a6a6SHong Zhang 572057f5ba7SHong Zhang while (i < k){ 5734c16a6a6SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 5744c16a6a6SHong Zhang 5754c16a6a6SHong Zhang /* compute multiplier */ 5764c16a6a6SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 5774c16a6a6SHong Zhang 5784c16a6a6SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 5794c16a6a6SHong Zhang diag = ba + i*bs2; 5804c16a6a6SHong Zhang u = ba + ili*bs2; 5814c16a6a6SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 5824c16a6a6SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 5834c16a6a6SHong Zhang 5844c16a6a6SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 5854c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 5864c16a6a6SHong Zhang 5874c16a6a6SHong Zhang /* update -U(i,k) */ 5884c16a6a6SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 5894c16a6a6SHong Zhang 5904c16a6a6SHong Zhang /* add multiple of row i to k-th row ... */ 5914c16a6a6SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 5924c16a6a6SHong Zhang if (jmin < jmax){ 5934c16a6a6SHong Zhang for (j=jmin; j<jmax; j++) { 5944c16a6a6SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 5954c16a6a6SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 5964c16a6a6SHong Zhang u = ba + j*bs2; 5974c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 5984c16a6a6SHong Zhang } 5994c16a6a6SHong Zhang 6004c16a6a6SHong Zhang /* ... add i to row list for next nonzero entry */ 6014c16a6a6SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 6024c16a6a6SHong Zhang j = bj[jmin]; 6034c16a6a6SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 6044c16a6a6SHong Zhang } 6054c16a6a6SHong Zhang i = nexti; 6064c16a6a6SHong Zhang } 6074c16a6a6SHong Zhang 6084c16a6a6SHong Zhang /* save nonzero entries in k-th row of U ... */ 6094c16a6a6SHong Zhang 6104c16a6a6SHong Zhang /* invert diagonal block */ 6114c16a6a6SHong Zhang diag = ba+k*bs2; 6124c16a6a6SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 613d230e6fdSBarry Smith ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 6144c16a6a6SHong Zhang 6154c16a6a6SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 6164c16a6a6SHong Zhang if (jmin < jmax) { 6174c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 6184c16a6a6SHong Zhang vj = bj[j]; /* block col. index of U */ 6194c16a6a6SHong Zhang u = ba + j*bs2; 6204c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 6214c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 6224c16a6a6SHong Zhang *u++ = *rtmp_ptr; 6234c16a6a6SHong Zhang *rtmp_ptr++ = 0.0; 6244c16a6a6SHong Zhang } 6254c16a6a6SHong Zhang } 6264c16a6a6SHong Zhang 6274c16a6a6SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 6284c16a6a6SHong Zhang il[k] = jmin; 6294c16a6a6SHong Zhang i = bj[jmin]; 6304c16a6a6SHong Zhang jl[k] = jl[i]; jl[i] = k; 6314c16a6a6SHong Zhang } 6324c16a6a6SHong Zhang } 6334c16a6a6SHong Zhang 6344c16a6a6SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 6354c16a6a6SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 6364c16a6a6SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 6374c16a6a6SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 6384c16a6a6SHong Zhang if (a->permute){ 6394c16a6a6SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 6404c16a6a6SHong Zhang } 6414c16a6a6SHong Zhang 6424c16a6a6SHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 6434c16a6a6SHong Zhang C->factor = FACTOR_CHOLESKY; 6444c16a6a6SHong Zhang C->assembled = PETSC_TRUE; 6454c16a6a6SHong Zhang C->preallocated = PETSC_TRUE; 646b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 6474c16a6a6SHong Zhang PetscFunctionReturn(0); 6484c16a6a6SHong Zhang } 649d4edadd4SHong Zhang 6504a2ae208SSatish Balay #undef __FUNCT__ 6514a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 652dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 653671cb588SHong Zhang { 654671cb588SHong Zhang Mat C = *B; 655671cb588SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 656dfbe8321SBarry Smith PetscErrorCode ierr; 65713f74950SBarry Smith PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 65813f74950SBarry Smith PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 65913f74950SBarry Smith PetscInt bs=A->bs,bs2 = a->bs2; 660671cb588SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 661671cb588SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 66228de702eSHong Zhang MatScalar *work; 66313f74950SBarry Smith PetscInt *pivots; 664671cb588SHong Zhang 665671cb588SHong Zhang PetscFunctionBegin; 666671cb588SHong Zhang /* initialization */ 667671cb588SHong Zhang 66882502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 669671cb588SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 67013f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 67128de702eSHong Zhang jl = il + mbs; 672671cb588SHong Zhang for (i=0; i<mbs; i++) { 673671cb588SHong Zhang jl[i] = mbs; il[0] = 0; 674671cb588SHong Zhang } 675b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 67628de702eSHong Zhang uik = dk + bs2; 67728de702eSHong Zhang work = uik + bs2; 67813f74950SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 679671cb588SHong Zhang 680671cb588SHong Zhang ai = a->i; aj = a->j; aa = a->a; 681671cb588SHong Zhang 682671cb588SHong Zhang /* for each row k */ 683671cb588SHong Zhang for (k = 0; k<mbs; k++){ 684671cb588SHong Zhang 685671cb588SHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 686671cb588SHong Zhang jmin = ai[k]; jmax = ai[k+1]; 687671cb588SHong Zhang ap = aa + jmin*bs2; 688671cb588SHong Zhang for (j = jmin; j < jmax; j++){ 689671cb588SHong Zhang vj = aj[j]; /* block col. index */ 690671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 691671cb588SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 692671cb588SHong Zhang } 693671cb588SHong Zhang 694671cb588SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 695671cb588SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 696671cb588SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 697671cb588SHong Zhang 698057f5ba7SHong Zhang while (i < k){ 699671cb588SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 700671cb588SHong Zhang 701671cb588SHong Zhang /* compute multiplier */ 702671cb588SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 703671cb588SHong Zhang 704671cb588SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 705671cb588SHong Zhang diag = ba + i*bs2; 706671cb588SHong Zhang u = ba + ili*bs2; 707671cb588SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 708671cb588SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 709671cb588SHong Zhang 710671cb588SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 711671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 712671cb588SHong Zhang 713671cb588SHong Zhang /* update -U(i,k) */ 714671cb588SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 715671cb588SHong Zhang 716671cb588SHong Zhang /* add multiple of row i to k-th row ... */ 717671cb588SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 718671cb588SHong Zhang if (jmin < jmax){ 719671cb588SHong Zhang for (j=jmin; j<jmax; j++) { 720671cb588SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 721671cb588SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 722671cb588SHong Zhang u = ba + j*bs2; 723671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 724671cb588SHong Zhang } 725671cb588SHong Zhang 726671cb588SHong Zhang /* ... add i to row list for next nonzero entry */ 727671cb588SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 728671cb588SHong Zhang j = bj[jmin]; 729671cb588SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 730671cb588SHong Zhang } 731671cb588SHong Zhang i = nexti; 732671cb588SHong Zhang } 733671cb588SHong Zhang 734671cb588SHong Zhang /* save nonzero entries in k-th row of U ... */ 735671cb588SHong Zhang 736671cb588SHong Zhang /* invert diagonal block */ 737671cb588SHong Zhang diag = ba+k*bs2; 738671cb588SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 739d230e6fdSBarry Smith ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 740671cb588SHong Zhang 741671cb588SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 742671cb588SHong Zhang if (jmin < jmax) { 743671cb588SHong Zhang for (j=jmin; j<jmax; j++){ 744671cb588SHong Zhang vj = bj[j]; /* block col. index of U */ 745671cb588SHong Zhang u = ba + j*bs2; 746671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 747671cb588SHong Zhang for (k1=0; k1<bs2; k1++){ 748671cb588SHong Zhang *u++ = *rtmp_ptr; 749671cb588SHong Zhang *rtmp_ptr++ = 0.0; 750671cb588SHong Zhang } 751671cb588SHong Zhang } 752671cb588SHong Zhang 753671cb588SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 754671cb588SHong Zhang il[k] = jmin; 755671cb588SHong Zhang i = bj[jmin]; 756671cb588SHong Zhang jl[k] = jl[i]; jl[i] = k; 757671cb588SHong Zhang } 758671cb588SHong Zhang } 759671cb588SHong Zhang 760671cb588SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 761671cb588SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 762671cb588SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 763671cb588SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 764671cb588SHong Zhang 765671cb588SHong Zhang C->factor = FACTOR_CHOLESKY; 766671cb588SHong Zhang C->assembled = PETSC_TRUE; 767671cb588SHong Zhang C->preallocated = PETSC_TRUE; 768b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 769671cb588SHong Zhang PetscFunctionReturn(0); 770671cb588SHong Zhang } 771671cb588SHong Zhang 77249b5e25fSSatish Balay /* 773fcf159c0SHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 774cc0c071aSHong Zhang Version for blocks 2 by 2. 77549b5e25fSSatish Balay */ 7764a2ae208SSatish Balay #undef __FUNCT__ 7774a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 778dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 77949b5e25fSSatish Balay { 78049b5e25fSSatish Balay Mat C = *B; 78191602c66SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 782cc0c071aSHong Zhang IS perm = b->row; 7836849ba73SBarry Smith PetscErrorCode ierr; 78413f74950SBarry Smith PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 78513f74950SBarry Smith PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 786a1723e09SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 787cc0c071aSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 78849b5e25fSSatish Balay 78949b5e25fSSatish Balay PetscFunctionBegin; 79091602c66SHong Zhang /* initialization */ 79191602c66SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 79291602c66SHong Zhang window U(0:k, k:mbs-1). 79391602c66SHong Zhang jl: list of rows to be added to uneliminated rows 79491602c66SHong Zhang i>= k: jl(i) is the first row to be added to row i 79591602c66SHong Zhang i< k: jl(i) is the row following row i in some list of rows 79691602c66SHong Zhang jl(i) = mbs indicates the end of a list 79791602c66SHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 79891602c66SHong Zhang row i of U */ 79982502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 800cc0c071aSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 80113f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 80228de702eSHong Zhang jl = il + mbs; 80391602c66SHong Zhang for (i=0; i<mbs; i++) { 8043845f261SHong Zhang jl[i] = mbs; il[0] = 0; 80591602c66SHong Zhang } 80682502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 80728de702eSHong Zhang uik = dk + 4; 808cc0c071aSHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 809cc0c071aSHong Zhang 810cc0c071aSHong Zhang /* check permutation */ 811cc0c071aSHong Zhang if (!a->permute){ 812cc0c071aSHong Zhang ai = a->i; aj = a->j; aa = a->a; 813cc0c071aSHong Zhang } else { 814cc0c071aSHong Zhang ai = a->inew; aj = a->jnew; 81582502324SSatish Balay ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 816cc0c071aSHong Zhang ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 81713f74950SBarry Smith ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 81813f74950SBarry Smith ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 819cc0c071aSHong Zhang 820cc0c071aSHong Zhang for (i=0; i<mbs; i++){ 821cc0c071aSHong Zhang jmin = ai[i]; jmax = ai[i+1]; 822cc0c071aSHong Zhang for (j=jmin; j<jmax; j++){ 823cc0c071aSHong Zhang while (a2anew[j] != j){ 824cc0c071aSHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 825cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 826cc0c071aSHong Zhang dk[k1] = aa[k*4+k1]; 827cc0c071aSHong Zhang aa[k*4+k1] = aa[j*4+k1]; 828cc0c071aSHong Zhang aa[j*4+k1] = dk[k1]; 829cc0c071aSHong Zhang } 830cc0c071aSHong Zhang } 831cc0c071aSHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 832cc0c071aSHong Zhang if (i > aj[j]){ 833a1723e09SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 834cc0c071aSHong Zhang ap = aa + j*4; /* ptr to the beginning of the block */ 835cc0c071aSHong Zhang dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 836cc0c071aSHong Zhang ap[1] = ap[2]; 837cc0c071aSHong Zhang ap[2] = dk[1]; 838cc0c071aSHong Zhang } 839cc0c071aSHong Zhang } 840cc0c071aSHong Zhang } 841ac355199SBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 842cc0c071aSHong Zhang } 8433845f261SHong Zhang 84491602c66SHong Zhang /* for each row k */ 84591602c66SHong Zhang for (k = 0; k<mbs; k++){ 84691602c66SHong Zhang 84791602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 848cc0c071aSHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 849cc0c071aSHong Zhang ap = aa + jmin*4; 85091602c66SHong Zhang for (j = jmin; j < jmax; j++){ 851cc0c071aSHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 852cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 853cc0c071aSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 85491602c66SHong Zhang } 85591602c66SHong Zhang 85691602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 857cc0c071aSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 85891602c66SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 85991602c66SHong Zhang 860057f5ba7SHong Zhang while (i < k){ 86191602c66SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 86291602c66SHong Zhang 8633845f261SHong Zhang /* compute multiplier */ 86491602c66SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 8653845f261SHong Zhang 8663845f261SHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 867cc0c071aSHong Zhang diag = ba + i*4; 868cc0c071aSHong Zhang u = ba + ili*4; 869cc0c071aSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 870cc0c071aSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 871cc0c071aSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 872cc0c071aSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 8733845f261SHong Zhang 8743845f261SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 875cc0c071aSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 876cc0c071aSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 877cc0c071aSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 878cc0c071aSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 8793845f261SHong Zhang 8803845f261SHong Zhang /* update -U(i,k): ba[ili] = uik */ 881cc0c071aSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 88291602c66SHong Zhang 88391602c66SHong Zhang /* add multiple of row i to k-th row ... */ 88491602c66SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 88591602c66SHong Zhang if (jmin < jmax){ 8863845f261SHong Zhang for (j=jmin; j<jmax; j++) { 8873845f261SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 888cc0c071aSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 889cc0c071aSHong Zhang u = ba + j*4; 890cc0c071aSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 891cc0c071aSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 892cc0c071aSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 893cc0c071aSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 8943845f261SHong Zhang } 8953845f261SHong Zhang 89691602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 89791602c66SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 89891602c66SHong Zhang j = bj[jmin]; 89991602c66SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 90091602c66SHong Zhang } 901a1723e09SHong Zhang i = nexti; 90291602c66SHong Zhang } 903cc0c071aSHong Zhang 90491602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 9053845f261SHong Zhang 906cc0c071aSHong Zhang /* invert diagonal block */ 907cc0c071aSHong Zhang diag = ba+k*4; 908cc0c071aSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 9093845f261SHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 9103845f261SHong Zhang 91191602c66SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 91291602c66SHong Zhang if (jmin < jmax) { 91391602c66SHong Zhang for (j=jmin; j<jmax; j++){ 914cc0c071aSHong Zhang vj = bj[j]; /* block col. index of U */ 915cc0c071aSHong Zhang u = ba + j*4; 916cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 917cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 918cc0c071aSHong Zhang *u++ = *rtmp_ptr; 919cc0c071aSHong Zhang *rtmp_ptr++ = 0.0; 920cc0c071aSHong Zhang } 921cc0c071aSHong Zhang } 9223845f261SHong Zhang 92391602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 92491602c66SHong Zhang il[k] = jmin; 92591602c66SHong Zhang i = bj[jmin]; 92691602c66SHong Zhang jl[k] = jl[i]; jl[i] = k; 92791602c66SHong Zhang } 92891602c66SHong Zhang } 9293845f261SHong Zhang 93049b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 93191602c66SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 9323845f261SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 93391602c66SHong Zhang if (a->permute) { 93491602c66SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 93591602c66SHong Zhang } 936cc0c071aSHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 93791602c66SHong Zhang C->factor = FACTOR_CHOLESKY; 93849b5e25fSSatish Balay C->assembled = PETSC_TRUE; 9395c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 940b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 94149b5e25fSSatish Balay PetscFunctionReturn(0); 94249b5e25fSSatish Balay } 94391602c66SHong Zhang 94449b5e25fSSatish Balay /* 94549b5e25fSSatish Balay Version for when blocks are 2 by 2 Using natural ordering 94649b5e25fSSatish Balay */ 9474a2ae208SSatish Balay #undef __FUNCT__ 9484a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 949dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 95049b5e25fSSatish Balay { 95149b5e25fSSatish Balay Mat C = *B; 952ab27746eSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 953dfbe8321SBarry Smith PetscErrorCode ierr; 95413f74950SBarry Smith PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 95513f74950SBarry Smith PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 956ab27746eSHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 957ab27746eSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 95849b5e25fSSatish Balay 95949b5e25fSSatish Balay PetscFunctionBegin; 960ab27746eSHong Zhang /* initialization */ 961ab27746eSHong Zhang /* il and jl record the first nonzero element in each row of the accessing 962ab27746eSHong Zhang window U(0:k, k:mbs-1). 963ab27746eSHong Zhang jl: list of rows to be added to uneliminated rows 964ab27746eSHong Zhang i>= k: jl(i) is the first row to be added to row i 965ab27746eSHong Zhang i< k: jl(i) is the row following row i in some list of rows 966ab27746eSHong Zhang jl(i) = mbs indicates the end of a list 967ab27746eSHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 968ab27746eSHong Zhang row i of U */ 96982502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 970ab27746eSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 97113f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 97228de702eSHong Zhang jl = il + mbs; 973ab27746eSHong Zhang for (i=0; i<mbs; i++) { 974ab27746eSHong Zhang jl[i] = mbs; il[0] = 0; 97549b5e25fSSatish Balay } 97682502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 97728de702eSHong Zhang uik = dk + 4; 978ab27746eSHong Zhang 979ab27746eSHong Zhang ai = a->i; aj = a->j; aa = a->a; 980ab27746eSHong Zhang 981ab27746eSHong Zhang /* for each row k */ 982ab27746eSHong Zhang for (k = 0; k<mbs; k++){ 983ab27746eSHong Zhang 984ab27746eSHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 985ab27746eSHong Zhang jmin = ai[k]; jmax = ai[k+1]; 986ab27746eSHong Zhang ap = aa + jmin*4; 987ab27746eSHong Zhang for (j = jmin; j < jmax; j++){ 988ab27746eSHong Zhang vj = aj[j]; /* block col. index */ 989ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 990ab27746eSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 99149b5e25fSSatish Balay } 992ab27746eSHong Zhang 993ab27746eSHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 994ab27746eSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 995ab27746eSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 996ab27746eSHong Zhang 997057f5ba7SHong Zhang while (i < k){ 998ab27746eSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 999ab27746eSHong Zhang 1000ab27746eSHong Zhang /* compute multiplier */ 1001ab27746eSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1002ab27746eSHong Zhang 1003ab27746eSHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1004ab27746eSHong Zhang diag = ba + i*4; 1005ab27746eSHong Zhang u = ba + ili*4; 1006ab27746eSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 1007ab27746eSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 1008ab27746eSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 1009ab27746eSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 1010ab27746eSHong Zhang 1011ab27746eSHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1012ab27746eSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 1013ab27746eSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 1014ab27746eSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 1015ab27746eSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 1016ab27746eSHong Zhang 1017ab27746eSHong Zhang /* update -U(i,k): ba[ili] = uik */ 1018ab27746eSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 1019ab27746eSHong Zhang 1020ab27746eSHong Zhang /* add multiple of row i to k-th row ... */ 1021ab27746eSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 1022ab27746eSHong Zhang if (jmin < jmax){ 1023ab27746eSHong Zhang for (j=jmin; j<jmax; j++) { 1024ab27746eSHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1025ab27746eSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 1026ab27746eSHong Zhang u = ba + j*4; 1027ab27746eSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 1028ab27746eSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 1029ab27746eSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 1030ab27746eSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 103149b5e25fSSatish Balay } 1032ab27746eSHong Zhang 1033ab27746eSHong Zhang /* ... add i to row list for next nonzero entry */ 1034ab27746eSHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1035ab27746eSHong Zhang j = bj[jmin]; 1036ab27746eSHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 103749b5e25fSSatish Balay } 1038ab27746eSHong Zhang i = nexti; 103949b5e25fSSatish Balay } 1040ab27746eSHong Zhang 1041ab27746eSHong Zhang /* save nonzero entries in k-th row of U ... */ 1042ab27746eSHong Zhang 104349b5e25fSSatish Balay /* invert diagonal block */ 1044ab27746eSHong Zhang diag = ba+k*4; 1045ab27746eSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1046ab27746eSHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 1047ab27746eSHong Zhang 1048ab27746eSHong Zhang jmin = bi[k]; jmax = bi[k+1]; 1049ab27746eSHong Zhang if (jmin < jmax) { 1050ab27746eSHong Zhang for (j=jmin; j<jmax; j++){ 1051ab27746eSHong Zhang vj = bj[j]; /* block col. index of U */ 1052ab27746eSHong Zhang u = ba + j*4; 1053ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 1054ab27746eSHong Zhang for (k1=0; k1<4; k1++){ 1055ab27746eSHong Zhang *u++ = *rtmp_ptr; 1056ab27746eSHong Zhang *rtmp_ptr++ = 0.0; 1057ab27746eSHong Zhang } 1058ab27746eSHong Zhang } 1059ab27746eSHong Zhang 1060ab27746eSHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 1061ab27746eSHong Zhang il[k] = jmin; 1062ab27746eSHong Zhang i = bj[jmin]; 1063ab27746eSHong Zhang jl[k] = jl[i]; jl[i] = k; 1064ab27746eSHong Zhang } 106549b5e25fSSatish Balay } 106649b5e25fSSatish Balay 106749b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 1068ab27746eSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 1069ab27746eSHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 1070ab27746eSHong Zhang 1071ab27746eSHong Zhang C->factor = FACTOR_CHOLESKY; 107249b5e25fSSatish Balay C->assembled = PETSC_TRUE; 10735c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 1074b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 107549b5e25fSSatish Balay PetscFunctionReturn(0); 107649b5e25fSSatish Balay } 107749b5e25fSSatish Balay 107849b5e25fSSatish Balay /* 107998a8e78dSHong Zhang Numeric U^T*D*U factorization for SBAIJ format. 108091602c66SHong Zhang Version for blocks are 1 by 1. 108149b5e25fSSatish Balay */ 10824a2ae208SSatish Balay #undef __FUNCT__ 10834a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 1084dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 108549b5e25fSSatish Balay { 108649b5e25fSSatish Balay Mat C = *B; 108749b5e25fSSatish Balay Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 108849b5e25fSSatish Balay IS ip=b->row; 10896849ba73SBarry Smith PetscErrorCode ierr; 1090997a0949SHong Zhang PetscInt *rip,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol; 1091fff829cfSHong Zhang PetscInt *ai,*aj,*a2anew; 1092997a0949SHong Zhang PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1093997a0949SHong Zhang MatScalar *rtmp,*ba=b->a,*bval,*aa,dk,uikdi; 1094997a0949SHong Zhang PetscReal damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount; 1095997a0949SHong Zhang PetscTruth damp,chshift; 1096997a0949SHong Zhang PetscInt nshift=0,ndamp=0; 109749b5e25fSSatish Balay 109849b5e25fSSatish Balay PetscFunctionBegin; 109949b5e25fSSatish Balay ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1100cb718733SHong Zhang if (!a->permute){ 11012d007305SHong Zhang ai = a->i; aj = a->j; aa = a->a; 11022d007305SHong Zhang } else { 11032d007305SHong Zhang ai = a->inew; aj = a->jnew; 1104fff829cfSHong Zhang nz = ai[mbs]; 1105fff829cfSHong Zhang ierr = PetscMalloc(nz*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1106fff829cfSHong Zhang a2anew = a->a2anew; 1107997a0949SHong Zhang bval = a->a; 1108fff829cfSHong Zhang for (j=0; j<nz; j++){ 1109997a0949SHong Zhang aa[a2anew[j]] = *(bval++); 11102d007305SHong Zhang } 11112d007305SHong Zhang } 11122d007305SHong Zhang 111391602c66SHong Zhang /* initialization */ 111449b5e25fSSatish Balay /* il and jl record the first nonzero element in each row of the accessing 111549b5e25fSSatish Balay window U(0:k, k:mbs-1). 111649b5e25fSSatish Balay jl: list of rows to be added to uneliminated rows 111749b5e25fSSatish Balay i>= k: jl(i) is the first row to be added to row i 111849b5e25fSSatish Balay i< k: jl(i) is the row following row i in some list of rows 111949b5e25fSSatish Balay jl(i) = mbs indicates the end of a list 112049b5e25fSSatish Balay il(i): points to the first nonzero element in columns k,...,mbs-1 of 112149b5e25fSSatish Balay row i of U */ 1122997a0949SHong Zhang nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1123997a0949SHong Zhang ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 112428de702eSHong Zhang jl = il + mbs; 1125997a0949SHong Zhang rtmp = (MatScalar*)(jl + mbs); 1126997a0949SHong Zhang 1127997a0949SHong Zhang shift_amount = 0; 1128997a0949SHong Zhang do { 1129997a0949SHong Zhang damp = PETSC_FALSE; 1130997a0949SHong Zhang chshift = PETSC_FALSE; 113149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 113249b5e25fSSatish Balay rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 113349b5e25fSSatish Balay } 1134997a0949SHong Zhang 113549b5e25fSSatish Balay for (k = 0; k<mbs; k++){ 1136997a0949SHong Zhang /*initialize k-th row by the perm[k]-th row of A */ 113749b5e25fSSatish Balay jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1138*7701de02SHong Zhang bval = ba + bi[k]; 113949b5e25fSSatish Balay for (j = jmin; j < jmax; j++){ 1140997a0949SHong Zhang col = rip[aj[j]]; 1141997a0949SHong Zhang rtmp[col] = aa[j]; 1142*7701de02SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 114349b5e25fSSatish Balay } 1144997a0949SHong Zhang /* damp the diagonal of the matrix */ 1145997a0949SHong Zhang if (ndamp||nshift) rtmp[k] += damping+shift_amount; 114649b5e25fSSatish Balay 114791602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 114849b5e25fSSatish Balay dk = rtmp[k]; 114949b5e25fSSatish Balay i = jl[k]; /* first row to be added to k_th row */ 115049b5e25fSSatish Balay 1151057f5ba7SHong Zhang while (i < k){ 115249b5e25fSSatish Balay nexti = jl[i]; /* next row to be added to k_th row */ 115349b5e25fSSatish Balay 1154fff829cfSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 115549b5e25fSSatish Balay ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1156997a0949SHong Zhang uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 115749b5e25fSSatish Balay dk += uikdi*ba[ili]; 1158658e7b3eSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 115949b5e25fSSatish Balay 1160997a0949SHong Zhang /* add multiple of row i to k-th row */ 116149b5e25fSSatish Balay jmin = ili + 1; jmax = bi[i+1]; 116249b5e25fSSatish Balay if (jmin < jmax){ 116349b5e25fSSatish Balay for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1164fff829cfSHong Zhang /* update il and jl for row i */ 1165fff829cfSHong Zhang il[i] = jmin; 1166fff829cfSHong Zhang j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 116749b5e25fSSatish Balay } 1168ab27746eSHong Zhang i = nexti; 116949b5e25fSSatish Balay } 117049b5e25fSSatish Balay 1171997a0949SHong Zhang if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1172997a0949SHong Zhang /* calculate a shift that would make this row diagonally dominant */ 1173997a0949SHong Zhang PetscReal rs = PetscAbs(PetscRealPart(dk)); 1174997a0949SHong Zhang jmin = bi[k]+1; 1175997a0949SHong Zhang nz = bi[k+1] - jmin; 1176997a0949SHong Zhang if (nz){ 1177997a0949SHong Zhang bcol = bj + jmin; 1178997a0949SHong Zhang bval = ba + jmin; 1179997a0949SHong Zhang while (nz--){ 1180997a0949SHong Zhang rs += PetscAbsScalar(rtmp[*bcol++]); 1181997a0949SHong Zhang } 1182997a0949SHong Zhang } 1183997a0949SHong Zhang /* if this shift is less than the previous, just up the previous 1184997a0949SHong Zhang one by a bit */ 1185997a0949SHong Zhang shift_amount = PetscMax(rs,1.1*shift_amount); 1186997a0949SHong Zhang chshift = PETSC_TRUE; 1187997a0949SHong Zhang /* Unlike in the ILU case there is no exit condition on nshift: 1188997a0949SHong Zhang we increase the shift until it converges. There is no guarantee that 1189997a0949SHong Zhang this algorithm converges faster or slower, or is better or worse 1190997a0949SHong Zhang than the ILU algorithm. */ 1191997a0949SHong Zhang nshift++; 1192997a0949SHong Zhang break; 1193997a0949SHong Zhang } 1194997a0949SHong Zhang if (PetscRealPart(dk) < zeropivot){ 1195997a0949SHong Zhang if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1196997a0949SHong Zhang if (damping > 0.0) { 1197997a0949SHong Zhang if (ndamp) damping *= 2.0; 1198997a0949SHong Zhang damp = PETSC_TRUE; 1199997a0949SHong Zhang ndamp++; 1200997a0949SHong Zhang break; 1201997a0949SHong Zhang } else if (PetscAbsScalar(dk) < zeropivot){ 1202997a0949SHong Zhang SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1203997a0949SHong Zhang } else { 1204997a0949SHong Zhang PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1205997a0949SHong Zhang } 120649b5e25fSSatish Balay } 120749b5e25fSSatish Balay 1208997a0949SHong Zhang /* copy data into U(k,:) */ 120998a8e78dSHong Zhang ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1210fff829cfSHong Zhang jmin = bi[k]+1; jmax = bi[k+1]; 121149b5e25fSSatish Balay if (jmin < jmax) { 121249b5e25fSSatish Balay for (j=jmin; j<jmax; j++){ 1213997a0949SHong Zhang col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 121449b5e25fSSatish Balay } 121598a8e78dSHong Zhang /* add the k-th row into il and jl */ 121649b5e25fSSatish Balay il[k] = jmin; 121798a8e78dSHong Zhang i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 121849b5e25fSSatish Balay } 121949b5e25fSSatish Balay } 1220997a0949SHong Zhang } while (damp||chshift); 122149b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 122298a8e78dSHong Zhang if (a->permute){ierr = PetscFree(aa);CHKERRQ(ierr);} 122349b5e25fSSatish Balay 122449b5e25fSSatish Balay ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 12258be8c0c7SHong Zhang C->factor = FACTOR_CHOLESKY; 122649b5e25fSSatish Balay C->assembled = PETSC_TRUE; 12275c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 122898a8e78dSHong Zhang PetscLogFlops(C->m); 1229997a0949SHong Zhang if (ndamp) { 1230997a0949SHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1: number of damping tries %D damping value %g\n",ndamp,damping); 1231997a0949SHong Zhang } 1232997a0949SHong Zhang if (nshift) { 1233997a0949SHong Zhang PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1 diagonal shifted %D shifts\n",nshift); 1234997a0949SHong Zhang } 123549b5e25fSSatish Balay PetscFunctionReturn(0); 123649b5e25fSSatish Balay } 123749b5e25fSSatish Balay 1238671cb588SHong Zhang /* 1239671cb588SHong Zhang Version for when blocks are 1 by 1 Using natural ordering 1240671cb588SHong Zhang */ 12414a2ae208SSatish Balay #undef __FUNCT__ 12424a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1243dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 1244671cb588SHong Zhang { 1245671cb588SHong Zhang Mat C = *B; 1246671cb588SHong Zhang Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1247dfbe8321SBarry Smith PetscErrorCode ierr; 124813f74950SBarry Smith PetscInt i,j,mbs = a->mbs; 124913f74950SBarry Smith PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 125013f74950SBarry Smith PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 1251653a6975SHong Zhang MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 125221c26570Svictorle PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 125321c26570Svictorle PetscTruth damp,chshift; 125413f74950SBarry Smith PetscInt nshift=0; 1255653a6975SHong Zhang 1256653a6975SHong Zhang PetscFunctionBegin; 1257653a6975SHong Zhang /* initialization */ 1258653a6975SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 1259653a6975SHong Zhang window U(0:k, k:mbs-1). 1260653a6975SHong Zhang jl: list of rows to be added to uneliminated rows 1261653a6975SHong Zhang i>= k: jl(i) is the first row to be added to row i 1262653a6975SHong Zhang i< k: jl(i) is the row following row i in some list of rows 1263653a6975SHong Zhang jl(i) = mbs indicates the end of a list 1264653a6975SHong Zhang il(i): points to the first nonzero element in U(i,k:mbs-1) 1265653a6975SHong Zhang */ 1266653a6975SHong Zhang ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 126713f74950SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 1268653a6975SHong Zhang jl = il + mbs; 1269b00f7748SHong Zhang 127021c26570Svictorle shift_amount = 0; 1271b00f7748SHong Zhang do { 1272b00f7748SHong Zhang damp = PETSC_FALSE; 127321c26570Svictorle chshift = PETSC_FALSE; 1274653a6975SHong Zhang for (i=0; i<mbs; i++) { 1275653a6975SHong Zhang rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1276653a6975SHong Zhang } 1277653a6975SHong Zhang 1278997a0949SHong Zhang for (k = 0; k<mbs; k++){ 1279653a6975SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 1280653a6975SHong Zhang nz = ai[k+1] - ai[k]; 1281653a6975SHong Zhang acol = aj + ai[k]; 1282653a6975SHong Zhang aval = aa + ai[k]; 1283653a6975SHong Zhang bval = ba + bi[k]; 1284653a6975SHong Zhang while (nz -- ){ 1285653a6975SHong Zhang rtmp[*acol++] = *aval++; 1286653a6975SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 1287653a6975SHong Zhang } 1288b00f7748SHong Zhang /* damp the diagonal of the matrix */ 128921c26570Svictorle if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1290653a6975SHong Zhang 1291653a6975SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1292653a6975SHong Zhang dk = rtmp[k]; 1293653a6975SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 1294653a6975SHong Zhang 1295653a6975SHong Zhang while (i < k){ 1296653a6975SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 1297653a6975SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 1298653a6975SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1299653a6975SHong Zhang uikdi = - ba[ili]*ba[bi[i]]; 1300653a6975SHong Zhang dk += uikdi*ba[ili]; 1301653a6975SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 1302653a6975SHong Zhang 1303653a6975SHong Zhang /* add multiple of row i to k-th row ... */ 1304653a6975SHong Zhang jmin = ili + 1; 1305653a6975SHong Zhang nz = bi[i+1] - jmin; 1306653a6975SHong Zhang if (nz > 0){ 1307653a6975SHong Zhang bcol = bj + jmin; 1308653a6975SHong Zhang bval = ba + jmin; 1309653a6975SHong Zhang while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1310997a0949SHong Zhang /* update il and jl for i-th row */ 1311997a0949SHong Zhang il[i] = jmin; 1312997a0949SHong Zhang j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1313653a6975SHong Zhang } 1314653a6975SHong Zhang i = nexti; 1315653a6975SHong Zhang } 1316653a6975SHong Zhang 131721c26570Svictorle if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1318d530a6e7Svictorle /* calculate a shift that would make this row diagonally dominant */ 131929f69fd4SBarry Smith PetscReal rs = PetscAbs(PetscRealPart(dk)); 132021c26570Svictorle jmin = bi[k]+1; 132121c26570Svictorle nz = bi[k+1] - jmin; 132221c26570Svictorle if (nz){ 132321c26570Svictorle bcol = bj + jmin; 132421c26570Svictorle bval = ba + jmin; 132521c26570Svictorle while (nz--){ 132621c26570Svictorle rs += PetscAbsScalar(rtmp[*bcol++]); 132721c26570Svictorle } 132821c26570Svictorle } 1329d530a6e7Svictorle /* if this shift is less than the previous, just up the previous 1330d530a6e7Svictorle one by a bit */ 1331d530a6e7Svictorle shift_amount = PetscMax(rs,1.1*shift_amount); 133221c26570Svictorle chshift = PETSC_TRUE; 1333d530a6e7Svictorle /* Unlike in the ILU case there is no exit condition on nshift: 1334d530a6e7Svictorle we increase the shift until it converges. There is no guarantee that 1335d530a6e7Svictorle this algorithm converges faster or slower, or is better or worse 1336d530a6e7Svictorle than the ILU algorithm. */ 133721c26570Svictorle nshift++; 133821c26570Svictorle break; 133921c26570Svictorle } 1340ffa0b812SHong Zhang if (PetscRealPart(dk) < zeropivot){ 1341ffa0b812SHong Zhang if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1342b00f7748SHong Zhang if (damping > 0.0) { 1343b00f7748SHong Zhang if (ndamp) damping *= 2.0; 1344b00f7748SHong Zhang damp = PETSC_TRUE; 1345b00f7748SHong Zhang ndamp++; 1346b00f7748SHong Zhang break; 1347481c2c92SHong Zhang } else if (PetscAbsScalar(dk) < zeropivot){ 134877431f27SBarry Smith SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1349481c2c92SHong Zhang } else { 135077431f27SBarry Smith PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1351b00f7748SHong Zhang } 1352064503c5SHong Zhang } 1353653a6975SHong Zhang 1354997a0949SHong Zhang /* copy data into U(k,:) */ 1355653a6975SHong Zhang ba[bi[k]] = 1.0/dk; 1356653a6975SHong Zhang jmin = bi[k]+1; 1357653a6975SHong Zhang nz = bi[k+1] - jmin; 1358653a6975SHong Zhang if (nz){ 1359653a6975SHong Zhang bcol = bj + jmin; 1360653a6975SHong Zhang bval = ba + jmin; 1361653a6975SHong Zhang while (nz--){ 1362653a6975SHong Zhang *bval++ = rtmp[*bcol]; 1363653a6975SHong Zhang rtmp[*bcol++] = 0.0; 1364653a6975SHong Zhang } 1365997a0949SHong Zhang /* add k-th row into il and jl */ 1366653a6975SHong Zhang il[k] = jmin; 1367997a0949SHong Zhang i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1368653a6975SHong Zhang } 1369b00f7748SHong Zhang } /* end of for (k = 0; k<mbs; k++) */ 137021c26570Svictorle } while (damp||chshift); 1371653a6975SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 1372653a6975SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 1373653a6975SHong Zhang 1374653a6975SHong Zhang C->factor = FACTOR_CHOLESKY; 1375653a6975SHong Zhang C->assembled = PETSC_TRUE; 1376653a6975SHong Zhang C->preallocated = PETSC_TRUE; 1377997a0949SHong Zhang PetscLogFlops(C->m); 1378b00f7748SHong Zhang if (ndamp) { 137977431f27SBarry Smith PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 1380b00f7748SHong Zhang } 1381fb10cecfSBarry Smith if (nshift) { 138277431f27SBarry Smith PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift); 1383fb10cecfSBarry Smith } 1384653a6975SHong Zhang PetscFunctionReturn(0); 1385653a6975SHong Zhang } 1386653a6975SHong Zhang 1387653a6975SHong Zhang #undef __FUNCT__ 13884a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1389dfbe8321SBarry Smith PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info) 139049b5e25fSSatish Balay { 1391dfbe8321SBarry Smith PetscErrorCode ierr; 139249b5e25fSSatish Balay Mat C; 139349b5e25fSSatish Balay 139449b5e25fSSatish Balay PetscFunctionBegin; 139515e8a5b3SHong Zhang ierr = MatCholeskyFactorSymbolic(A,perm,info,&C);CHKERRQ(ierr); 1396a4ada70bSHong Zhang ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 13974445ddedSHong Zhang ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 139849b5e25fSSatish Balay PetscFunctionReturn(0); 139949b5e25fSSatish Balay } 140049b5e25fSSatish Balay 140149b5e25fSSatish Balay 1402