173f4d377SMatthew Knepley /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 43a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 549b5e25fSSatish Balay #include "src/vec/vecimpl.h" 649b5e25fSSatish Balay #include "src/inline/ilu.h" 749a6740bSHong Zhang #include "include/petscis.h" 849b5e25fSSatish Balay 98dc9108eSHong Zhang #if !defined(PETSC_USE_COMPLEX) 105f9f512dSHong Zhang /* 115f9f512dSHong Zhang input: 12c037c3f7SHong Zhang F -- numeric factor 135f9f512dSHong Zhang output: 14c037c3f7SHong Zhang nneg, nzero, npos: matrix inertia 155f9f512dSHong Zhang */ 165f9f512dSHong Zhang 175f9f512dSHong Zhang #undef __FUNCT__ 185f9f512dSHong Zhang #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 19c037c3f7SHong Zhang int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos) 205f9f512dSHong Zhang { 21638f5ce0SDinesh Kaushik Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 223e0d88b5SBarry Smith PetscScalar *dd = fact_ptr->a; 23c037c3f7SHong Zhang int m = F->m,i; 245f9f512dSHong Zhang 255f9f512dSHong Zhang PetscFunctionBegin; 263e0d88b5SBarry Smith if (nneig){ 275f9f512dSHong Zhang *nneig = 0; 285f9f512dSHong Zhang for (i=0; i<m; i++){ 29b36a9721SBarry Smith if (PetscRealPart(dd[i]) < 0.0) (*nneig)++; 305f9f512dSHong Zhang } 313e0d88b5SBarry Smith } 323e0d88b5SBarry Smith if (nzero){ 333e0d88b5SBarry Smith *nzero = 0; 343e0d88b5SBarry Smith for (i=0; i<m; i++){ 353e0d88b5SBarry Smith if (PetscRealPart(dd[i]) == 0.0) (*nzero)++; 363e0d88b5SBarry Smith } 373e0d88b5SBarry Smith } 383e0d88b5SBarry Smith if (npos){ 393e0d88b5SBarry Smith *npos = 0; 403e0d88b5SBarry Smith for (i=0; i<m; i++){ 413e0d88b5SBarry Smith if (PetscRealPart(dd[i]) > 0.0) (*npos)++; 423e0d88b5SBarry Smith } 433e0d88b5SBarry Smith } 445f9f512dSHong Zhang PetscFunctionReturn(0); 455f9f512dSHong Zhang } 468dc9108eSHong Zhang #endif /* !defined(PETSC_USE_COMPLEX) */ 475f9f512dSHong Zhang 485f9f512dSHong Zhang /* Using Modified Sparse Row (MSR) storage. 495f9f512dSHong Zhang See page 85, "Iterative Methods ..." by Saad. */ 505f9f512dSHong Zhang /* 515f9f512dSHong Zhang Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 525f9f512dSHong Zhang */ 53*2d9a3abdSHong Zhang /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 544a2ae208SSatish Balay #undef __FUNCT__ 554a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 5677f73120SHong Zhang int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 5749b5e25fSSatish Balay { 5849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 59cb718733SHong Zhang int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 60066653e3SSatish Balay int *jutmp,bs = a->bs,bs2=a->bs2; 6187fe1012SHong Zhang int m,realloc = 0,prow; 62d60e5362SHong Zhang int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 63671cb588SHong Zhang PetscTruth perm_identity; 6449b5e25fSSatish Balay 6549b5e25fSSatish Balay PetscFunctionBegin; 662d007305SHong Zhang 67cb718733SHong Zhang /* check whether perm is the identity mapping */ 68671cb588SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 69671cb588SHong Zhang if (!perm_identity) a->permute = PETSC_TRUE; 702d007305SHong Zhang 71671cb588SHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 72671cb588SHong Zhang 73671cb588SHong Zhang if (perm_identity){ /* without permutation */ 742d007305SHong Zhang ai = a->i; aj = a->j; 752d007305SHong Zhang } else { /* non-trivial permutation */ 76323b833fSBarry Smith ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 772d007305SHong Zhang ai = a->inew; aj = a->jnew; 782d007305SHong Zhang } 792d007305SHong Zhang 8049b5e25fSSatish Balay /* initialization */ 81b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 8249b5e25fSSatish Balay umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 8382502324SSatish Balay ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 8449b5e25fSSatish Balay iu[0] = mbs+1; 8587fe1012SHong Zhang juidx = mbs + 1; /* index for ju */ 8687fe1012SHong Zhang ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 8787fe1012SHong Zhang q = jl + mbs; /* linked list for col index */ 8849b5e25fSSatish Balay for (i=0; i<mbs; i++){ 8987fe1012SHong Zhang jl[i] = mbs; 9087fe1012SHong Zhang q[i] = 0; 9149b5e25fSSatish Balay } 9249b5e25fSSatish Balay 9349b5e25fSSatish Balay /* for each row k */ 9449b5e25fSSatish Balay for (k=0; k<mbs; k++){ 9549b5e25fSSatish Balay nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 9649b5e25fSSatish Balay q[k] = mbs; 9749b5e25fSSatish Balay /* initialize nonzero structure of k-th row to row rip[k] of A */ 9849b5e25fSSatish Balay jmin = ai[rip[k]]; 9949b5e25fSSatish Balay jmax = ai[rip[k]+1]; 10049b5e25fSSatish Balay for (j=jmin; j<jmax; j++){ 101cb718733SHong Zhang vj = rip[aj[j]]; /* col. value */ 10249b5e25fSSatish Balay if(vj > k){ 10349b5e25fSSatish Balay qm = k; 10449b5e25fSSatish Balay do { 10549b5e25fSSatish Balay m = qm; qm = q[m]; 10649b5e25fSSatish Balay } while(qm < vj); 10749b5e25fSSatish Balay if (qm == vj) { 1083078e140SBarry Smith SETERRQ(1," error: duplicate entry in A\n"); 10949b5e25fSSatish Balay } 11049b5e25fSSatish Balay nzk++; 11149b5e25fSSatish Balay q[m] = vj; 11249b5e25fSSatish Balay q[vj] = qm; 11349b5e25fSSatish Balay } /* if(vj > k) */ 11449b5e25fSSatish Balay } /* for (j=jmin; j<jmax; j++) */ 11549b5e25fSSatish Balay 11649b5e25fSSatish Balay /* modify nonzero structure of k-th row by computing fill-in 11749b5e25fSSatish Balay for each row i to be merged in */ 11887fe1012SHong Zhang prow = k; 11987fe1012SHong Zhang prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 12087fe1012SHong Zhang 12187fe1012SHong Zhang while (prow < k){ 12287fe1012SHong Zhang /* merge row prow into k-th row */ 12387fe1012SHong Zhang jmin = iu[prow] + 1; jmax = iu[prow+1]; 12449b5e25fSSatish Balay qm = k; 12587fe1012SHong Zhang for (j=jmin; j<jmax; j++){ 12649b5e25fSSatish Balay vj = ju[j]; 12749b5e25fSSatish Balay do { 12849b5e25fSSatish Balay m = qm; qm = q[m]; 12949b5e25fSSatish Balay } while (qm < vj); 13049b5e25fSSatish Balay if (qm != vj){ 13149b5e25fSSatish Balay nzk++; q[m] = vj; q[vj] = qm; qm = vj; 13249b5e25fSSatish Balay } 13349b5e25fSSatish Balay } 13487fe1012SHong Zhang prow = jl[prow]; /* next pivot row */ 13549b5e25fSSatish Balay } 13649b5e25fSSatish Balay 13749b5e25fSSatish Balay /* add k to row list for first nonzero element in k-th row */ 13849b5e25fSSatish Balay if (nzk > 0){ 13949b5e25fSSatish Balay i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 14049b5e25fSSatish Balay jl[k] = jl[i]; jl[i] = k; 14149b5e25fSSatish Balay } 14287fe1012SHong Zhang iu[k+1] = iu[k] + nzk; 14349b5e25fSSatish Balay 14449b5e25fSSatish Balay /* allocate more space to ju if needed */ 1453078e140SBarry Smith if (iu[k+1] > umax) { 14649b5e25fSSatish Balay /* estimate how much additional space we will need */ 14749b5e25fSSatish Balay /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 14849b5e25fSSatish Balay /* just double the memory each time */ 14949b5e25fSSatish Balay maxadd = umax; 15049b5e25fSSatish Balay if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 15149b5e25fSSatish Balay umax += maxadd; 15249b5e25fSSatish Balay 1539f9cb213SHong Zhang /* allocate a longer ju */ 15482502324SSatish Balay ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 15549b5e25fSSatish Balay ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 15649b5e25fSSatish Balay ierr = PetscFree(ju);CHKERRQ(ierr); 1579f9cb213SHong Zhang ju = jutmp; 15849b5e25fSSatish Balay realloc++; /* count how many times we realloc */ 15949b5e25fSSatish Balay } 16049b5e25fSSatish Balay 16149b5e25fSSatish Balay /* save nonzero structure of k-th row in ju */ 16249b5e25fSSatish Balay i=k; 16387fe1012SHong Zhang while (nzk --) { 16449b5e25fSSatish Balay i = q[i]; 16587fe1012SHong Zhang ju[juidx++] = i; 16649b5e25fSSatish Balay } 16777f73120SHong Zhang } 16849b5e25fSSatish Balay 16949b5e25fSSatish Balay if (ai[mbs] != 0) { 17049b5e25fSSatish Balay PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 171b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1723078e140SBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1733078e140SBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 174b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 17549b5e25fSSatish Balay } else { 176b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 17749b5e25fSSatish Balay } 17849b5e25fSSatish Balay 17977f73120SHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 18087fe1012SHong Zhang /* ierr = PetscFree(q);CHKERRQ(ierr); */ 18149b5e25fSSatish Balay ierr = PetscFree(jl);CHKERRQ(ierr); 18249b5e25fSSatish Balay 18349b5e25fSSatish Balay /* put together the new matrix */ 18449b5e25fSSatish Balay ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 185b0a32e0cSBarry Smith /* PetscLogObjectParent(*B,iperm); */ 18649b5e25fSSatish Balay b = (Mat_SeqSBAIJ*)(*B)->data; 18749b5e25fSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 18849b5e25fSSatish Balay b->singlemalloc = PETSC_FALSE; 18949b5e25fSSatish Balay /* the next line frees the default space generated by the Create() */ 19049b5e25fSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 19149b5e25fSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 19282502324SSatish Balay ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 19349b5e25fSSatish Balay b->j = ju; 19449b5e25fSSatish Balay b->i = iu; 19549b5e25fSSatish Balay b->diag = 0; 19649b5e25fSSatish Balay b->ilen = 0; 19749b5e25fSSatish Balay b->imax = 0; 19877f73120SHong Zhang b->row = perm; 199bcd9e38bSBarry Smith b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 20077f73120SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 201cb718733SHong Zhang b->icol = perm; 202cb718733SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 20387828ca2SBarry Smith ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 20449b5e25fSSatish Balay /* In b structure: Free imax, ilen, old a, old j. 20549b5e25fSSatish Balay Allocate idnew, solve_work, new a, new j */ 206b0a32e0cSBarry Smith PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 20749b5e25fSSatish Balay b->s_maxnz = b->s_nz = iu[mbs]; 20849b5e25fSSatish Balay 2095c0bcdfcSHong Zhang (*B)->factor = FACTOR_CHOLESKY; 21049b5e25fSSatish Balay (*B)->info.factor_mallocs = realloc; 21149b5e25fSSatish Balay (*B)->info.fill_ratio_given = f; 21249b5e25fSSatish Balay if (ai[mbs] != 0) { 21349b5e25fSSatish Balay (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 21449b5e25fSSatish Balay } else { 21549b5e25fSSatish Balay (*B)->info.fill_ratio_needed = 0.0; 21649b5e25fSSatish Balay } 2170aa0ce06SHong Zhang 218671cb588SHong Zhang if (perm_identity){ 219671cb588SHong Zhang switch (bs) { 220671cb588SHong Zhang case 1: 221671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 222671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2234d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 224671cb588SHong Zhang break; 225671cb588SHong Zhang case 2: 226671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 227671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2284d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 229671cb588SHong Zhang break; 230671cb588SHong Zhang case 3: 231671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 232671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2334d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 234671cb588SHong Zhang break; 235671cb588SHong Zhang case 4: 236671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 237671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2384d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 239671cb588SHong Zhang break; 240671cb588SHong Zhang case 5: 241671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 242671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2434d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 244671cb588SHong Zhang break; 245671cb588SHong Zhang case 6: 246671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 247671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2484d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 249671cb588SHong Zhang break; 250671cb588SHong Zhang case 7: 251671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 252671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2534d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 254671cb588SHong Zhang break; 255671cb588SHong Zhang default: 256671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 257671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2584d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 259671cb588SHong Zhang break; 260671cb588SHong Zhang } 261671cb588SHong Zhang } 262671cb588SHong Zhang 26349b5e25fSSatish Balay PetscFunctionReturn(0); 26449b5e25fSSatish Balay } 26549b5e25fSSatish Balay 26635d8aa7fSBarry Smith 2674a2ae208SSatish Balay #undef __FUNCT__ 2684a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 2696f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 27049b5e25fSSatish Balay { 27149b5e25fSSatish Balay Mat C = *B; 2724c16a6a6SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 2734c16a6a6SHong Zhang IS perm = b->row; 2744c16a6a6SHong Zhang int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 2754c16a6a6SHong Zhang int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 2764c16a6a6SHong Zhang int bs=a->bs,bs2 = a->bs2; 2774c16a6a6SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 2784c16a6a6SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 27928de702eSHong Zhang MatScalar *work; 2804c16a6a6SHong Zhang int *pivots; 2814c16a6a6SHong Zhang 2824c16a6a6SHong Zhang PetscFunctionBegin; 2839706f043SHong Zhang 2844c16a6a6SHong Zhang /* initialization */ 28582502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2864c16a6a6SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 28782502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 28828de702eSHong Zhang jl = il + mbs; 2894c16a6a6SHong Zhang for (i=0; i<mbs; i++) { 2904c16a6a6SHong Zhang jl[i] = mbs; il[0] = 0; 2914c16a6a6SHong Zhang } 292b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 29328de702eSHong Zhang uik = dk + bs2; 29428de702eSHong Zhang work = uik + bs2; 29582502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 2964c16a6a6SHong Zhang 2974c16a6a6SHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 2984c16a6a6SHong Zhang 2994c16a6a6SHong Zhang /* check permutation */ 3004c16a6a6SHong Zhang if (!a->permute){ 3014c16a6a6SHong Zhang ai = a->i; aj = a->j; aa = a->a; 3024c16a6a6SHong Zhang } else { 3034c16a6a6SHong Zhang ai = a->inew; aj = a->jnew; 30482502324SSatish Balay ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 3054c16a6a6SHong Zhang ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 30682502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 3074c16a6a6SHong Zhang ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 3084c16a6a6SHong Zhang 3094c16a6a6SHong Zhang for (i=0; i<mbs; i++){ 3104c16a6a6SHong Zhang jmin = ai[i]; jmax = ai[i+1]; 3114c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 3124c16a6a6SHong Zhang while (a2anew[j] != j){ 3134c16a6a6SHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 3144c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 3154c16a6a6SHong Zhang dk[k1] = aa[k*bs2+k1]; 3164c16a6a6SHong Zhang aa[k*bs2+k1] = aa[j*bs2+k1]; 3174c16a6a6SHong Zhang aa[j*bs2+k1] = dk[k1]; 3184c16a6a6SHong Zhang } 3194c16a6a6SHong Zhang } 3204c16a6a6SHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 3214c16a6a6SHong Zhang if (i > aj[j]){ 3224c16a6a6SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 3234c16a6a6SHong Zhang ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 3244c16a6a6SHong Zhang for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 3254c16a6a6SHong Zhang for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 3264c16a6a6SHong Zhang for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 3274c16a6a6SHong Zhang } 3284c16a6a6SHong Zhang } 3294c16a6a6SHong Zhang } 3304c16a6a6SHong Zhang } 331323b833fSBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 3324c16a6a6SHong Zhang } 3334c16a6a6SHong Zhang 3344c16a6a6SHong Zhang /* for each row k */ 3354c16a6a6SHong Zhang for (k = 0; k<mbs; k++){ 3364c16a6a6SHong Zhang 3374c16a6a6SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 3384c16a6a6SHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 339057f5ba7SHong Zhang 3404c16a6a6SHong Zhang ap = aa + jmin*bs2; 3414c16a6a6SHong Zhang for (j = jmin; j < jmax; j++){ 3424c16a6a6SHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 3434c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 3444c16a6a6SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 3454c16a6a6SHong Zhang } 3464c16a6a6SHong Zhang 3474c16a6a6SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 3484c16a6a6SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3494c16a6a6SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 3504c16a6a6SHong Zhang 351057f5ba7SHong Zhang while (i < k){ 3524c16a6a6SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 3534c16a6a6SHong Zhang 3544c16a6a6SHong Zhang /* compute multiplier */ 3554c16a6a6SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 3564c16a6a6SHong Zhang 3574c16a6a6SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 3584c16a6a6SHong Zhang diag = ba + i*bs2; 3594c16a6a6SHong Zhang u = ba + ili*bs2; 3604c16a6a6SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3614c16a6a6SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 3624c16a6a6SHong Zhang 3634c16a6a6SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 3644c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 3654c16a6a6SHong Zhang 3664c16a6a6SHong Zhang /* update -U(i,k) */ 3674c16a6a6SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3684c16a6a6SHong Zhang 3694c16a6a6SHong Zhang /* add multiple of row i to k-th row ... */ 3704c16a6a6SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 3714c16a6a6SHong Zhang if (jmin < jmax){ 3724c16a6a6SHong Zhang for (j=jmin; j<jmax; j++) { 3734c16a6a6SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 3744c16a6a6SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 3754c16a6a6SHong Zhang u = ba + j*bs2; 3764c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 3774c16a6a6SHong Zhang } 3784c16a6a6SHong Zhang 3794c16a6a6SHong Zhang /* ... add i to row list for next nonzero entry */ 3804c16a6a6SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 3814c16a6a6SHong Zhang j = bj[jmin]; 3824c16a6a6SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 3834c16a6a6SHong Zhang } 3844c16a6a6SHong Zhang i = nexti; 3854c16a6a6SHong Zhang } 3864c16a6a6SHong Zhang 3874c16a6a6SHong Zhang /* save nonzero entries in k-th row of U ... */ 3884c16a6a6SHong Zhang 3894c16a6a6SHong Zhang /* invert diagonal block */ 3904c16a6a6SHong Zhang diag = ba+k*bs2; 3914c16a6a6SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3924c16a6a6SHong Zhang Kernel_A_gets_inverse_A(bs,diag,pivots,work); 3934c16a6a6SHong Zhang 3944c16a6a6SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 3954c16a6a6SHong Zhang if (jmin < jmax) { 3964c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 3974c16a6a6SHong Zhang vj = bj[j]; /* block col. index of U */ 3984c16a6a6SHong Zhang u = ba + j*bs2; 3994c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 4004c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 4014c16a6a6SHong Zhang *u++ = *rtmp_ptr; 4024c16a6a6SHong Zhang *rtmp_ptr++ = 0.0; 4034c16a6a6SHong Zhang } 4044c16a6a6SHong Zhang } 4054c16a6a6SHong Zhang 4064c16a6a6SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 4074c16a6a6SHong Zhang il[k] = jmin; 4084c16a6a6SHong Zhang i = bj[jmin]; 4094c16a6a6SHong Zhang jl[k] = jl[i]; jl[i] = k; 4104c16a6a6SHong Zhang } 4114c16a6a6SHong Zhang } 4124c16a6a6SHong Zhang 4134c16a6a6SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 4144c16a6a6SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 4154c16a6a6SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 4164c16a6a6SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 4174c16a6a6SHong Zhang if (a->permute){ 4184c16a6a6SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 4194c16a6a6SHong Zhang } 4204c16a6a6SHong Zhang 4214c16a6a6SHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 4224c16a6a6SHong Zhang C->factor = FACTOR_CHOLESKY; 4234c16a6a6SHong Zhang C->assembled = PETSC_TRUE; 4244c16a6a6SHong Zhang C->preallocated = PETSC_TRUE; 425b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 4264c16a6a6SHong Zhang PetscFunctionReturn(0); 4274c16a6a6SHong Zhang } 428d4edadd4SHong Zhang 4294a2ae208SSatish Balay #undef __FUNCT__ 4304a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 431671cb588SHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 432671cb588SHong Zhang { 433671cb588SHong Zhang Mat C = *B; 434671cb588SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 435671cb588SHong Zhang int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 4361ad1de41SHong Zhang int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 437671cb588SHong Zhang int bs=a->bs,bs2 = a->bs2; 438671cb588SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 439671cb588SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 44028de702eSHong Zhang MatScalar *work; 441671cb588SHong Zhang int *pivots; 442671cb588SHong Zhang 443671cb588SHong Zhang PetscFunctionBegin; 444671cb588SHong Zhang 445671cb588SHong Zhang /* initialization */ 446671cb588SHong Zhang 44782502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 448671cb588SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 44982502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 45028de702eSHong Zhang jl = il + mbs; 451671cb588SHong Zhang for (i=0; i<mbs; i++) { 452671cb588SHong Zhang jl[i] = mbs; il[0] = 0; 453671cb588SHong Zhang } 454b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 45528de702eSHong Zhang uik = dk + bs2; 45628de702eSHong Zhang work = uik + bs2; 45782502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 458671cb588SHong Zhang 459671cb588SHong Zhang ai = a->i; aj = a->j; aa = a->a; 460671cb588SHong Zhang 461671cb588SHong Zhang /* for each row k */ 462671cb588SHong Zhang for (k = 0; k<mbs; k++){ 463671cb588SHong Zhang 464671cb588SHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 465671cb588SHong Zhang jmin = ai[k]; jmax = ai[k+1]; 466671cb588SHong Zhang ap = aa + jmin*bs2; 467671cb588SHong Zhang for (j = jmin; j < jmax; j++){ 468671cb588SHong Zhang vj = aj[j]; /* block col. index */ 469671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 470671cb588SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 471671cb588SHong Zhang } 472671cb588SHong Zhang 473671cb588SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 474671cb588SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 475671cb588SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 476671cb588SHong Zhang 477057f5ba7SHong Zhang while (i < k){ 478671cb588SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 479671cb588SHong Zhang 480671cb588SHong Zhang /* compute multiplier */ 481671cb588SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 482671cb588SHong Zhang 483671cb588SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 484671cb588SHong Zhang diag = ba + i*bs2; 485671cb588SHong Zhang u = ba + ili*bs2; 486671cb588SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 487671cb588SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 488671cb588SHong Zhang 489671cb588SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 490671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 491671cb588SHong Zhang 492671cb588SHong Zhang /* update -U(i,k) */ 493671cb588SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 494671cb588SHong Zhang 495671cb588SHong Zhang /* add multiple of row i to k-th row ... */ 496671cb588SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 497671cb588SHong Zhang if (jmin < jmax){ 498671cb588SHong Zhang for (j=jmin; j<jmax; j++) { 499671cb588SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 500671cb588SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 501671cb588SHong Zhang u = ba + j*bs2; 502671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 503671cb588SHong Zhang } 504671cb588SHong Zhang 505671cb588SHong Zhang /* ... add i to row list for next nonzero entry */ 506671cb588SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 507671cb588SHong Zhang j = bj[jmin]; 508671cb588SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 509671cb588SHong Zhang } 510671cb588SHong Zhang i = nexti; 511671cb588SHong Zhang } 512671cb588SHong Zhang 513671cb588SHong Zhang /* save nonzero entries in k-th row of U ... */ 514671cb588SHong Zhang 515671cb588SHong Zhang /* invert diagonal block */ 516671cb588SHong Zhang diag = ba+k*bs2; 517671cb588SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 518671cb588SHong Zhang Kernel_A_gets_inverse_A(bs,diag,pivots,work); 519671cb588SHong Zhang 520671cb588SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 521671cb588SHong Zhang if (jmin < jmax) { 522671cb588SHong Zhang for (j=jmin; j<jmax; j++){ 523671cb588SHong Zhang vj = bj[j]; /* block col. index of U */ 524671cb588SHong Zhang u = ba + j*bs2; 525671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 526671cb588SHong Zhang for (k1=0; k1<bs2; k1++){ 527671cb588SHong Zhang *u++ = *rtmp_ptr; 528671cb588SHong Zhang *rtmp_ptr++ = 0.0; 529671cb588SHong Zhang } 530671cb588SHong Zhang } 531671cb588SHong Zhang 532671cb588SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 533671cb588SHong Zhang il[k] = jmin; 534671cb588SHong Zhang i = bj[jmin]; 535671cb588SHong Zhang jl[k] = jl[i]; jl[i] = k; 536671cb588SHong Zhang } 537671cb588SHong Zhang } 538671cb588SHong Zhang 539671cb588SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 540671cb588SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 541671cb588SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 542671cb588SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 543671cb588SHong Zhang 544671cb588SHong Zhang C->factor = FACTOR_CHOLESKY; 545671cb588SHong Zhang C->assembled = PETSC_TRUE; 546671cb588SHong Zhang C->preallocated = PETSC_TRUE; 547b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 548671cb588SHong Zhang PetscFunctionReturn(0); 549671cb588SHong Zhang } 550671cb588SHong Zhang 55149b5e25fSSatish Balay /* 552fcf159c0SHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 553cc0c071aSHong Zhang Version for blocks 2 by 2. 55449b5e25fSSatish Balay */ 5554a2ae208SSatish Balay #undef __FUNCT__ 5564a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 5576f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 55849b5e25fSSatish Balay { 55949b5e25fSSatish Balay Mat C = *B; 56091602c66SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 561cc0c071aSHong Zhang IS perm = b->row; 562cc0c071aSHong Zhang int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 563cc0c071aSHong Zhang int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 564a1723e09SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 565cc0c071aSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 56649b5e25fSSatish Balay 56749b5e25fSSatish Balay PetscFunctionBegin; 568671cb588SHong Zhang 56991602c66SHong Zhang /* initialization */ 57091602c66SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 57191602c66SHong Zhang window U(0:k, k:mbs-1). 57291602c66SHong Zhang jl: list of rows to be added to uneliminated rows 57391602c66SHong Zhang i>= k: jl(i) is the first row to be added to row i 57491602c66SHong Zhang i< k: jl(i) is the row following row i in some list of rows 57591602c66SHong Zhang jl(i) = mbs indicates the end of a list 57691602c66SHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 57791602c66SHong Zhang row i of U */ 57882502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 579cc0c071aSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 58082502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 58128de702eSHong Zhang jl = il + mbs; 58291602c66SHong Zhang for (i=0; i<mbs; i++) { 5833845f261SHong Zhang jl[i] = mbs; il[0] = 0; 58491602c66SHong Zhang } 58582502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 58628de702eSHong Zhang uik = dk + 4; 587cc0c071aSHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 588cc0c071aSHong Zhang 589cc0c071aSHong Zhang /* check permutation */ 590cc0c071aSHong Zhang if (!a->permute){ 591cc0c071aSHong Zhang ai = a->i; aj = a->j; aa = a->a; 592cc0c071aSHong Zhang } else { 593cc0c071aSHong Zhang ai = a->inew; aj = a->jnew; 59482502324SSatish Balay ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 595cc0c071aSHong Zhang ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 59682502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 597cc0c071aSHong Zhang ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 598cc0c071aSHong Zhang 599cc0c071aSHong Zhang for (i=0; i<mbs; i++){ 600cc0c071aSHong Zhang jmin = ai[i]; jmax = ai[i+1]; 601cc0c071aSHong Zhang for (j=jmin; j<jmax; j++){ 602cc0c071aSHong Zhang while (a2anew[j] != j){ 603cc0c071aSHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 604cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 605cc0c071aSHong Zhang dk[k1] = aa[k*4+k1]; 606cc0c071aSHong Zhang aa[k*4+k1] = aa[j*4+k1]; 607cc0c071aSHong Zhang aa[j*4+k1] = dk[k1]; 608cc0c071aSHong Zhang } 609cc0c071aSHong Zhang } 610cc0c071aSHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 611cc0c071aSHong Zhang if (i > aj[j]){ 612a1723e09SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 613cc0c071aSHong Zhang ap = aa + j*4; /* ptr to the beginning of the block */ 614cc0c071aSHong Zhang dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 615cc0c071aSHong Zhang ap[1] = ap[2]; 616cc0c071aSHong Zhang ap[2] = dk[1]; 617cc0c071aSHong Zhang } 618cc0c071aSHong Zhang } 619cc0c071aSHong Zhang } 620ac355199SBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 621cc0c071aSHong Zhang } 6223845f261SHong Zhang 62391602c66SHong Zhang /* for each row k */ 62491602c66SHong Zhang for (k = 0; k<mbs; k++){ 62591602c66SHong Zhang 62691602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 627cc0c071aSHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 628cc0c071aSHong Zhang ap = aa + jmin*4; 62991602c66SHong Zhang for (j = jmin; j < jmax; j++){ 630cc0c071aSHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 631cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 632cc0c071aSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 63391602c66SHong Zhang } 63491602c66SHong Zhang 63591602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 636cc0c071aSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 63791602c66SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 63891602c66SHong Zhang 639057f5ba7SHong Zhang while (i < k){ 64091602c66SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 64191602c66SHong Zhang 6423845f261SHong Zhang /* compute multiplier */ 64391602c66SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 6443845f261SHong Zhang 6453845f261SHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 646cc0c071aSHong Zhang diag = ba + i*4; 647cc0c071aSHong Zhang u = ba + ili*4; 648cc0c071aSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 649cc0c071aSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 650cc0c071aSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 651cc0c071aSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 6523845f261SHong Zhang 6533845f261SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 654cc0c071aSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 655cc0c071aSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 656cc0c071aSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 657cc0c071aSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 6583845f261SHong Zhang 6593845f261SHong Zhang /* update -U(i,k): ba[ili] = uik */ 660cc0c071aSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 66191602c66SHong Zhang 66291602c66SHong Zhang /* add multiple of row i to k-th row ... */ 66391602c66SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 66491602c66SHong Zhang if (jmin < jmax){ 6653845f261SHong Zhang for (j=jmin; j<jmax; j++) { 6663845f261SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 667cc0c071aSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 668cc0c071aSHong Zhang u = ba + j*4; 669cc0c071aSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 670cc0c071aSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 671cc0c071aSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 672cc0c071aSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 6733845f261SHong Zhang } 6743845f261SHong Zhang 67591602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 67691602c66SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 67791602c66SHong Zhang j = bj[jmin]; 67891602c66SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 67991602c66SHong Zhang } 680a1723e09SHong Zhang i = nexti; 68191602c66SHong Zhang } 682cc0c071aSHong Zhang 68391602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 6843845f261SHong Zhang 685cc0c071aSHong Zhang /* invert diagonal block */ 686cc0c071aSHong Zhang diag = ba+k*4; 687cc0c071aSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 6883845f261SHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 6893845f261SHong Zhang 69091602c66SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 69191602c66SHong Zhang if (jmin < jmax) { 69291602c66SHong Zhang for (j=jmin; j<jmax; j++){ 693cc0c071aSHong Zhang vj = bj[j]; /* block col. index of U */ 694cc0c071aSHong Zhang u = ba + j*4; 695cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 696cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 697cc0c071aSHong Zhang *u++ = *rtmp_ptr; 698cc0c071aSHong Zhang *rtmp_ptr++ = 0.0; 699cc0c071aSHong Zhang } 700cc0c071aSHong Zhang } 7013845f261SHong Zhang 70291602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 70391602c66SHong Zhang il[k] = jmin; 70491602c66SHong Zhang i = bj[jmin]; 70591602c66SHong Zhang jl[k] = jl[i]; jl[i] = k; 70691602c66SHong Zhang } 70791602c66SHong Zhang } 7083845f261SHong Zhang 70949b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 71091602c66SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 7113845f261SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 71291602c66SHong Zhang if (a->permute) { 71391602c66SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 71491602c66SHong Zhang } 715cc0c071aSHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 71691602c66SHong Zhang C->factor = FACTOR_CHOLESKY; 71749b5e25fSSatish Balay C->assembled = PETSC_TRUE; 7185c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 719b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 72049b5e25fSSatish Balay PetscFunctionReturn(0); 72149b5e25fSSatish Balay } 72291602c66SHong Zhang 72349b5e25fSSatish Balay /* 72449b5e25fSSatish Balay Version for when blocks are 2 by 2 Using natural ordering 72549b5e25fSSatish Balay */ 7264a2ae208SSatish Balay #undef __FUNCT__ 7274a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 7286f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 72949b5e25fSSatish Balay { 73049b5e25fSSatish Balay Mat C = *B; 731ab27746eSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 732ab27746eSHong Zhang int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 73371149678SHong Zhang int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 734ab27746eSHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 735ab27746eSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 73649b5e25fSSatish Balay 73749b5e25fSSatish Balay PetscFunctionBegin; 738671cb588SHong Zhang 739ab27746eSHong Zhang /* initialization */ 740ab27746eSHong Zhang /* il and jl record the first nonzero element in each row of the accessing 741ab27746eSHong Zhang window U(0:k, k:mbs-1). 742ab27746eSHong Zhang jl: list of rows to be added to uneliminated rows 743ab27746eSHong Zhang i>= k: jl(i) is the first row to be added to row i 744ab27746eSHong Zhang i< k: jl(i) is the row following row i in some list of rows 745ab27746eSHong Zhang jl(i) = mbs indicates the end of a list 746ab27746eSHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 747ab27746eSHong Zhang row i of U */ 74882502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 749ab27746eSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 75082502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 75128de702eSHong Zhang jl = il + mbs; 752ab27746eSHong Zhang for (i=0; i<mbs; i++) { 753ab27746eSHong Zhang jl[i] = mbs; il[0] = 0; 75449b5e25fSSatish Balay } 75582502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 75628de702eSHong Zhang uik = dk + 4; 757ab27746eSHong Zhang 758ab27746eSHong Zhang ai = a->i; aj = a->j; aa = a->a; 759ab27746eSHong Zhang 760ab27746eSHong Zhang /* for each row k */ 761ab27746eSHong Zhang for (k = 0; k<mbs; k++){ 762ab27746eSHong Zhang 763ab27746eSHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 764ab27746eSHong Zhang jmin = ai[k]; jmax = ai[k+1]; 765ab27746eSHong Zhang ap = aa + jmin*4; 766ab27746eSHong Zhang for (j = jmin; j < jmax; j++){ 767ab27746eSHong Zhang vj = aj[j]; /* block col. index */ 768ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 769ab27746eSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 77049b5e25fSSatish Balay } 771ab27746eSHong Zhang 772ab27746eSHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 773ab27746eSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 774ab27746eSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 775ab27746eSHong Zhang 776057f5ba7SHong Zhang while (i < k){ 777ab27746eSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 778ab27746eSHong Zhang 779ab27746eSHong Zhang /* compute multiplier */ 780ab27746eSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 781ab27746eSHong Zhang 782ab27746eSHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 783ab27746eSHong Zhang diag = ba + i*4; 784ab27746eSHong Zhang u = ba + ili*4; 785ab27746eSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 786ab27746eSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 787ab27746eSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 788ab27746eSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 789ab27746eSHong Zhang 790ab27746eSHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 791ab27746eSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 792ab27746eSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 793ab27746eSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 794ab27746eSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 795ab27746eSHong Zhang 796ab27746eSHong Zhang /* update -U(i,k): ba[ili] = uik */ 797ab27746eSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 798ab27746eSHong Zhang 799ab27746eSHong Zhang /* add multiple of row i to k-th row ... */ 800ab27746eSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 801ab27746eSHong Zhang if (jmin < jmax){ 802ab27746eSHong Zhang for (j=jmin; j<jmax; j++) { 803ab27746eSHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 804ab27746eSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 805ab27746eSHong Zhang u = ba + j*4; 806ab27746eSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 807ab27746eSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 808ab27746eSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 809ab27746eSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 81049b5e25fSSatish Balay } 811ab27746eSHong Zhang 812ab27746eSHong Zhang /* ... add i to row list for next nonzero entry */ 813ab27746eSHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 814ab27746eSHong Zhang j = bj[jmin]; 815ab27746eSHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 81649b5e25fSSatish Balay } 817ab27746eSHong Zhang i = nexti; 81849b5e25fSSatish Balay } 819ab27746eSHong Zhang 820ab27746eSHong Zhang /* save nonzero entries in k-th row of U ... */ 821ab27746eSHong Zhang 82249b5e25fSSatish Balay /* invert diagonal block */ 823ab27746eSHong Zhang diag = ba+k*4; 824ab27746eSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 825ab27746eSHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 826ab27746eSHong Zhang 827ab27746eSHong Zhang jmin = bi[k]; jmax = bi[k+1]; 828ab27746eSHong Zhang if (jmin < jmax) { 829ab27746eSHong Zhang for (j=jmin; j<jmax; j++){ 830ab27746eSHong Zhang vj = bj[j]; /* block col. index of U */ 831ab27746eSHong Zhang u = ba + j*4; 832ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 833ab27746eSHong Zhang for (k1=0; k1<4; k1++){ 834ab27746eSHong Zhang *u++ = *rtmp_ptr; 835ab27746eSHong Zhang *rtmp_ptr++ = 0.0; 836ab27746eSHong Zhang } 837ab27746eSHong Zhang } 838ab27746eSHong Zhang 839ab27746eSHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 840ab27746eSHong Zhang il[k] = jmin; 841ab27746eSHong Zhang i = bj[jmin]; 842ab27746eSHong Zhang jl[k] = jl[i]; jl[i] = k; 843ab27746eSHong Zhang } 84449b5e25fSSatish Balay } 84549b5e25fSSatish Balay 84649b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 847ab27746eSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 848ab27746eSHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 849ab27746eSHong Zhang 850ab27746eSHong Zhang C->factor = FACTOR_CHOLESKY; 85149b5e25fSSatish Balay C->assembled = PETSC_TRUE; 8525c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 853b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 85449b5e25fSSatish Balay PetscFunctionReturn(0); 85549b5e25fSSatish Balay } 85649b5e25fSSatish Balay 85749b5e25fSSatish Balay /* 8585c0bcdfcSHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 85991602c66SHong Zhang Version for blocks are 1 by 1. 86049b5e25fSSatish Balay */ 8614a2ae208SSatish Balay #undef __FUNCT__ 8624a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 8636f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 86449b5e25fSSatish Balay { 86549b5e25fSSatish Balay Mat C = *B; 86649b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 86749b5e25fSSatish Balay IS ip = b->row; 868351d0355SHong Zhang int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 869cb718733SHong Zhang int *ai,*aj,*r; 870ab27746eSHong Zhang int k,jmin,jmax,*jl,*il,vj,nexti,ili; 871066653e3SSatish Balay MatScalar *rtmp; 8722d007305SHong Zhang MatScalar *ba = b->a,*aa,ak; 87349b5e25fSSatish Balay MatScalar dk,uikdi; 87449b5e25fSSatish Balay 87549b5e25fSSatish Balay PetscFunctionBegin; 87649b5e25fSSatish Balay ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 877cb718733SHong Zhang if (!a->permute){ 8782d007305SHong Zhang ai = a->i; aj = a->j; aa = a->a; 8792d007305SHong Zhang } else { 8802d007305SHong Zhang ai = a->inew; aj = a->jnew; 88182502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 882cb718733SHong Zhang ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 88382502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 8842d007305SHong Zhang ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 8852d007305SHong Zhang 8862d007305SHong Zhang jmin = ai[0]; jmax = ai[mbs]; 8872d007305SHong Zhang for (j=jmin; j<jmax; j++){ 888cb718733SHong Zhang while (r[j] != j){ 8892d007305SHong Zhang k = r[j]; r[j] = r[k]; r[k] = k; 8902d007305SHong Zhang ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 8912d007305SHong Zhang } 8922d007305SHong Zhang } 893ac355199SBarry Smith ierr = PetscFree(r);CHKERRQ(ierr); 8942d007305SHong Zhang } 8952d007305SHong Zhang 89691602c66SHong Zhang /* initialization */ 89749b5e25fSSatish Balay /* il and jl record the first nonzero element in each row of the accessing 89849b5e25fSSatish Balay window U(0:k, k:mbs-1). 89949b5e25fSSatish Balay jl: list of rows to be added to uneliminated rows 90049b5e25fSSatish Balay i>= k: jl(i) is the first row to be added to row i 90149b5e25fSSatish Balay i< k: jl(i) is the row following row i in some list of rows 90249b5e25fSSatish Balay jl(i) = mbs indicates the end of a list 90349b5e25fSSatish Balay il(i): points to the first nonzero element in columns k,...,mbs-1 of 90449b5e25fSSatish Balay row i of U */ 90582502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 90682502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 90728de702eSHong Zhang jl = il + mbs; 90849b5e25fSSatish Balay for (i=0; i<mbs; i++) { 90949b5e25fSSatish Balay rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 91049b5e25fSSatish Balay } 91149b5e25fSSatish Balay 91291602c66SHong Zhang /* for each row k */ 91349b5e25fSSatish Balay for (k = 0; k<mbs; k++){ 91449b5e25fSSatish Balay 91591602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 91649b5e25fSSatish Balay jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 917057f5ba7SHong Zhang 91849b5e25fSSatish Balay for (j = jmin; j < jmax; j++){ 919351d0355SHong Zhang vj = rip[aj[j]]; 920ab27746eSHong Zhang rtmp[vj] = aa[j]; 92149b5e25fSSatish Balay } 92249b5e25fSSatish Balay 92391602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 92449b5e25fSSatish Balay dk = rtmp[k]; 92549b5e25fSSatish Balay i = jl[k]; /* first row to be added to k_th row */ 92649b5e25fSSatish Balay 927057f5ba7SHong Zhang while (i < k){ 92849b5e25fSSatish Balay nexti = jl[i]; /* next row to be added to k_th row */ 92949b5e25fSSatish Balay 93091602c66SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 93149b5e25fSSatish Balay ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 93249b5e25fSSatish Balay uikdi = - ba[ili]*ba[i]; 93349b5e25fSSatish Balay dk += uikdi*ba[ili]; 934658e7b3eSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 93549b5e25fSSatish Balay 93691602c66SHong Zhang /* add multiple of row i to k-th row ... */ 93749b5e25fSSatish Balay jmin = ili + 1; jmax = bi[i+1]; 93849b5e25fSSatish Balay if (jmin < jmax){ 93949b5e25fSSatish Balay for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 94091602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 94149b5e25fSSatish Balay il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 94249b5e25fSSatish Balay j = bj[jmin]; 94349b5e25fSSatish Balay jl[i] = jl[j]; jl[j] = i; /* update jl */ 94449b5e25fSSatish Balay } 945ab27746eSHong Zhang i = nexti; 94649b5e25fSSatish Balay } 94749b5e25fSSatish Balay 94891602c66SHong Zhang /* check for zero pivot and save diagoanl element */ 94949b5e25fSSatish Balay if (dk == 0.0){ 95029bbc08cSBarry Smith SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 951671cb588SHong Zhang /* 9521223c01bSHong Zhang } else if (PetscRealPart(dk) < 0.0){ 9531223c01bSHong Zhang SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 954671cb588SHong Zhang */ 95549b5e25fSSatish Balay } 95649b5e25fSSatish Balay 95791602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 958f3dd7b2dSKris Buschelman ba[k] = 1.0/dk; 95949b5e25fSSatish Balay jmin = bi[k]; jmax = bi[k+1]; 96049b5e25fSSatish Balay if (jmin < jmax) { 96149b5e25fSSatish Balay for (j=jmin; j<jmax; j++){ 962cc0c071aSHong Zhang vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 96349b5e25fSSatish Balay } 96491602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 96549b5e25fSSatish Balay il[k] = jmin; 96649b5e25fSSatish Balay i = bj[jmin]; 96749b5e25fSSatish Balay jl[k] = jl[i]; jl[i] = k; 96849b5e25fSSatish Balay } 96949b5e25fSSatish Balay } 97049b5e25fSSatish Balay 97149b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 97249b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 973cb718733SHong Zhang if (a->permute){ 974cb718733SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 975cb718733SHong Zhang } 97649b5e25fSSatish Balay 97749b5e25fSSatish Balay ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 9788be8c0c7SHong Zhang C->factor = FACTOR_CHOLESKY; 97949b5e25fSSatish Balay C->assembled = PETSC_TRUE; 9805c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 981b0a32e0cSBarry Smith PetscLogFlops(b->mbs); 98249b5e25fSSatish Balay PetscFunctionReturn(0); 98349b5e25fSSatish Balay } 98449b5e25fSSatish Balay 985671cb588SHong Zhang /* 986671cb588SHong Zhang Version for when blocks are 1 by 1 Using natural ordering 987671cb588SHong Zhang */ 9884a2ae208SSatish Balay #undef __FUNCT__ 9894a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 990671cb588SHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 991671cb588SHong Zhang { 992671cb588SHong Zhang Mat C = *B; 993671cb588SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 994671cb588SHong Zhang int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 9951ad1de41SHong Zhang int *ai,*aj; 996671cb588SHong Zhang int k,jmin,jmax,*jl,*il,vj,nexti,ili; 9971ad1de41SHong Zhang MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 998671cb588SHong Zhang 999671cb588SHong Zhang PetscFunctionBegin; 1000671cb588SHong Zhang 1001671cb588SHong Zhang /* initialization */ 1002671cb588SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 1003671cb588SHong Zhang window U(0:k, k:mbs-1). 1004671cb588SHong Zhang jl: list of rows to be added to uneliminated rows 1005671cb588SHong Zhang i>= k: jl(i) is the first row to be added to row i 1006671cb588SHong Zhang i< k: jl(i) is the row following row i in some list of rows 1007671cb588SHong Zhang jl(i) = mbs indicates the end of a list 1008671cb588SHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 1009671cb588SHong Zhang row i of U */ 101082502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 101182502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 101228de702eSHong Zhang jl = il + mbs; 1013671cb588SHong Zhang for (i=0; i<mbs; i++) { 1014671cb588SHong Zhang rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1015671cb588SHong Zhang } 1016671cb588SHong Zhang 1017671cb588SHong Zhang ai = a->i; aj = a->j; aa = a->a; 1018671cb588SHong Zhang 1019671cb588SHong Zhang /* for each row k */ 1020671cb588SHong Zhang for (k = 0; k<mbs; k++){ 1021671cb588SHong Zhang 1022671cb588SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 1023671cb588SHong Zhang jmin = ai[k]; jmax = ai[k+1]; 1024057f5ba7SHong Zhang 1025671cb588SHong Zhang for (j = jmin; j < jmax; j++){ 1026671cb588SHong Zhang vj = aj[j]; 1027671cb588SHong Zhang rtmp[vj] = aa[j]; 1028671cb588SHong Zhang } 1029671cb588SHong Zhang 1030671cb588SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1031671cb588SHong Zhang dk = rtmp[k]; 1032671cb588SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 1033671cb588SHong Zhang 1034057f5ba7SHong Zhang while (i < k){ 1035671cb588SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 1036671cb588SHong Zhang 1037671cb588SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 1038671cb588SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1039671cb588SHong Zhang uikdi = - ba[ili]*ba[i]; 1040671cb588SHong Zhang dk += uikdi*ba[ili]; 1041671cb588SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 1042671cb588SHong Zhang 1043671cb588SHong Zhang /* add multiple of row i to k-th row ... */ 1044671cb588SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 1045671cb588SHong Zhang if (jmin < jmax){ 1046671cb588SHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1047671cb588SHong Zhang /* ... add i to row list for next nonzero entry */ 1048671cb588SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1049671cb588SHong Zhang j = bj[jmin]; 1050671cb588SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 1051671cb588SHong Zhang } 1052671cb588SHong Zhang i = nexti; 1053671cb588SHong Zhang } 1054671cb588SHong Zhang 1055671cb588SHong Zhang /* check for zero pivot and save diagoanl element */ 1056671cb588SHong Zhang if (dk == 0.0){ 1057671cb588SHong Zhang SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1058671cb588SHong Zhang /* 1059671cb588SHong Zhang } else if (PetscRealPart(dk) < 0){ 10601223c01bSHong Zhang SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1061671cb588SHong Zhang */ 1062671cb588SHong Zhang } 1063671cb588SHong Zhang 1064671cb588SHong Zhang /* save nonzero entries in k-th row of U ... */ 1065671cb588SHong Zhang ba[k] = 1.0/dk; 1066671cb588SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 1067671cb588SHong Zhang if (jmin < jmax) { 1068671cb588SHong Zhang for (j=jmin; j<jmax; j++){ 1069671cb588SHong Zhang vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1070671cb588SHong Zhang } 1071671cb588SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 1072671cb588SHong Zhang il[k] = jmin; 1073671cb588SHong Zhang i = bj[jmin]; 1074671cb588SHong Zhang jl[k] = jl[i]; jl[i] = k; 1075671cb588SHong Zhang } 1076671cb588SHong Zhang } 1077671cb588SHong Zhang 1078671cb588SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 1079671cb588SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 1080671cb588SHong Zhang 1081671cb588SHong Zhang C->factor = FACTOR_CHOLESKY; 1082671cb588SHong Zhang C->assembled = PETSC_TRUE; 1083671cb588SHong Zhang C->preallocated = PETSC_TRUE; 1084b0a32e0cSBarry Smith PetscLogFlops(b->mbs); 1085671cb588SHong Zhang PetscFunctionReturn(0); 1086671cb588SHong Zhang } 1087671cb588SHong Zhang 10884a2ae208SSatish Balay #undef __FUNCT__ 10894a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1090b8b23757SHong Zhang int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 109149b5e25fSSatish Balay { 10924445ddedSHong Zhang int ierr; 109349b5e25fSSatish Balay Mat C; 109449b5e25fSSatish Balay 109549b5e25fSSatish Balay PetscFunctionBegin; 1096b8b23757SHong Zhang ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1097a4ada70bSHong Zhang ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 10984445ddedSHong Zhang ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 109949b5e25fSSatish Balay PetscFunctionReturn(0); 110049b5e25fSSatish Balay } 110149b5e25fSSatish Balay 110249b5e25fSSatish Balay 1103