173f4d377SMatthew Knepley /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 249b5e25fSSatish Balay 349b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 43a7fca6bSBarry Smith #include "src/mat/impls/sbaij/seq/sbaij.h" 549b5e25fSSatish Balay #include "src/vec/vecimpl.h" 649b5e25fSSatish Balay #include "src/inline/ilu.h" 749a6740bSHong Zhang #include "include/petscis.h" 849b5e25fSSatish Balay 9*5f9f512dSHong Zhang /* 10*5f9f512dSHong Zhang input: 11*5f9f512dSHong Zhang A -- original matrix in SEQSBAIJ format 12*5f9f512dSHong Zhang F -- symbolic or numeric factor of A 13*5f9f512dSHong Zhang output: 14*5f9f512dSHong Zhang F -- numeric factor of A if F is input as symbolic factor of A 15*5f9f512dSHong Zhang nneg, nzero, npos: inertia of A 16*5f9f512dSHong Zhang */ 17*5f9f512dSHong Zhang 18*5f9f512dSHong Zhang #undef __FUNCT__ 19*5f9f512dSHong Zhang #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 20*5f9f512dSHong Zhang int MatGetInertia_SeqSBAIJ(Mat A,Mat *F,int *nneig,int *nzero,int *npos) 21*5f9f512dSHong Zhang { 22*5f9f512dSHong Zhang PetscScalar *dd; 23*5f9f512dSHong Zhang Mat_SeqSBAIJ *fact_ptr; 24*5f9f512dSHong Zhang int ierr,m=A->m,i; 25*5f9f512dSHong Zhang 26*5f9f512dSHong Zhang PetscFunctionBegin; 27*5f9f512dSHong Zhang if (! (*F)->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for symfactor yet, use numfactor F"); 28*5f9f512dSHong Zhang 29*5f9f512dSHong Zhang fact_ptr = (Mat_SeqSBAIJ*)(*F)->data; 30*5f9f512dSHong Zhang dd = fact_ptr->a; 31*5f9f512dSHong Zhang *nneig = 0; 32*5f9f512dSHong Zhang for (i=0; i<m; i++){ 33*5f9f512dSHong Zhang if (dd[i] < 0.0) (*nneig)++; 34*5f9f512dSHong Zhang } 35*5f9f512dSHong Zhang 36*5f9f512dSHong Zhang PetscFunctionReturn(0); 37*5f9f512dSHong Zhang } 38*5f9f512dSHong Zhang 39*5f9f512dSHong Zhang /* Using Modified Sparse Row (MSR) storage. 40*5f9f512dSHong Zhang See page 85, "Iterative Methods ..." by Saad. */ 41*5f9f512dSHong Zhang /* 42*5f9f512dSHong Zhang Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 43*5f9f512dSHong Zhang */ 4487fe1012SHong Zhang /* Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 454a2ae208SSatish Balay #undef __FUNCT__ 464a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 4777f73120SHong Zhang int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 4849b5e25fSSatish Balay { 4949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 50cb718733SHong Zhang int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 51066653e3SSatish Balay int *jutmp,bs = a->bs,bs2=a->bs2; 5287fe1012SHong Zhang int m,realloc = 0,prow; 53d60e5362SHong Zhang int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 54671cb588SHong Zhang PetscTruth perm_identity; 5549b5e25fSSatish Balay 5649b5e25fSSatish Balay PetscFunctionBegin; 572d007305SHong Zhang 58cb718733SHong Zhang /* check whether perm is the identity mapping */ 59671cb588SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 60671cb588SHong Zhang if (!perm_identity) a->permute = PETSC_TRUE; 612d007305SHong Zhang 62671cb588SHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 63671cb588SHong Zhang 64671cb588SHong Zhang if (perm_identity){ /* without permutation */ 652d007305SHong Zhang ai = a->i; aj = a->j; 662d007305SHong Zhang } else { /* non-trivial permutation */ 67323b833fSBarry Smith ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 682d007305SHong Zhang ai = a->inew; aj = a->jnew; 692d007305SHong Zhang } 702d007305SHong Zhang 7149b5e25fSSatish Balay /* initialization */ 72b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 7349b5e25fSSatish Balay umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 7482502324SSatish Balay ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 7549b5e25fSSatish Balay iu[0] = mbs+1; 7687fe1012SHong Zhang juidx = mbs + 1; /* index for ju */ 7787fe1012SHong Zhang ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 7887fe1012SHong Zhang q = jl + mbs; /* linked list for col index */ 7949b5e25fSSatish Balay for (i=0; i<mbs; i++){ 8087fe1012SHong Zhang jl[i] = mbs; 8187fe1012SHong Zhang q[i] = 0; 8249b5e25fSSatish Balay } 8349b5e25fSSatish Balay 8449b5e25fSSatish Balay /* for each row k */ 8549b5e25fSSatish Balay for (k=0; k<mbs; k++){ 8649b5e25fSSatish Balay nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 8749b5e25fSSatish Balay q[k] = mbs; 8849b5e25fSSatish Balay /* initialize nonzero structure of k-th row to row rip[k] of A */ 8949b5e25fSSatish Balay jmin = ai[rip[k]]; 9049b5e25fSSatish Balay jmax = ai[rip[k]+1]; 9149b5e25fSSatish Balay for (j=jmin; j<jmax; j++){ 92cb718733SHong Zhang vj = rip[aj[j]]; /* col. value */ 9349b5e25fSSatish Balay if(vj > k){ 9449b5e25fSSatish Balay qm = k; 9549b5e25fSSatish Balay do { 9649b5e25fSSatish Balay m = qm; qm = q[m]; 9749b5e25fSSatish Balay } while(qm < vj); 9849b5e25fSSatish Balay if (qm == vj) { 993078e140SBarry Smith SETERRQ(1," error: duplicate entry in A\n"); 10049b5e25fSSatish Balay } 10149b5e25fSSatish Balay nzk++; 10249b5e25fSSatish Balay q[m] = vj; 10349b5e25fSSatish Balay q[vj] = qm; 10449b5e25fSSatish Balay } /* if(vj > k) */ 10549b5e25fSSatish Balay } /* for (j=jmin; j<jmax; j++) */ 10649b5e25fSSatish Balay 10749b5e25fSSatish Balay /* modify nonzero structure of k-th row by computing fill-in 10849b5e25fSSatish Balay for each row i to be merged in */ 10987fe1012SHong Zhang prow = k; 11087fe1012SHong Zhang prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 11187fe1012SHong Zhang 11287fe1012SHong Zhang while (prow < k){ 11387fe1012SHong Zhang /* merge row prow into k-th row */ 11487fe1012SHong Zhang jmin = iu[prow] + 1; jmax = iu[prow+1]; 11549b5e25fSSatish Balay qm = k; 11687fe1012SHong Zhang for (j=jmin; j<jmax; j++){ 11749b5e25fSSatish Balay vj = ju[j]; 11849b5e25fSSatish Balay do { 11949b5e25fSSatish Balay m = qm; qm = q[m]; 12049b5e25fSSatish Balay } while (qm < vj); 12149b5e25fSSatish Balay if (qm != vj){ 12249b5e25fSSatish Balay nzk++; q[m] = vj; q[vj] = qm; qm = vj; 12349b5e25fSSatish Balay } 12449b5e25fSSatish Balay } 12587fe1012SHong Zhang prow = jl[prow]; /* next pivot row */ 12649b5e25fSSatish Balay } 12749b5e25fSSatish Balay 12849b5e25fSSatish Balay /* add k to row list for first nonzero element in k-th row */ 12949b5e25fSSatish Balay if (nzk > 0){ 13049b5e25fSSatish Balay i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 13149b5e25fSSatish Balay jl[k] = jl[i]; jl[i] = k; 13249b5e25fSSatish Balay } 13387fe1012SHong Zhang iu[k+1] = iu[k] + nzk; 13449b5e25fSSatish Balay 13549b5e25fSSatish Balay /* allocate more space to ju if needed */ 1363078e140SBarry Smith if (iu[k+1] > umax) { 13749b5e25fSSatish Balay /* estimate how much additional space we will need */ 13849b5e25fSSatish Balay /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 13949b5e25fSSatish Balay /* just double the memory each time */ 14049b5e25fSSatish Balay maxadd = umax; 14149b5e25fSSatish Balay if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 14249b5e25fSSatish Balay umax += maxadd; 14349b5e25fSSatish Balay 1449f9cb213SHong Zhang /* allocate a longer ju */ 14582502324SSatish Balay ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 14649b5e25fSSatish Balay ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 14749b5e25fSSatish Balay ierr = PetscFree(ju);CHKERRQ(ierr); 1489f9cb213SHong Zhang ju = jutmp; 14949b5e25fSSatish Balay realloc++; /* count how many times we realloc */ 15049b5e25fSSatish Balay } 15149b5e25fSSatish Balay 15249b5e25fSSatish Balay /* save nonzero structure of k-th row in ju */ 15349b5e25fSSatish Balay i=k; 15487fe1012SHong Zhang while (nzk --) { 15549b5e25fSSatish Balay i = q[i]; 15687fe1012SHong Zhang ju[juidx++] = i; 15749b5e25fSSatish Balay } 15877f73120SHong Zhang } 15949b5e25fSSatish Balay 16049b5e25fSSatish Balay if (ai[mbs] != 0) { 16149b5e25fSSatish Balay PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 162b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1633078e140SBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1643078e140SBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 165b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 16649b5e25fSSatish Balay } else { 167b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 16849b5e25fSSatish Balay } 16949b5e25fSSatish Balay 17077f73120SHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 17187fe1012SHong Zhang /* ierr = PetscFree(q);CHKERRQ(ierr); */ 17249b5e25fSSatish Balay ierr = PetscFree(jl);CHKERRQ(ierr); 17349b5e25fSSatish Balay 17449b5e25fSSatish Balay /* put together the new matrix */ 17549b5e25fSSatish Balay ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 176b0a32e0cSBarry Smith /* PetscLogObjectParent(*B,iperm); */ 17749b5e25fSSatish Balay b = (Mat_SeqSBAIJ*)(*B)->data; 17849b5e25fSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 17949b5e25fSSatish Balay b->singlemalloc = PETSC_FALSE; 18049b5e25fSSatish Balay /* the next line frees the default space generated by the Create() */ 18149b5e25fSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 18249b5e25fSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 18382502324SSatish Balay ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 18449b5e25fSSatish Balay b->j = ju; 18549b5e25fSSatish Balay b->i = iu; 18649b5e25fSSatish Balay b->diag = 0; 18749b5e25fSSatish Balay b->ilen = 0; 18849b5e25fSSatish Balay b->imax = 0; 18977f73120SHong Zhang b->row = perm; 190bcd9e38bSBarry Smith b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 19177f73120SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 192cb718733SHong Zhang b->icol = perm; 193cb718733SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 19487828ca2SBarry Smith ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 19549b5e25fSSatish Balay /* In b structure: Free imax, ilen, old a, old j. 19649b5e25fSSatish Balay Allocate idnew, solve_work, new a, new j */ 197b0a32e0cSBarry Smith PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 19849b5e25fSSatish Balay b->s_maxnz = b->s_nz = iu[mbs]; 19949b5e25fSSatish Balay 2005c0bcdfcSHong Zhang (*B)->factor = FACTOR_CHOLESKY; 20149b5e25fSSatish Balay (*B)->info.factor_mallocs = realloc; 20249b5e25fSSatish Balay (*B)->info.fill_ratio_given = f; 20349b5e25fSSatish Balay if (ai[mbs] != 0) { 20449b5e25fSSatish Balay (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 20549b5e25fSSatish Balay } else { 20649b5e25fSSatish Balay (*B)->info.fill_ratio_needed = 0.0; 20749b5e25fSSatish Balay } 2080aa0ce06SHong Zhang 209671cb588SHong Zhang if (perm_identity){ 210671cb588SHong Zhang switch (bs) { 211671cb588SHong Zhang case 1: 212671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 213671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2144d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 215671cb588SHong Zhang break; 216671cb588SHong Zhang case 2: 217671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 218671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2194d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 220671cb588SHong Zhang break; 221671cb588SHong Zhang case 3: 222671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 223671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2244d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 225671cb588SHong Zhang break; 226671cb588SHong Zhang case 4: 227671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 228671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2294d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 230671cb588SHong Zhang break; 231671cb588SHong Zhang case 5: 232671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 233671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2344d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 235671cb588SHong Zhang break; 236671cb588SHong Zhang case 6: 237671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 238671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2394d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 240671cb588SHong Zhang break; 241671cb588SHong Zhang case 7: 242671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 243671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2444d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 245671cb588SHong Zhang break; 246671cb588SHong Zhang default: 247671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 248671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2494d101231SSatish Balay PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 250671cb588SHong Zhang break; 251671cb588SHong Zhang } 252671cb588SHong Zhang } 253671cb588SHong Zhang 25449b5e25fSSatish Balay PetscFunctionReturn(0); 25549b5e25fSSatish Balay } 25649b5e25fSSatish Balay 25735d8aa7fSBarry Smith 2584a2ae208SSatish Balay #undef __FUNCT__ 2594a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 2606f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 26149b5e25fSSatish Balay { 26249b5e25fSSatish Balay Mat C = *B; 2634c16a6a6SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 2644c16a6a6SHong Zhang IS perm = b->row; 2654c16a6a6SHong Zhang int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 2664c16a6a6SHong Zhang int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 2674c16a6a6SHong Zhang int bs=a->bs,bs2 = a->bs2; 2684c16a6a6SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 2694c16a6a6SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 27028de702eSHong Zhang MatScalar *work; 2714c16a6a6SHong Zhang int *pivots; 2724c16a6a6SHong Zhang 2734c16a6a6SHong Zhang PetscFunctionBegin; 2749706f043SHong Zhang 2754c16a6a6SHong Zhang /* initialization */ 27682502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2774c16a6a6SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 27882502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 27928de702eSHong Zhang jl = il + mbs; 2804c16a6a6SHong Zhang for (i=0; i<mbs; i++) { 2814c16a6a6SHong Zhang jl[i] = mbs; il[0] = 0; 2824c16a6a6SHong Zhang } 283b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 28428de702eSHong Zhang uik = dk + bs2; 28528de702eSHong Zhang work = uik + bs2; 28682502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 2874c16a6a6SHong Zhang 2884c16a6a6SHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 2894c16a6a6SHong Zhang 2904c16a6a6SHong Zhang /* check permutation */ 2914c16a6a6SHong Zhang if (!a->permute){ 2924c16a6a6SHong Zhang ai = a->i; aj = a->j; aa = a->a; 2934c16a6a6SHong Zhang } else { 2944c16a6a6SHong Zhang ai = a->inew; aj = a->jnew; 29582502324SSatish Balay ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 2964c16a6a6SHong Zhang ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 29782502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 2984c16a6a6SHong Zhang ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 2994c16a6a6SHong Zhang 3004c16a6a6SHong Zhang for (i=0; i<mbs; i++){ 3014c16a6a6SHong Zhang jmin = ai[i]; jmax = ai[i+1]; 3024c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 3034c16a6a6SHong Zhang while (a2anew[j] != j){ 3044c16a6a6SHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 3054c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 3064c16a6a6SHong Zhang dk[k1] = aa[k*bs2+k1]; 3074c16a6a6SHong Zhang aa[k*bs2+k1] = aa[j*bs2+k1]; 3084c16a6a6SHong Zhang aa[j*bs2+k1] = dk[k1]; 3094c16a6a6SHong Zhang } 3104c16a6a6SHong Zhang } 3114c16a6a6SHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 3124c16a6a6SHong Zhang if (i > aj[j]){ 3134c16a6a6SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 3144c16a6a6SHong Zhang ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 3154c16a6a6SHong Zhang for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 3164c16a6a6SHong Zhang for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 3174c16a6a6SHong Zhang for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 3184c16a6a6SHong Zhang } 3194c16a6a6SHong Zhang } 3204c16a6a6SHong Zhang } 3214c16a6a6SHong Zhang } 322323b833fSBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 3234c16a6a6SHong Zhang } 3244c16a6a6SHong Zhang 3254c16a6a6SHong Zhang /* for each row k */ 3264c16a6a6SHong Zhang for (k = 0; k<mbs; k++){ 3274c16a6a6SHong Zhang 3284c16a6a6SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 3294c16a6a6SHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 330057f5ba7SHong Zhang 3314c16a6a6SHong Zhang ap = aa + jmin*bs2; 3324c16a6a6SHong Zhang for (j = jmin; j < jmax; j++){ 3334c16a6a6SHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 3344c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 3354c16a6a6SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 3364c16a6a6SHong Zhang } 3374c16a6a6SHong Zhang 3384c16a6a6SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 3394c16a6a6SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3404c16a6a6SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 3414c16a6a6SHong Zhang 342057f5ba7SHong Zhang while (i < k){ 3434c16a6a6SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 3444c16a6a6SHong Zhang 3454c16a6a6SHong Zhang /* compute multiplier */ 3464c16a6a6SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 3474c16a6a6SHong Zhang 3484c16a6a6SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 3494c16a6a6SHong Zhang diag = ba + i*bs2; 3504c16a6a6SHong Zhang u = ba + ili*bs2; 3514c16a6a6SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3524c16a6a6SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 3534c16a6a6SHong Zhang 3544c16a6a6SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 3554c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 3564c16a6a6SHong Zhang 3574c16a6a6SHong Zhang /* update -U(i,k) */ 3584c16a6a6SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3594c16a6a6SHong Zhang 3604c16a6a6SHong Zhang /* add multiple of row i to k-th row ... */ 3614c16a6a6SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 3624c16a6a6SHong Zhang if (jmin < jmax){ 3634c16a6a6SHong Zhang for (j=jmin; j<jmax; j++) { 3644c16a6a6SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 3654c16a6a6SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 3664c16a6a6SHong Zhang u = ba + j*bs2; 3674c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 3684c16a6a6SHong Zhang } 3694c16a6a6SHong Zhang 3704c16a6a6SHong Zhang /* ... add i to row list for next nonzero entry */ 3714c16a6a6SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 3724c16a6a6SHong Zhang j = bj[jmin]; 3734c16a6a6SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 3744c16a6a6SHong Zhang } 3754c16a6a6SHong Zhang i = nexti; 3764c16a6a6SHong Zhang } 3774c16a6a6SHong Zhang 3784c16a6a6SHong Zhang /* save nonzero entries in k-th row of U ... */ 3794c16a6a6SHong Zhang 3804c16a6a6SHong Zhang /* invert diagonal block */ 3814c16a6a6SHong Zhang diag = ba+k*bs2; 3824c16a6a6SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3834c16a6a6SHong Zhang Kernel_A_gets_inverse_A(bs,diag,pivots,work); 3844c16a6a6SHong Zhang 3854c16a6a6SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 3864c16a6a6SHong Zhang if (jmin < jmax) { 3874c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 3884c16a6a6SHong Zhang vj = bj[j]; /* block col. index of U */ 3894c16a6a6SHong Zhang u = ba + j*bs2; 3904c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 3914c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 3924c16a6a6SHong Zhang *u++ = *rtmp_ptr; 3934c16a6a6SHong Zhang *rtmp_ptr++ = 0.0; 3944c16a6a6SHong Zhang } 3954c16a6a6SHong Zhang } 3964c16a6a6SHong Zhang 3974c16a6a6SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 3984c16a6a6SHong Zhang il[k] = jmin; 3994c16a6a6SHong Zhang i = bj[jmin]; 4004c16a6a6SHong Zhang jl[k] = jl[i]; jl[i] = k; 4014c16a6a6SHong Zhang } 4024c16a6a6SHong Zhang } 4034c16a6a6SHong Zhang 4044c16a6a6SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 4054c16a6a6SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 4064c16a6a6SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 4074c16a6a6SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 4084c16a6a6SHong Zhang if (a->permute){ 4094c16a6a6SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 4104c16a6a6SHong Zhang } 4114c16a6a6SHong Zhang 4124c16a6a6SHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 4134c16a6a6SHong Zhang C->factor = FACTOR_CHOLESKY; 4144c16a6a6SHong Zhang C->assembled = PETSC_TRUE; 4154c16a6a6SHong Zhang C->preallocated = PETSC_TRUE; 416b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 4174c16a6a6SHong Zhang PetscFunctionReturn(0); 4184c16a6a6SHong Zhang } 419d4edadd4SHong Zhang 4204a2ae208SSatish Balay #undef __FUNCT__ 4214a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 422671cb588SHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 423671cb588SHong Zhang { 424671cb588SHong Zhang Mat C = *B; 425671cb588SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 426671cb588SHong Zhang int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 4271ad1de41SHong Zhang int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 428671cb588SHong Zhang int bs=a->bs,bs2 = a->bs2; 429671cb588SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 430671cb588SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 43128de702eSHong Zhang MatScalar *work; 432671cb588SHong Zhang int *pivots; 433671cb588SHong Zhang 434671cb588SHong Zhang PetscFunctionBegin; 435671cb588SHong Zhang 436671cb588SHong Zhang /* initialization */ 437671cb588SHong Zhang 43882502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 439671cb588SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 44082502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 44128de702eSHong Zhang jl = il + mbs; 442671cb588SHong Zhang for (i=0; i<mbs; i++) { 443671cb588SHong Zhang jl[i] = mbs; il[0] = 0; 444671cb588SHong Zhang } 445b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 44628de702eSHong Zhang uik = dk + bs2; 44728de702eSHong Zhang work = uik + bs2; 44882502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 449671cb588SHong Zhang 450671cb588SHong Zhang ai = a->i; aj = a->j; aa = a->a; 451671cb588SHong Zhang 452671cb588SHong Zhang /* for each row k */ 453671cb588SHong Zhang for (k = 0; k<mbs; k++){ 454671cb588SHong Zhang 455671cb588SHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 456671cb588SHong Zhang jmin = ai[k]; jmax = ai[k+1]; 457671cb588SHong Zhang ap = aa + jmin*bs2; 458671cb588SHong Zhang for (j = jmin; j < jmax; j++){ 459671cb588SHong Zhang vj = aj[j]; /* block col. index */ 460671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 461671cb588SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 462671cb588SHong Zhang } 463671cb588SHong Zhang 464671cb588SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 465671cb588SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 466671cb588SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 467671cb588SHong Zhang 468057f5ba7SHong Zhang while (i < k){ 469671cb588SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 470671cb588SHong Zhang 471671cb588SHong Zhang /* compute multiplier */ 472671cb588SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 473671cb588SHong Zhang 474671cb588SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 475671cb588SHong Zhang diag = ba + i*bs2; 476671cb588SHong Zhang u = ba + ili*bs2; 477671cb588SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 478671cb588SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 479671cb588SHong Zhang 480671cb588SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 481671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 482671cb588SHong Zhang 483671cb588SHong Zhang /* update -U(i,k) */ 484671cb588SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 485671cb588SHong Zhang 486671cb588SHong Zhang /* add multiple of row i to k-th row ... */ 487671cb588SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 488671cb588SHong Zhang if (jmin < jmax){ 489671cb588SHong Zhang for (j=jmin; j<jmax; j++) { 490671cb588SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 491671cb588SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 492671cb588SHong Zhang u = ba + j*bs2; 493671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 494671cb588SHong Zhang } 495671cb588SHong Zhang 496671cb588SHong Zhang /* ... add i to row list for next nonzero entry */ 497671cb588SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 498671cb588SHong Zhang j = bj[jmin]; 499671cb588SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 500671cb588SHong Zhang } 501671cb588SHong Zhang i = nexti; 502671cb588SHong Zhang } 503671cb588SHong Zhang 504671cb588SHong Zhang /* save nonzero entries in k-th row of U ... */ 505671cb588SHong Zhang 506671cb588SHong Zhang /* invert diagonal block */ 507671cb588SHong Zhang diag = ba+k*bs2; 508671cb588SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 509671cb588SHong Zhang Kernel_A_gets_inverse_A(bs,diag,pivots,work); 510671cb588SHong Zhang 511671cb588SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 512671cb588SHong Zhang if (jmin < jmax) { 513671cb588SHong Zhang for (j=jmin; j<jmax; j++){ 514671cb588SHong Zhang vj = bj[j]; /* block col. index of U */ 515671cb588SHong Zhang u = ba + j*bs2; 516671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 517671cb588SHong Zhang for (k1=0; k1<bs2; k1++){ 518671cb588SHong Zhang *u++ = *rtmp_ptr; 519671cb588SHong Zhang *rtmp_ptr++ = 0.0; 520671cb588SHong Zhang } 521671cb588SHong Zhang } 522671cb588SHong Zhang 523671cb588SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 524671cb588SHong Zhang il[k] = jmin; 525671cb588SHong Zhang i = bj[jmin]; 526671cb588SHong Zhang jl[k] = jl[i]; jl[i] = k; 527671cb588SHong Zhang } 528671cb588SHong Zhang } 529671cb588SHong Zhang 530671cb588SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 531671cb588SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 532671cb588SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 533671cb588SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 534671cb588SHong Zhang 535671cb588SHong Zhang C->factor = FACTOR_CHOLESKY; 536671cb588SHong Zhang C->assembled = PETSC_TRUE; 537671cb588SHong Zhang C->preallocated = PETSC_TRUE; 538b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 539671cb588SHong Zhang PetscFunctionReturn(0); 540671cb588SHong Zhang } 541671cb588SHong Zhang 54249b5e25fSSatish Balay /* 543fcf159c0SHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 544cc0c071aSHong Zhang Version for blocks 2 by 2. 54549b5e25fSSatish Balay */ 5464a2ae208SSatish Balay #undef __FUNCT__ 5474a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 5486f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 54949b5e25fSSatish Balay { 55049b5e25fSSatish Balay Mat C = *B; 55191602c66SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 552cc0c071aSHong Zhang IS perm = b->row; 553cc0c071aSHong Zhang int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 554cc0c071aSHong Zhang int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 555a1723e09SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 556cc0c071aSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 55749b5e25fSSatish Balay 55849b5e25fSSatish Balay PetscFunctionBegin; 559671cb588SHong Zhang 56091602c66SHong Zhang /* initialization */ 56191602c66SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 56291602c66SHong Zhang window U(0:k, k:mbs-1). 56391602c66SHong Zhang jl: list of rows to be added to uneliminated rows 56491602c66SHong Zhang i>= k: jl(i) is the first row to be added to row i 56591602c66SHong Zhang i< k: jl(i) is the row following row i in some list of rows 56691602c66SHong Zhang jl(i) = mbs indicates the end of a list 56791602c66SHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 56891602c66SHong Zhang row i of U */ 56982502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 570cc0c071aSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 57182502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 57228de702eSHong Zhang jl = il + mbs; 57391602c66SHong Zhang for (i=0; i<mbs; i++) { 5743845f261SHong Zhang jl[i] = mbs; il[0] = 0; 57591602c66SHong Zhang } 57682502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 57728de702eSHong Zhang uik = dk + 4; 578cc0c071aSHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 579cc0c071aSHong Zhang 580cc0c071aSHong Zhang /* check permutation */ 581cc0c071aSHong Zhang if (!a->permute){ 582cc0c071aSHong Zhang ai = a->i; aj = a->j; aa = a->a; 583cc0c071aSHong Zhang } else { 584cc0c071aSHong Zhang ai = a->inew; aj = a->jnew; 58582502324SSatish Balay ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 586cc0c071aSHong Zhang ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 58782502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 588cc0c071aSHong Zhang ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 589cc0c071aSHong Zhang 590cc0c071aSHong Zhang for (i=0; i<mbs; i++){ 591cc0c071aSHong Zhang jmin = ai[i]; jmax = ai[i+1]; 592cc0c071aSHong Zhang for (j=jmin; j<jmax; j++){ 593cc0c071aSHong Zhang while (a2anew[j] != j){ 594cc0c071aSHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 595cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 596cc0c071aSHong Zhang dk[k1] = aa[k*4+k1]; 597cc0c071aSHong Zhang aa[k*4+k1] = aa[j*4+k1]; 598cc0c071aSHong Zhang aa[j*4+k1] = dk[k1]; 599cc0c071aSHong Zhang } 600cc0c071aSHong Zhang } 601cc0c071aSHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 602cc0c071aSHong Zhang if (i > aj[j]){ 603a1723e09SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 604cc0c071aSHong Zhang ap = aa + j*4; /* ptr to the beginning of the block */ 605cc0c071aSHong Zhang dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 606cc0c071aSHong Zhang ap[1] = ap[2]; 607cc0c071aSHong Zhang ap[2] = dk[1]; 608cc0c071aSHong Zhang } 609cc0c071aSHong Zhang } 610cc0c071aSHong Zhang } 611ac355199SBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 612cc0c071aSHong Zhang } 6133845f261SHong Zhang 61491602c66SHong Zhang /* for each row k */ 61591602c66SHong Zhang for (k = 0; k<mbs; k++){ 61691602c66SHong Zhang 61791602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 618cc0c071aSHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 619cc0c071aSHong Zhang ap = aa + jmin*4; 62091602c66SHong Zhang for (j = jmin; j < jmax; j++){ 621cc0c071aSHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 622cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 623cc0c071aSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 62491602c66SHong Zhang } 62591602c66SHong Zhang 62691602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 627cc0c071aSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 62891602c66SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 62991602c66SHong Zhang 630057f5ba7SHong Zhang while (i < k){ 63191602c66SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 63291602c66SHong Zhang 6333845f261SHong Zhang /* compute multiplier */ 63491602c66SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 6353845f261SHong Zhang 6363845f261SHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 637cc0c071aSHong Zhang diag = ba + i*4; 638cc0c071aSHong Zhang u = ba + ili*4; 639cc0c071aSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 640cc0c071aSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 641cc0c071aSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 642cc0c071aSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 6433845f261SHong Zhang 6443845f261SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 645cc0c071aSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 646cc0c071aSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 647cc0c071aSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 648cc0c071aSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 6493845f261SHong Zhang 6503845f261SHong Zhang /* update -U(i,k): ba[ili] = uik */ 651cc0c071aSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 65291602c66SHong Zhang 65391602c66SHong Zhang /* add multiple of row i to k-th row ... */ 65491602c66SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 65591602c66SHong Zhang if (jmin < jmax){ 6563845f261SHong Zhang for (j=jmin; j<jmax; j++) { 6573845f261SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 658cc0c071aSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 659cc0c071aSHong Zhang u = ba + j*4; 660cc0c071aSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 661cc0c071aSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 662cc0c071aSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 663cc0c071aSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 6643845f261SHong Zhang } 6653845f261SHong Zhang 66691602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 66791602c66SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 66891602c66SHong Zhang j = bj[jmin]; 66991602c66SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 67091602c66SHong Zhang } 671a1723e09SHong Zhang i = nexti; 67291602c66SHong Zhang } 673cc0c071aSHong Zhang 67491602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 6753845f261SHong Zhang 676cc0c071aSHong Zhang /* invert diagonal block */ 677cc0c071aSHong Zhang diag = ba+k*4; 678cc0c071aSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 6793845f261SHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 6803845f261SHong Zhang 68191602c66SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 68291602c66SHong Zhang if (jmin < jmax) { 68391602c66SHong Zhang for (j=jmin; j<jmax; j++){ 684cc0c071aSHong Zhang vj = bj[j]; /* block col. index of U */ 685cc0c071aSHong Zhang u = ba + j*4; 686cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 687cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 688cc0c071aSHong Zhang *u++ = *rtmp_ptr; 689cc0c071aSHong Zhang *rtmp_ptr++ = 0.0; 690cc0c071aSHong Zhang } 691cc0c071aSHong Zhang } 6923845f261SHong Zhang 69391602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 69491602c66SHong Zhang il[k] = jmin; 69591602c66SHong Zhang i = bj[jmin]; 69691602c66SHong Zhang jl[k] = jl[i]; jl[i] = k; 69791602c66SHong Zhang } 69891602c66SHong Zhang } 6993845f261SHong Zhang 70049b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 70191602c66SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 7023845f261SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 70391602c66SHong Zhang if (a->permute) { 70491602c66SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 70591602c66SHong Zhang } 706cc0c071aSHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 70791602c66SHong Zhang C->factor = FACTOR_CHOLESKY; 70849b5e25fSSatish Balay C->assembled = PETSC_TRUE; 7095c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 710b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 71149b5e25fSSatish Balay PetscFunctionReturn(0); 71249b5e25fSSatish Balay } 71391602c66SHong Zhang 71449b5e25fSSatish Balay /* 71549b5e25fSSatish Balay Version for when blocks are 2 by 2 Using natural ordering 71649b5e25fSSatish Balay */ 7174a2ae208SSatish Balay #undef __FUNCT__ 7184a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 7196f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 72049b5e25fSSatish Balay { 72149b5e25fSSatish Balay Mat C = *B; 722ab27746eSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 723ab27746eSHong Zhang int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 72471149678SHong Zhang int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 725ab27746eSHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 726ab27746eSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 72749b5e25fSSatish Balay 72849b5e25fSSatish Balay PetscFunctionBegin; 729671cb588SHong Zhang 730ab27746eSHong Zhang /* initialization */ 731ab27746eSHong Zhang /* il and jl record the first nonzero element in each row of the accessing 732ab27746eSHong Zhang window U(0:k, k:mbs-1). 733ab27746eSHong Zhang jl: list of rows to be added to uneliminated rows 734ab27746eSHong Zhang i>= k: jl(i) is the first row to be added to row i 735ab27746eSHong Zhang i< k: jl(i) is the row following row i in some list of rows 736ab27746eSHong Zhang jl(i) = mbs indicates the end of a list 737ab27746eSHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 738ab27746eSHong Zhang row i of U */ 73982502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 740ab27746eSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 74182502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 74228de702eSHong Zhang jl = il + mbs; 743ab27746eSHong Zhang for (i=0; i<mbs; i++) { 744ab27746eSHong Zhang jl[i] = mbs; il[0] = 0; 74549b5e25fSSatish Balay } 74682502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 74728de702eSHong Zhang uik = dk + 4; 748ab27746eSHong Zhang 749ab27746eSHong Zhang ai = a->i; aj = a->j; aa = a->a; 750ab27746eSHong Zhang 751ab27746eSHong Zhang /* for each row k */ 752ab27746eSHong Zhang for (k = 0; k<mbs; k++){ 753ab27746eSHong Zhang 754ab27746eSHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 755ab27746eSHong Zhang jmin = ai[k]; jmax = ai[k+1]; 756ab27746eSHong Zhang ap = aa + jmin*4; 757ab27746eSHong Zhang for (j = jmin; j < jmax; j++){ 758ab27746eSHong Zhang vj = aj[j]; /* block col. index */ 759ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 760ab27746eSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 76149b5e25fSSatish Balay } 762ab27746eSHong Zhang 763ab27746eSHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 764ab27746eSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 765ab27746eSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 766ab27746eSHong Zhang 767057f5ba7SHong Zhang while (i < k){ 768ab27746eSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 769ab27746eSHong Zhang 770ab27746eSHong Zhang /* compute multiplier */ 771ab27746eSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 772ab27746eSHong Zhang 773ab27746eSHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 774ab27746eSHong Zhang diag = ba + i*4; 775ab27746eSHong Zhang u = ba + ili*4; 776ab27746eSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 777ab27746eSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 778ab27746eSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 779ab27746eSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 780ab27746eSHong Zhang 781ab27746eSHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 782ab27746eSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 783ab27746eSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 784ab27746eSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 785ab27746eSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 786ab27746eSHong Zhang 787ab27746eSHong Zhang /* update -U(i,k): ba[ili] = uik */ 788ab27746eSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 789ab27746eSHong Zhang 790ab27746eSHong Zhang /* add multiple of row i to k-th row ... */ 791ab27746eSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 792ab27746eSHong Zhang if (jmin < jmax){ 793ab27746eSHong Zhang for (j=jmin; j<jmax; j++) { 794ab27746eSHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 795ab27746eSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 796ab27746eSHong Zhang u = ba + j*4; 797ab27746eSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 798ab27746eSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 799ab27746eSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 800ab27746eSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 80149b5e25fSSatish Balay } 802ab27746eSHong Zhang 803ab27746eSHong Zhang /* ... add i to row list for next nonzero entry */ 804ab27746eSHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 805ab27746eSHong Zhang j = bj[jmin]; 806ab27746eSHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 80749b5e25fSSatish Balay } 808ab27746eSHong Zhang i = nexti; 80949b5e25fSSatish Balay } 810ab27746eSHong Zhang 811ab27746eSHong Zhang /* save nonzero entries in k-th row of U ... */ 812ab27746eSHong Zhang 81349b5e25fSSatish Balay /* invert diagonal block */ 814ab27746eSHong Zhang diag = ba+k*4; 815ab27746eSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 816ab27746eSHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 817ab27746eSHong Zhang 818ab27746eSHong Zhang jmin = bi[k]; jmax = bi[k+1]; 819ab27746eSHong Zhang if (jmin < jmax) { 820ab27746eSHong Zhang for (j=jmin; j<jmax; j++){ 821ab27746eSHong Zhang vj = bj[j]; /* block col. index of U */ 822ab27746eSHong Zhang u = ba + j*4; 823ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 824ab27746eSHong Zhang for (k1=0; k1<4; k1++){ 825ab27746eSHong Zhang *u++ = *rtmp_ptr; 826ab27746eSHong Zhang *rtmp_ptr++ = 0.0; 827ab27746eSHong Zhang } 828ab27746eSHong Zhang } 829ab27746eSHong Zhang 830ab27746eSHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 831ab27746eSHong Zhang il[k] = jmin; 832ab27746eSHong Zhang i = bj[jmin]; 833ab27746eSHong Zhang jl[k] = jl[i]; jl[i] = k; 834ab27746eSHong Zhang } 83549b5e25fSSatish Balay } 83649b5e25fSSatish Balay 83749b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 838ab27746eSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 839ab27746eSHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 840ab27746eSHong Zhang 841ab27746eSHong Zhang C->factor = FACTOR_CHOLESKY; 84249b5e25fSSatish Balay C->assembled = PETSC_TRUE; 8435c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 844b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 84549b5e25fSSatish Balay PetscFunctionReturn(0); 84649b5e25fSSatish Balay } 84749b5e25fSSatish Balay 84849b5e25fSSatish Balay /* 8495c0bcdfcSHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 85091602c66SHong Zhang Version for blocks are 1 by 1. 85149b5e25fSSatish Balay */ 8524a2ae208SSatish Balay #undef __FUNCT__ 8534a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 8546f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 85549b5e25fSSatish Balay { 85649b5e25fSSatish Balay Mat C = *B; 85749b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 85849b5e25fSSatish Balay IS ip = b->row; 859351d0355SHong Zhang int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 860cb718733SHong Zhang int *ai,*aj,*r; 861ab27746eSHong Zhang int k,jmin,jmax,*jl,*il,vj,nexti,ili; 862066653e3SSatish Balay MatScalar *rtmp; 8632d007305SHong Zhang MatScalar *ba = b->a,*aa,ak; 86449b5e25fSSatish Balay MatScalar dk,uikdi; 86549b5e25fSSatish Balay 86649b5e25fSSatish Balay PetscFunctionBegin; 86749b5e25fSSatish Balay ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 868cb718733SHong Zhang if (!a->permute){ 8692d007305SHong Zhang ai = a->i; aj = a->j; aa = a->a; 8702d007305SHong Zhang } else { 8712d007305SHong Zhang ai = a->inew; aj = a->jnew; 87282502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 873cb718733SHong Zhang ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 87482502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 8752d007305SHong Zhang ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 8762d007305SHong Zhang 8772d007305SHong Zhang jmin = ai[0]; jmax = ai[mbs]; 8782d007305SHong Zhang for (j=jmin; j<jmax; j++){ 879cb718733SHong Zhang while (r[j] != j){ 8802d007305SHong Zhang k = r[j]; r[j] = r[k]; r[k] = k; 8812d007305SHong Zhang ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 8822d007305SHong Zhang } 8832d007305SHong Zhang } 884ac355199SBarry Smith ierr = PetscFree(r);CHKERRQ(ierr); 8852d007305SHong Zhang } 8862d007305SHong Zhang 88791602c66SHong Zhang /* initialization */ 88849b5e25fSSatish Balay /* il and jl record the first nonzero element in each row of the accessing 88949b5e25fSSatish Balay window U(0:k, k:mbs-1). 89049b5e25fSSatish Balay jl: list of rows to be added to uneliminated rows 89149b5e25fSSatish Balay i>= k: jl(i) is the first row to be added to row i 89249b5e25fSSatish Balay i< k: jl(i) is the row following row i in some list of rows 89349b5e25fSSatish Balay jl(i) = mbs indicates the end of a list 89449b5e25fSSatish Balay il(i): points to the first nonzero element in columns k,...,mbs-1 of 89549b5e25fSSatish Balay row i of U */ 89682502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 89782502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 89828de702eSHong Zhang jl = il + mbs; 89949b5e25fSSatish Balay for (i=0; i<mbs; i++) { 90049b5e25fSSatish Balay rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 90149b5e25fSSatish Balay } 90249b5e25fSSatish Balay 90391602c66SHong Zhang /* for each row k */ 90449b5e25fSSatish Balay for (k = 0; k<mbs; k++){ 90549b5e25fSSatish Balay 90691602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 90749b5e25fSSatish Balay jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 908057f5ba7SHong Zhang 90949b5e25fSSatish Balay for (j = jmin; j < jmax; j++){ 910351d0355SHong Zhang vj = rip[aj[j]]; 911ab27746eSHong Zhang rtmp[vj] = aa[j]; 91249b5e25fSSatish Balay } 91349b5e25fSSatish Balay 91491602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 91549b5e25fSSatish Balay dk = rtmp[k]; 91649b5e25fSSatish Balay i = jl[k]; /* first row to be added to k_th row */ 91749b5e25fSSatish Balay 918057f5ba7SHong Zhang while (i < k){ 91949b5e25fSSatish Balay nexti = jl[i]; /* next row to be added to k_th row */ 92049b5e25fSSatish Balay 92191602c66SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 92249b5e25fSSatish Balay ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 92349b5e25fSSatish Balay uikdi = - ba[ili]*ba[i]; 92449b5e25fSSatish Balay dk += uikdi*ba[ili]; 925658e7b3eSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 92649b5e25fSSatish Balay 92791602c66SHong Zhang /* add multiple of row i to k-th row ... */ 92849b5e25fSSatish Balay jmin = ili + 1; jmax = bi[i+1]; 92949b5e25fSSatish Balay if (jmin < jmax){ 93049b5e25fSSatish Balay for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 93191602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 93249b5e25fSSatish Balay il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 93349b5e25fSSatish Balay j = bj[jmin]; 93449b5e25fSSatish Balay jl[i] = jl[j]; jl[j] = i; /* update jl */ 93549b5e25fSSatish Balay } 936ab27746eSHong Zhang i = nexti; 93749b5e25fSSatish Balay } 93849b5e25fSSatish Balay 93991602c66SHong Zhang /* check for zero pivot and save diagoanl element */ 94049b5e25fSSatish Balay if (dk == 0.0){ 94129bbc08cSBarry Smith SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 942671cb588SHong Zhang /* 9431223c01bSHong Zhang } else if (PetscRealPart(dk) < 0.0){ 9441223c01bSHong Zhang SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 945671cb588SHong Zhang */ 94649b5e25fSSatish Balay } 94749b5e25fSSatish Balay 94891602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 949f3dd7b2dSKris Buschelman ba[k] = 1.0/dk; 95049b5e25fSSatish Balay jmin = bi[k]; jmax = bi[k+1]; 95149b5e25fSSatish Balay if (jmin < jmax) { 95249b5e25fSSatish Balay for (j=jmin; j<jmax; j++){ 953cc0c071aSHong Zhang vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 95449b5e25fSSatish Balay } 95591602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 95649b5e25fSSatish Balay il[k] = jmin; 95749b5e25fSSatish Balay i = bj[jmin]; 95849b5e25fSSatish Balay jl[k] = jl[i]; jl[i] = k; 95949b5e25fSSatish Balay } 96049b5e25fSSatish Balay } 96149b5e25fSSatish Balay 96249b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 96349b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 964cb718733SHong Zhang if (a->permute){ 965cb718733SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 966cb718733SHong Zhang } 96749b5e25fSSatish Balay 96849b5e25fSSatish Balay ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 9698be8c0c7SHong Zhang C->factor = FACTOR_CHOLESKY; 97049b5e25fSSatish Balay C->assembled = PETSC_TRUE; 9715c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 972b0a32e0cSBarry Smith PetscLogFlops(b->mbs); 97349b5e25fSSatish Balay PetscFunctionReturn(0); 97449b5e25fSSatish Balay } 97549b5e25fSSatish Balay 976671cb588SHong Zhang /* 977671cb588SHong Zhang Version for when blocks are 1 by 1 Using natural ordering 978671cb588SHong Zhang */ 9794a2ae208SSatish Balay #undef __FUNCT__ 9804a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 981671cb588SHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 982671cb588SHong Zhang { 983671cb588SHong Zhang Mat C = *B; 984671cb588SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 985671cb588SHong Zhang int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 9861ad1de41SHong Zhang int *ai,*aj; 987671cb588SHong Zhang int k,jmin,jmax,*jl,*il,vj,nexti,ili; 9881ad1de41SHong Zhang MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 989671cb588SHong Zhang 990671cb588SHong Zhang PetscFunctionBegin; 991671cb588SHong Zhang 992671cb588SHong Zhang /* initialization */ 993671cb588SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 994671cb588SHong Zhang window U(0:k, k:mbs-1). 995671cb588SHong Zhang jl: list of rows to be added to uneliminated rows 996671cb588SHong Zhang i>= k: jl(i) is the first row to be added to row i 997671cb588SHong Zhang i< k: jl(i) is the row following row i in some list of rows 998671cb588SHong Zhang jl(i) = mbs indicates the end of a list 999671cb588SHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 1000671cb588SHong Zhang row i of U */ 100182502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 100282502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 100328de702eSHong Zhang jl = il + mbs; 1004671cb588SHong Zhang for (i=0; i<mbs; i++) { 1005671cb588SHong Zhang rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1006671cb588SHong Zhang } 1007671cb588SHong Zhang 1008671cb588SHong Zhang ai = a->i; aj = a->j; aa = a->a; 1009671cb588SHong Zhang 1010671cb588SHong Zhang /* for each row k */ 1011671cb588SHong Zhang for (k = 0; k<mbs; k++){ 1012671cb588SHong Zhang 1013671cb588SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 1014671cb588SHong Zhang jmin = ai[k]; jmax = ai[k+1]; 1015057f5ba7SHong Zhang 1016671cb588SHong Zhang for (j = jmin; j < jmax; j++){ 1017671cb588SHong Zhang vj = aj[j]; 1018671cb588SHong Zhang rtmp[vj] = aa[j]; 1019671cb588SHong Zhang } 1020671cb588SHong Zhang 1021671cb588SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1022671cb588SHong Zhang dk = rtmp[k]; 1023671cb588SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 1024671cb588SHong Zhang 1025057f5ba7SHong Zhang while (i < k){ 1026671cb588SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 1027671cb588SHong Zhang 1028671cb588SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 1029671cb588SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1030671cb588SHong Zhang uikdi = - ba[ili]*ba[i]; 1031671cb588SHong Zhang dk += uikdi*ba[ili]; 1032671cb588SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 1033671cb588SHong Zhang 1034671cb588SHong Zhang /* add multiple of row i to k-th row ... */ 1035671cb588SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 1036671cb588SHong Zhang if (jmin < jmax){ 1037671cb588SHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1038671cb588SHong Zhang /* ... add i to row list for next nonzero entry */ 1039671cb588SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1040671cb588SHong Zhang j = bj[jmin]; 1041671cb588SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 1042671cb588SHong Zhang } 1043671cb588SHong Zhang i = nexti; 1044671cb588SHong Zhang } 1045671cb588SHong Zhang 1046671cb588SHong Zhang /* check for zero pivot and save diagoanl element */ 1047671cb588SHong Zhang if (dk == 0.0){ 1048671cb588SHong Zhang SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1049671cb588SHong Zhang /* 1050671cb588SHong Zhang } else if (PetscRealPart(dk) < 0){ 10511223c01bSHong Zhang SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1052671cb588SHong Zhang */ 1053671cb588SHong Zhang } 1054671cb588SHong Zhang 1055671cb588SHong Zhang /* save nonzero entries in k-th row of U ... */ 1056671cb588SHong Zhang ba[k] = 1.0/dk; 1057671cb588SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 1058671cb588SHong Zhang if (jmin < jmax) { 1059671cb588SHong Zhang for (j=jmin; j<jmax; j++){ 1060671cb588SHong Zhang vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1061671cb588SHong Zhang } 1062671cb588SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 1063671cb588SHong Zhang il[k] = jmin; 1064671cb588SHong Zhang i = bj[jmin]; 1065671cb588SHong Zhang jl[k] = jl[i]; jl[i] = k; 1066671cb588SHong Zhang } 1067671cb588SHong Zhang } 1068671cb588SHong Zhang 1069671cb588SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 1070671cb588SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 1071671cb588SHong Zhang 1072671cb588SHong Zhang C->factor = FACTOR_CHOLESKY; 1073671cb588SHong Zhang C->assembled = PETSC_TRUE; 1074671cb588SHong Zhang C->preallocated = PETSC_TRUE; 1075b0a32e0cSBarry Smith PetscLogFlops(b->mbs); 1076671cb588SHong Zhang PetscFunctionReturn(0); 1077671cb588SHong Zhang } 1078671cb588SHong Zhang 10794a2ae208SSatish Balay #undef __FUNCT__ 10804a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1081b8b23757SHong Zhang int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 108249b5e25fSSatish Balay { 10834445ddedSHong Zhang int ierr; 108449b5e25fSSatish Balay Mat C; 108549b5e25fSSatish Balay 108649b5e25fSSatish Balay PetscFunctionBegin; 1087b8b23757SHong Zhang ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1088a4ada70bSHong Zhang ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 10894445ddedSHong Zhang ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 109049b5e25fSSatish Balay PetscFunctionReturn(0); 109149b5e25fSSatish Balay } 109249b5e25fSSatish Balay 109349b5e25fSSatish Balay 1094