1*4a2ae208SSatish Balay /*$Id: sbaijfact.c,v 1.57 2001/02/01 17:01:40 bsmith Exp balay $*/ 249b5e25fSSatish Balay /* Using Modified Sparse Row (MSR) storage. 349b5e25fSSatish Balay See page 85, "Iterative Methods ..." by Saad. */ 449b5e25fSSatish Balay 549b5e25fSSatish Balay /* 6effa298cSHong Zhang Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 749b5e25fSSatish Balay */ 849b5e25fSSatish Balay #include "sbaij.h" 949b5e25fSSatish Balay #include "src/mat/impls/baij/seq/baij.h" 1049b5e25fSSatish Balay #include "src/vec/vecimpl.h" 1149b5e25fSSatish Balay #include "src/inline/ilu.h" 1249a6740bSHong Zhang #include "include/petscis.h" 1349b5e25fSSatish Balay 14*4a2ae208SSatish Balay #undef __FUNCT__ 15*4a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 1677f73120SHong Zhang int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 1749b5e25fSSatish Balay { 1849b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 19cb718733SHong Zhang int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 20066653e3SSatish Balay int *jutmp,bs = a->bs,bs2=a->bs2; 21066653e3SSatish Balay int m,nzi,realloc = 0; 22066653e3SSatish Balay int *jl,*q,jumin,jmin,jmax,juptr,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 23671cb588SHong Zhang PetscTruth perm_identity; 2449b5e25fSSatish Balay 2549b5e25fSSatish Balay PetscFunctionBegin; 26671cb588SHong Zhang if (!perm) { 27671cb588SHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,mbs,0,1,&perm);CHKERRQ(ierr); 28671cb588SHong Zhang } 2977f73120SHong Zhang PetscValidHeaderSpecific(perm,IS_COOKIE); 302d007305SHong Zhang 31cb718733SHong Zhang /* check whether perm is the identity mapping */ 32671cb588SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 33671cb588SHong Zhang if (!perm_identity) a->permute = PETSC_TRUE; 342d007305SHong Zhang 35671cb588SHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 36671cb588SHong Zhang 37671cb588SHong Zhang if (perm_identity){ /* without permutation */ 382d007305SHong Zhang ai = a->i; aj = a->j; 392d007305SHong Zhang } else { /* non-trivial permutation */ 40323b833fSBarry Smith ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 412d007305SHong Zhang ai = a->inew; aj = a->jnew; 422d007305SHong Zhang } 432d007305SHong Zhang 4449b5e25fSSatish Balay /* initialization */ 4549b5e25fSSatish Balay /* Don't know how many column pointers are needed so estimate. 4649b5e25fSSatish Balay Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 47b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 4849b5e25fSSatish Balay umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 4982502324SSatish Balay ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 5049b5e25fSSatish Balay iu[0] = mbs+1; 5149b5e25fSSatish Balay juptr = mbs; 5282502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 5382502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(int),&q);CHKERRQ(ierr); 5449b5e25fSSatish Balay for (i=0; i<mbs; i++){ 5549b5e25fSSatish Balay jl[i] = mbs; q[i] = 0; 5649b5e25fSSatish Balay } 5749b5e25fSSatish Balay 5849b5e25fSSatish Balay /* for each row k */ 5949b5e25fSSatish Balay for (k=0; k<mbs; k++){ 6049b5e25fSSatish Balay nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 6149b5e25fSSatish Balay q[k] = mbs; 6249b5e25fSSatish Balay /* initialize nonzero structure of k-th row to row rip[k] of A */ 6349b5e25fSSatish Balay jmin = ai[rip[k]]; 6449b5e25fSSatish Balay jmax = ai[rip[k]+1]; 6549b5e25fSSatish Balay for (j=jmin; j<jmax; j++){ 66cb718733SHong Zhang vj = rip[aj[j]]; /* col. value */ 6749b5e25fSSatish Balay if(vj > k){ 6849b5e25fSSatish Balay qm = k; 6949b5e25fSSatish Balay do { 7049b5e25fSSatish Balay m = qm; qm = q[m]; 7149b5e25fSSatish Balay } while(qm < vj); 7249b5e25fSSatish Balay if (qm == vj) { 7349b5e25fSSatish Balay printf(" error: duplicate entry in A\n"); break; 7449b5e25fSSatish Balay } 7549b5e25fSSatish Balay nzk++; 7649b5e25fSSatish Balay q[m] = vj; 7749b5e25fSSatish Balay q[vj] = qm; 7849b5e25fSSatish Balay } /* if(vj > k) */ 7949b5e25fSSatish Balay } /* for (j=jmin; j<jmax; j++) */ 8049b5e25fSSatish Balay 8149b5e25fSSatish Balay /* modify nonzero structure of k-th row by computing fill-in 8249b5e25fSSatish Balay for each row i to be merged in */ 8349b5e25fSSatish Balay i = k; 8449b5e25fSSatish Balay i = jl[i]; /* next pivot row (== mbs for symbolic factorization) */ 8549b5e25fSSatish Balay /* printf(" next pivot row i=%d\n",i); */ 8649b5e25fSSatish Balay while (i < mbs){ 8749b5e25fSSatish Balay /* merge row i into k-th row */ 8849b5e25fSSatish Balay nzi = iu[i+1] - (iu[i]+1); 8949b5e25fSSatish Balay jmin = iu[i] + 1; jmax = iu[i] + nzi; 9049b5e25fSSatish Balay qm = k; 9149b5e25fSSatish Balay for (j=jmin; j<jmax+1; j++){ 9249b5e25fSSatish Balay vj = ju[j]; 9349b5e25fSSatish Balay do { 9449b5e25fSSatish Balay m = qm; qm = q[m]; 9549b5e25fSSatish Balay } while (qm < vj); 9649b5e25fSSatish Balay if (qm != vj){ 9749b5e25fSSatish Balay nzk++; q[m] = vj; q[vj] = qm; qm = vj; 9849b5e25fSSatish Balay } 9949b5e25fSSatish Balay } 10049b5e25fSSatish Balay i = jl[i]; /* next pivot row */ 10149b5e25fSSatish Balay } 10249b5e25fSSatish Balay 10349b5e25fSSatish Balay /* add k to row list for first nonzero element in k-th row */ 10449b5e25fSSatish Balay if (nzk > 0){ 10549b5e25fSSatish Balay i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 10649b5e25fSSatish Balay jl[k] = jl[i]; jl[i] = k; 10749b5e25fSSatish Balay } 10849b5e25fSSatish Balay iu[k+1] = iu[k] + nzk; /* printf(" iu[%d]=%d, umax=%d\n", k+1, iu[k+1],umax);*/ 10949b5e25fSSatish Balay 11049b5e25fSSatish Balay /* allocate more space to ju if needed */ 11149b5e25fSSatish Balay if (iu[k+1] > umax) { printf("allocate more space, iu[%d]=%d > umax=%d\n",k+1, iu[k+1],umax); 11249b5e25fSSatish Balay /* estimate how much additional space we will need */ 11349b5e25fSSatish Balay /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 11449b5e25fSSatish Balay /* just double the memory each time */ 11549b5e25fSSatish Balay maxadd = umax; 11649b5e25fSSatish Balay if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 11749b5e25fSSatish Balay umax += maxadd; 11849b5e25fSSatish Balay 1199f9cb213SHong Zhang /* allocate a longer ju */ 12082502324SSatish Balay ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 12149b5e25fSSatish Balay ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 12249b5e25fSSatish Balay ierr = PetscFree(ju);CHKERRQ(ierr); 1239f9cb213SHong Zhang ju = jutmp; 12449b5e25fSSatish Balay realloc++; /* count how many times we realloc */ 12549b5e25fSSatish Balay } 12649b5e25fSSatish Balay 12749b5e25fSSatish Balay /* save nonzero structure of k-th row in ju */ 12849b5e25fSSatish Balay i=k; 12949b5e25fSSatish Balay jumin = juptr + 1; juptr += nzk; 13049b5e25fSSatish Balay for (j=jumin; j<juptr+1; j++){ 13149b5e25fSSatish Balay i=q[i]; 13249b5e25fSSatish Balay ju[j]=i; 13349b5e25fSSatish Balay } 13477f73120SHong Zhang } 13549b5e25fSSatish Balay 13649b5e25fSSatish Balay if (ai[mbs] != 0) { 13749b5e25fSSatish Balay PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 138b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 139b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_lu_fill %g or use \n",af); 140b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCLUSetFill(pc,%g);\n",af); 141b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 14249b5e25fSSatish Balay } else { 143b0a32e0cSBarry Smith PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 14449b5e25fSSatish Balay } 14549b5e25fSSatish Balay 14677f73120SHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 14749b5e25fSSatish Balay ierr = PetscFree(q);CHKERRQ(ierr); 14849b5e25fSSatish Balay ierr = PetscFree(jl);CHKERRQ(ierr); 14949b5e25fSSatish Balay 15049b5e25fSSatish Balay /* put together the new matrix */ 15149b5e25fSSatish Balay ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 152b0a32e0cSBarry Smith /* PetscLogObjectParent(*B,iperm); */ 15349b5e25fSSatish Balay b = (Mat_SeqSBAIJ*)(*B)->data; 15449b5e25fSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 15549b5e25fSSatish Balay b->singlemalloc = PETSC_FALSE; 15649b5e25fSSatish Balay /* the next line frees the default space generated by the Create() */ 15749b5e25fSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 15849b5e25fSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 15982502324SSatish Balay ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 16049b5e25fSSatish Balay b->j = ju; 16149b5e25fSSatish Balay b->i = iu; 16249b5e25fSSatish Balay b->diag = 0; 16349b5e25fSSatish Balay b->ilen = 0; 16449b5e25fSSatish Balay b->imax = 0; 16577f73120SHong Zhang b->row = perm; 16677f73120SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 167cb718733SHong Zhang b->icol = perm; 168cb718733SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 169b0a32e0cSBarry Smith ierr = PetscMalloc((bs*mbs+bs)*sizeof(Scalar),&b->solve_work);CHKERRQ(ierr); 17049b5e25fSSatish Balay /* In b structure: Free imax, ilen, old a, old j. 17149b5e25fSSatish Balay Allocate idnew, solve_work, new a, new j */ 172b0a32e0cSBarry Smith PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 17349b5e25fSSatish Balay b->s_maxnz = b->s_nz = iu[mbs]; 17449b5e25fSSatish Balay 1755c0bcdfcSHong Zhang (*B)->factor = FACTOR_CHOLESKY; 17649b5e25fSSatish Balay (*B)->info.factor_mallocs = realloc; 17749b5e25fSSatish Balay (*B)->info.fill_ratio_given = f; 17849b5e25fSSatish Balay if (ai[mbs] != 0) { 17949b5e25fSSatish Balay (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 18049b5e25fSSatish Balay } else { 18149b5e25fSSatish Balay (*B)->info.fill_ratio_needed = 0.0; 18249b5e25fSSatish Balay } 1830aa0ce06SHong Zhang 184671cb588SHong Zhang if (perm_identity){ 185671cb588SHong Zhang switch (bs) { 186671cb588SHong Zhang case 1: 187671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 188671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 189b0a32e0cSBarry Smith PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 190671cb588SHong Zhang break; 191671cb588SHong Zhang case 2: 192671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 193671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 194b0a32e0cSBarry Smith PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 195671cb588SHong Zhang break; 196671cb588SHong Zhang case 3: 197671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 198671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 199b0a32e0cSBarry Smith PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 200671cb588SHong Zhang break; 201671cb588SHong Zhang case 4: 202671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 203671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 204b0a32e0cSBarry Smith PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 205671cb588SHong Zhang break; 206671cb588SHong Zhang case 5: 207671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 208671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 209b0a32e0cSBarry Smith PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 210671cb588SHong Zhang break; 211671cb588SHong Zhang case 6: 212671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 213671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 214b0a32e0cSBarry Smith PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 215671cb588SHong Zhang break; 216671cb588SHong Zhang case 7: 217671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 218671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 219b0a32e0cSBarry Smith PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 220671cb588SHong Zhang break; 221671cb588SHong Zhang default: 222671cb588SHong Zhang (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 223671cb588SHong Zhang (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 224b0a32e0cSBarry Smith PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 225671cb588SHong Zhang break; 226671cb588SHong Zhang } 227671cb588SHong Zhang } 228671cb588SHong Zhang 22949b5e25fSSatish Balay PetscFunctionReturn(0); 23049b5e25fSSatish Balay } 23149b5e25fSSatish Balay 23235d8aa7fSBarry Smith 233*4a2ae208SSatish Balay #undef __FUNCT__ 234*4a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 2356f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 23649b5e25fSSatish Balay { 23749b5e25fSSatish Balay Mat C = *B; 2384c16a6a6SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 2394c16a6a6SHong Zhang IS perm = b->row; 2404c16a6a6SHong Zhang int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 2414c16a6a6SHong Zhang int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 2424c16a6a6SHong Zhang int bs=a->bs,bs2 = a->bs2; 2434c16a6a6SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 2444c16a6a6SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 24528de702eSHong Zhang MatScalar *work; 2464c16a6a6SHong Zhang int *pivots; 2474c16a6a6SHong Zhang 2484c16a6a6SHong Zhang PetscFunctionBegin; 2499706f043SHong Zhang 2504c16a6a6SHong Zhang /* initialization */ 25182502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2524c16a6a6SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 25382502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 25428de702eSHong Zhang jl = il + mbs; 2554c16a6a6SHong Zhang for (i=0; i<mbs; i++) { 2564c16a6a6SHong Zhang jl[i] = mbs; il[0] = 0; 2574c16a6a6SHong Zhang } 258b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 25928de702eSHong Zhang uik = dk + bs2; 26028de702eSHong Zhang work = uik + bs2; 26182502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 2624c16a6a6SHong Zhang 2634c16a6a6SHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 2644c16a6a6SHong Zhang 2654c16a6a6SHong Zhang /* check permutation */ 2664c16a6a6SHong Zhang if (!a->permute){ 2674c16a6a6SHong Zhang ai = a->i; aj = a->j; aa = a->a; 2684c16a6a6SHong Zhang } else { 2694c16a6a6SHong Zhang ai = a->inew; aj = a->jnew; 27082502324SSatish Balay ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 2714c16a6a6SHong Zhang ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 27282502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 2734c16a6a6SHong Zhang ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 2744c16a6a6SHong Zhang 2754c16a6a6SHong Zhang for (i=0; i<mbs; i++){ 2764c16a6a6SHong Zhang jmin = ai[i]; jmax = ai[i+1]; 2774c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 2784c16a6a6SHong Zhang while (a2anew[j] != j){ 2794c16a6a6SHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 2804c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 2814c16a6a6SHong Zhang dk[k1] = aa[k*bs2+k1]; 2824c16a6a6SHong Zhang aa[k*bs2+k1] = aa[j*bs2+k1]; 2834c16a6a6SHong Zhang aa[j*bs2+k1] = dk[k1]; 2844c16a6a6SHong Zhang } 2854c16a6a6SHong Zhang } 2864c16a6a6SHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 2874c16a6a6SHong Zhang if (i > aj[j]){ 2884c16a6a6SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 2894c16a6a6SHong Zhang ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 2904c16a6a6SHong Zhang for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 2914c16a6a6SHong Zhang for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 2924c16a6a6SHong Zhang for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 2934c16a6a6SHong Zhang } 2944c16a6a6SHong Zhang } 2954c16a6a6SHong Zhang } 2964c16a6a6SHong Zhang } 297323b833fSBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 2984c16a6a6SHong Zhang } 2994c16a6a6SHong Zhang 3004c16a6a6SHong Zhang /* for each row k */ 3014c16a6a6SHong Zhang for (k = 0; k<mbs; k++){ 3024c16a6a6SHong Zhang 3034c16a6a6SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 3044c16a6a6SHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 3054c16a6a6SHong Zhang if (jmin < jmax) { 3064c16a6a6SHong Zhang ap = aa + jmin*bs2; 3074c16a6a6SHong Zhang for (j = jmin; j < jmax; j++){ 3084c16a6a6SHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 3094c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 3104c16a6a6SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 3114c16a6a6SHong Zhang } 3124c16a6a6SHong Zhang } 3134c16a6a6SHong Zhang 3144c16a6a6SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 3154c16a6a6SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3164c16a6a6SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 3174c16a6a6SHong Zhang 3184c16a6a6SHong Zhang while (i < mbs){ 3194c16a6a6SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 3204c16a6a6SHong Zhang 3214c16a6a6SHong Zhang /* compute multiplier */ 3224c16a6a6SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 3234c16a6a6SHong Zhang 3244c16a6a6SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 3254c16a6a6SHong Zhang diag = ba + i*bs2; 3264c16a6a6SHong Zhang u = ba + ili*bs2; 3274c16a6a6SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3284c16a6a6SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 3294c16a6a6SHong Zhang 3304c16a6a6SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 3314c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 3324c16a6a6SHong Zhang 3334c16a6a6SHong Zhang /* update -U(i,k) */ 3344c16a6a6SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3354c16a6a6SHong Zhang 3364c16a6a6SHong Zhang /* add multiple of row i to k-th row ... */ 3374c16a6a6SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 3384c16a6a6SHong Zhang if (jmin < jmax){ 3394c16a6a6SHong Zhang for (j=jmin; j<jmax; j++) { 3404c16a6a6SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 3414c16a6a6SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 3424c16a6a6SHong Zhang u = ba + j*bs2; 3434c16a6a6SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 3444c16a6a6SHong Zhang } 3454c16a6a6SHong Zhang 3464c16a6a6SHong Zhang /* ... add i to row list for next nonzero entry */ 3474c16a6a6SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 3484c16a6a6SHong Zhang j = bj[jmin]; 3494c16a6a6SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 3504c16a6a6SHong Zhang } 3514c16a6a6SHong Zhang i = nexti; 3524c16a6a6SHong Zhang } 3534c16a6a6SHong Zhang 3544c16a6a6SHong Zhang /* save nonzero entries in k-th row of U ... */ 3554c16a6a6SHong Zhang 3564c16a6a6SHong Zhang /* invert diagonal block */ 3574c16a6a6SHong Zhang diag = ba+k*bs2; 3584c16a6a6SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 3594c16a6a6SHong Zhang Kernel_A_gets_inverse_A(bs,diag,pivots,work); 3604c16a6a6SHong Zhang 3614c16a6a6SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 3624c16a6a6SHong Zhang if (jmin < jmax) { 3634c16a6a6SHong Zhang for (j=jmin; j<jmax; j++){ 3644c16a6a6SHong Zhang vj = bj[j]; /* block col. index of U */ 3654c16a6a6SHong Zhang u = ba + j*bs2; 3664c16a6a6SHong Zhang rtmp_ptr = rtmp + vj*bs2; 3674c16a6a6SHong Zhang for (k1=0; k1<bs2; k1++){ 3684c16a6a6SHong Zhang *u++ = *rtmp_ptr; 3694c16a6a6SHong Zhang *rtmp_ptr++ = 0.0; 3704c16a6a6SHong Zhang } 3714c16a6a6SHong Zhang } 3724c16a6a6SHong Zhang 3734c16a6a6SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 3744c16a6a6SHong Zhang il[k] = jmin; 3754c16a6a6SHong Zhang i = bj[jmin]; 3764c16a6a6SHong Zhang jl[k] = jl[i]; jl[i] = k; 3774c16a6a6SHong Zhang } 3784c16a6a6SHong Zhang } 3794c16a6a6SHong Zhang 3804c16a6a6SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 3814c16a6a6SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 3824c16a6a6SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 3834c16a6a6SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 3844c16a6a6SHong Zhang if (a->permute){ 3854c16a6a6SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 3864c16a6a6SHong Zhang } 3874c16a6a6SHong Zhang 3884c16a6a6SHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 3894c16a6a6SHong Zhang C->factor = FACTOR_CHOLESKY; 3904c16a6a6SHong Zhang C->assembled = PETSC_TRUE; 3914c16a6a6SHong Zhang C->preallocated = PETSC_TRUE; 392b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 3934c16a6a6SHong Zhang PetscFunctionReturn(0); 3944c16a6a6SHong Zhang } 395d4edadd4SHong Zhang 396*4a2ae208SSatish Balay #undef __FUNCT__ 397*4a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 398671cb588SHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 399671cb588SHong Zhang { 400671cb588SHong Zhang Mat C = *B; 401671cb588SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 402671cb588SHong Zhang int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 4031ad1de41SHong Zhang int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 404671cb588SHong Zhang int bs=a->bs,bs2 = a->bs2; 405671cb588SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 406671cb588SHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 40728de702eSHong Zhang MatScalar *work; 408671cb588SHong Zhang int *pivots; 409671cb588SHong Zhang 410671cb588SHong Zhang PetscFunctionBegin; 411671cb588SHong Zhang 412671cb588SHong Zhang /* initialization */ 413671cb588SHong Zhang 41482502324SSatish Balay ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 415671cb588SHong Zhang ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 41682502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 41728de702eSHong Zhang jl = il + mbs; 418671cb588SHong Zhang for (i=0; i<mbs; i++) { 419671cb588SHong Zhang jl[i] = mbs; il[0] = 0; 420671cb588SHong Zhang } 421b0a32e0cSBarry Smith ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 42228de702eSHong Zhang uik = dk + bs2; 42328de702eSHong Zhang work = uik + bs2; 42482502324SSatish Balay ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 425671cb588SHong Zhang 426671cb588SHong Zhang ai = a->i; aj = a->j; aa = a->a; 427671cb588SHong Zhang 428671cb588SHong Zhang /* for each row k */ 429671cb588SHong Zhang for (k = 0; k<mbs; k++){ 430671cb588SHong Zhang 431671cb588SHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 432671cb588SHong Zhang jmin = ai[k]; jmax = ai[k+1]; 433671cb588SHong Zhang if (jmin < jmax) { 434671cb588SHong Zhang ap = aa + jmin*bs2; 435671cb588SHong Zhang for (j = jmin; j < jmax; j++){ 436671cb588SHong Zhang vj = aj[j]; /* block col. index */ 437671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 438671cb588SHong Zhang for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 439671cb588SHong Zhang } 440671cb588SHong Zhang } 441671cb588SHong Zhang 442671cb588SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 443671cb588SHong Zhang ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 444671cb588SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 445671cb588SHong Zhang 446671cb588SHong Zhang while (i < mbs){ 447671cb588SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 448671cb588SHong Zhang 449671cb588SHong Zhang /* compute multiplier */ 450671cb588SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 451671cb588SHong Zhang 452671cb588SHong Zhang /* uik = -inv(Di)*U_bar(i,k) */ 453671cb588SHong Zhang diag = ba + i*bs2; 454671cb588SHong Zhang u = ba + ili*bs2; 455671cb588SHong Zhang ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 456671cb588SHong Zhang Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 457671cb588SHong Zhang 458671cb588SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 459671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 460671cb588SHong Zhang 461671cb588SHong Zhang /* update -U(i,k) */ 462671cb588SHong Zhang ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 463671cb588SHong Zhang 464671cb588SHong Zhang /* add multiple of row i to k-th row ... */ 465671cb588SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 466671cb588SHong Zhang if (jmin < jmax){ 467671cb588SHong Zhang for (j=jmin; j<jmax; j++) { 468671cb588SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j) */ 469671cb588SHong Zhang rtmp_ptr = rtmp + bj[j]*bs2; 470671cb588SHong Zhang u = ba + j*bs2; 471671cb588SHong Zhang Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 472671cb588SHong Zhang } 473671cb588SHong Zhang 474671cb588SHong Zhang /* ... add i to row list for next nonzero entry */ 475671cb588SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 476671cb588SHong Zhang j = bj[jmin]; 477671cb588SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 478671cb588SHong Zhang } 479671cb588SHong Zhang i = nexti; 480671cb588SHong Zhang } 481671cb588SHong Zhang 482671cb588SHong Zhang /* save nonzero entries in k-th row of U ... */ 483671cb588SHong Zhang 484671cb588SHong Zhang /* invert diagonal block */ 485671cb588SHong Zhang diag = ba+k*bs2; 486671cb588SHong Zhang ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 487671cb588SHong Zhang Kernel_A_gets_inverse_A(bs,diag,pivots,work); 488671cb588SHong Zhang 489671cb588SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 490671cb588SHong Zhang if (jmin < jmax) { 491671cb588SHong Zhang for (j=jmin; j<jmax; j++){ 492671cb588SHong Zhang vj = bj[j]; /* block col. index of U */ 493671cb588SHong Zhang u = ba + j*bs2; 494671cb588SHong Zhang rtmp_ptr = rtmp + vj*bs2; 495671cb588SHong Zhang for (k1=0; k1<bs2; k1++){ 496671cb588SHong Zhang *u++ = *rtmp_ptr; 497671cb588SHong Zhang *rtmp_ptr++ = 0.0; 498671cb588SHong Zhang } 499671cb588SHong Zhang } 500671cb588SHong Zhang 501671cb588SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 502671cb588SHong Zhang il[k] = jmin; 503671cb588SHong Zhang i = bj[jmin]; 504671cb588SHong Zhang jl[k] = jl[i]; jl[i] = k; 505671cb588SHong Zhang } 506671cb588SHong Zhang } 507671cb588SHong Zhang 508671cb588SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 509671cb588SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 510671cb588SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 511671cb588SHong Zhang ierr = PetscFree(pivots);CHKERRQ(ierr); 512671cb588SHong Zhang 513671cb588SHong Zhang C->factor = FACTOR_CHOLESKY; 514671cb588SHong Zhang C->assembled = PETSC_TRUE; 515671cb588SHong Zhang C->preallocated = PETSC_TRUE; 516b0a32e0cSBarry Smith PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 517671cb588SHong Zhang PetscFunctionReturn(0); 518671cb588SHong Zhang } 519671cb588SHong Zhang 52049b5e25fSSatish Balay /* 521fcf159c0SHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 522cc0c071aSHong Zhang Version for blocks 2 by 2. 52349b5e25fSSatish Balay */ 524*4a2ae208SSatish Balay #undef __FUNCT__ 525*4a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 5266f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 52749b5e25fSSatish Balay { 52849b5e25fSSatish Balay Mat C = *B; 52991602c66SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 530cc0c071aSHong Zhang IS perm = b->row; 531cc0c071aSHong Zhang int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 532cc0c071aSHong Zhang int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 533a1723e09SHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 534cc0c071aSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 53549b5e25fSSatish Balay 53649b5e25fSSatish Balay PetscFunctionBegin; 537671cb588SHong Zhang 53891602c66SHong Zhang /* initialization */ 53991602c66SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 54091602c66SHong Zhang window U(0:k, k:mbs-1). 54191602c66SHong Zhang jl: list of rows to be added to uneliminated rows 54291602c66SHong Zhang i>= k: jl(i) is the first row to be added to row i 54391602c66SHong Zhang i< k: jl(i) is the row following row i in some list of rows 54491602c66SHong Zhang jl(i) = mbs indicates the end of a list 54591602c66SHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 54691602c66SHong Zhang row i of U */ 54782502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 548cc0c071aSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 54982502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 55028de702eSHong Zhang jl = il + mbs; 55191602c66SHong Zhang for (i=0; i<mbs; i++) { 5523845f261SHong Zhang jl[i] = mbs; il[0] = 0; 55391602c66SHong Zhang } 55482502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 55528de702eSHong Zhang uik = dk + 4; 556cc0c071aSHong Zhang ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 557cc0c071aSHong Zhang 558cc0c071aSHong Zhang /* check permutation */ 559cc0c071aSHong Zhang if (!a->permute){ 560cc0c071aSHong Zhang ai = a->i; aj = a->j; aa = a->a; 561cc0c071aSHong Zhang } else { 562cc0c071aSHong Zhang ai = a->inew; aj = a->jnew; 56382502324SSatish Balay ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 564cc0c071aSHong Zhang ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 56582502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 566cc0c071aSHong Zhang ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 567cc0c071aSHong Zhang 568cc0c071aSHong Zhang for (i=0; i<mbs; i++){ 569cc0c071aSHong Zhang jmin = ai[i]; jmax = ai[i+1]; 570cc0c071aSHong Zhang for (j=jmin; j<jmax; j++){ 571cc0c071aSHong Zhang while (a2anew[j] != j){ 572cc0c071aSHong Zhang k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 573cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 574cc0c071aSHong Zhang dk[k1] = aa[k*4+k1]; 575cc0c071aSHong Zhang aa[k*4+k1] = aa[j*4+k1]; 576cc0c071aSHong Zhang aa[j*4+k1] = dk[k1]; 577cc0c071aSHong Zhang } 578cc0c071aSHong Zhang } 579cc0c071aSHong Zhang /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 580cc0c071aSHong Zhang if (i > aj[j]){ 581a1723e09SHong Zhang /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 582cc0c071aSHong Zhang ap = aa + j*4; /* ptr to the beginning of the block */ 583cc0c071aSHong Zhang dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 584cc0c071aSHong Zhang ap[1] = ap[2]; 585cc0c071aSHong Zhang ap[2] = dk[1]; 586cc0c071aSHong Zhang } 587cc0c071aSHong Zhang } 588cc0c071aSHong Zhang } 589ac355199SBarry Smith ierr = PetscFree(a2anew);CHKERRQ(ierr); 590cc0c071aSHong Zhang } 5913845f261SHong Zhang 59291602c66SHong Zhang /* for each row k */ 59391602c66SHong Zhang for (k = 0; k<mbs; k++){ 59491602c66SHong Zhang 59591602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 596cc0c071aSHong Zhang jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 59791602c66SHong Zhang if (jmin < jmax) { 598cc0c071aSHong Zhang ap = aa + jmin*4; 59991602c66SHong Zhang for (j = jmin; j < jmax; j++){ 600cc0c071aSHong Zhang vj = perm_ptr[aj[j]]; /* block col. index */ 601cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 602cc0c071aSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 60391602c66SHong Zhang } 60491602c66SHong Zhang } 60591602c66SHong Zhang 60691602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 607cc0c071aSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 60891602c66SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 60991602c66SHong Zhang 61091602c66SHong Zhang while (i < mbs){ 61191602c66SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 61291602c66SHong Zhang 6133845f261SHong Zhang /* compute multiplier */ 61491602c66SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 6153845f261SHong Zhang 6163845f261SHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 617cc0c071aSHong Zhang diag = ba + i*4; 618cc0c071aSHong Zhang u = ba + ili*4; 619cc0c071aSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 620cc0c071aSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 621cc0c071aSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 622cc0c071aSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 6233845f261SHong Zhang 6243845f261SHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 625cc0c071aSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 626cc0c071aSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 627cc0c071aSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 628cc0c071aSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 6293845f261SHong Zhang 6303845f261SHong Zhang /* update -U(i,k): ba[ili] = uik */ 631cc0c071aSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 63291602c66SHong Zhang 63391602c66SHong Zhang /* add multiple of row i to k-th row ... */ 63491602c66SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 63591602c66SHong Zhang if (jmin < jmax){ 6363845f261SHong Zhang for (j=jmin; j<jmax; j++) { 6373845f261SHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 638cc0c071aSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 639cc0c071aSHong Zhang u = ba + j*4; 640cc0c071aSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 641cc0c071aSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 642cc0c071aSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 643cc0c071aSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 6443845f261SHong Zhang } 6453845f261SHong Zhang 64691602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 64791602c66SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 64891602c66SHong Zhang j = bj[jmin]; 64991602c66SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 65091602c66SHong Zhang } 651a1723e09SHong Zhang i = nexti; 65291602c66SHong Zhang } 653cc0c071aSHong Zhang 65491602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 6553845f261SHong Zhang 656cc0c071aSHong Zhang /* invert diagonal block */ 657cc0c071aSHong Zhang diag = ba+k*4; 658cc0c071aSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 6593845f261SHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 6603845f261SHong Zhang 66191602c66SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 66291602c66SHong Zhang if (jmin < jmax) { 66391602c66SHong Zhang for (j=jmin; j<jmax; j++){ 664cc0c071aSHong Zhang vj = bj[j]; /* block col. index of U */ 665cc0c071aSHong Zhang u = ba + j*4; 666cc0c071aSHong Zhang rtmp_ptr = rtmp + vj*4; 667cc0c071aSHong Zhang for (k1=0; k1<4; k1++){ 668cc0c071aSHong Zhang *u++ = *rtmp_ptr; 669cc0c071aSHong Zhang *rtmp_ptr++ = 0.0; 670cc0c071aSHong Zhang } 671cc0c071aSHong Zhang } 6723845f261SHong Zhang 67391602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 67491602c66SHong Zhang il[k] = jmin; 67591602c66SHong Zhang i = bj[jmin]; 67691602c66SHong Zhang jl[k] = jl[i]; jl[i] = k; 67791602c66SHong Zhang } 67891602c66SHong Zhang } 6793845f261SHong Zhang 68049b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 68191602c66SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 6823845f261SHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 68391602c66SHong Zhang if (a->permute) { 68491602c66SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 68591602c66SHong Zhang } 686cc0c071aSHong Zhang ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 68791602c66SHong Zhang C->factor = FACTOR_CHOLESKY; 68849b5e25fSSatish Balay C->assembled = PETSC_TRUE; 6895c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 690b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 69149b5e25fSSatish Balay PetscFunctionReturn(0); 69249b5e25fSSatish Balay } 69391602c66SHong Zhang 69449b5e25fSSatish Balay /* 69549b5e25fSSatish Balay Version for when blocks are 2 by 2 Using natural ordering 69649b5e25fSSatish Balay */ 697*4a2ae208SSatish Balay #undef __FUNCT__ 698*4a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 6996f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 70049b5e25fSSatish Balay { 70149b5e25fSSatish Balay Mat C = *B; 702ab27746eSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 703ab27746eSHong Zhang int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 70471149678SHong Zhang int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 705ab27746eSHong Zhang MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 706ab27746eSHong Zhang MatScalar *u,*diag,*rtmp,*rtmp_ptr; 70749b5e25fSSatish Balay 70849b5e25fSSatish Balay PetscFunctionBegin; 709671cb588SHong Zhang 710ab27746eSHong Zhang /* initialization */ 711ab27746eSHong Zhang /* il and jl record the first nonzero element in each row of the accessing 712ab27746eSHong Zhang window U(0:k, k:mbs-1). 713ab27746eSHong Zhang jl: list of rows to be added to uneliminated rows 714ab27746eSHong Zhang i>= k: jl(i) is the first row to be added to row i 715ab27746eSHong Zhang i< k: jl(i) is the row following row i in some list of rows 716ab27746eSHong Zhang jl(i) = mbs indicates the end of a list 717ab27746eSHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 718ab27746eSHong Zhang row i of U */ 71982502324SSatish Balay ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 720ab27746eSHong Zhang ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 72182502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 72228de702eSHong Zhang jl = il + mbs; 723ab27746eSHong Zhang for (i=0; i<mbs; i++) { 724ab27746eSHong Zhang jl[i] = mbs; il[0] = 0; 72549b5e25fSSatish Balay } 72682502324SSatish Balay ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 72728de702eSHong Zhang uik = dk + 4; 728ab27746eSHong Zhang 729ab27746eSHong Zhang ai = a->i; aj = a->j; aa = a->a; 730ab27746eSHong Zhang 731ab27746eSHong Zhang /* for each row k */ 732ab27746eSHong Zhang for (k = 0; k<mbs; k++){ 733ab27746eSHong Zhang 734ab27746eSHong Zhang /*initialize k-th row with elements nonzero in row k of A */ 735ab27746eSHong Zhang jmin = ai[k]; jmax = ai[k+1]; 736ab27746eSHong Zhang if (jmin < jmax) { 737ab27746eSHong Zhang ap = aa + jmin*4; 738ab27746eSHong Zhang for (j = jmin; j < jmax; j++){ 739ab27746eSHong Zhang vj = aj[j]; /* block col. index */ 740ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 741ab27746eSHong Zhang for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 74249b5e25fSSatish Balay } 74349b5e25fSSatish Balay } 744ab27746eSHong Zhang 745ab27746eSHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 746ab27746eSHong Zhang ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 747ab27746eSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 748ab27746eSHong Zhang 749ab27746eSHong Zhang while (i < mbs){ 750ab27746eSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 751ab27746eSHong Zhang 752ab27746eSHong Zhang /* compute multiplier */ 753ab27746eSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 754ab27746eSHong Zhang 755ab27746eSHong Zhang /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 756ab27746eSHong Zhang diag = ba + i*4; 757ab27746eSHong Zhang u = ba + ili*4; 758ab27746eSHong Zhang uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 759ab27746eSHong Zhang uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 760ab27746eSHong Zhang uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 761ab27746eSHong Zhang uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 762ab27746eSHong Zhang 763ab27746eSHong Zhang /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 764ab27746eSHong Zhang dk[0] += uik[0]*u[0] + uik[1]*u[1]; 765ab27746eSHong Zhang dk[1] += uik[2]*u[0] + uik[3]*u[1]; 766ab27746eSHong Zhang dk[2] += uik[0]*u[2] + uik[1]*u[3]; 767ab27746eSHong Zhang dk[3] += uik[2]*u[2] + uik[3]*u[3]; 768ab27746eSHong Zhang 769ab27746eSHong Zhang /* update -U(i,k): ba[ili] = uik */ 770ab27746eSHong Zhang ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 771ab27746eSHong Zhang 772ab27746eSHong Zhang /* add multiple of row i to k-th row ... */ 773ab27746eSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 774ab27746eSHong Zhang if (jmin < jmax){ 775ab27746eSHong Zhang for (j=jmin; j<jmax; j++) { 776ab27746eSHong Zhang /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 777ab27746eSHong Zhang rtmp_ptr = rtmp + bj[j]*4; 778ab27746eSHong Zhang u = ba + j*4; 779ab27746eSHong Zhang rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 780ab27746eSHong Zhang rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 781ab27746eSHong Zhang rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 782ab27746eSHong Zhang rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 78349b5e25fSSatish Balay } 784ab27746eSHong Zhang 785ab27746eSHong Zhang /* ... add i to row list for next nonzero entry */ 786ab27746eSHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 787ab27746eSHong Zhang j = bj[jmin]; 788ab27746eSHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 78949b5e25fSSatish Balay } 790ab27746eSHong Zhang i = nexti; 79149b5e25fSSatish Balay } 792ab27746eSHong Zhang 793ab27746eSHong Zhang /* save nonzero entries in k-th row of U ... */ 794ab27746eSHong Zhang 79549b5e25fSSatish Balay /* invert diagonal block */ 796ab27746eSHong Zhang diag = ba+k*4; 797ab27746eSHong Zhang ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 798ab27746eSHong Zhang ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 799ab27746eSHong Zhang 800ab27746eSHong Zhang jmin = bi[k]; jmax = bi[k+1]; 801ab27746eSHong Zhang if (jmin < jmax) { 802ab27746eSHong Zhang for (j=jmin; j<jmax; j++){ 803ab27746eSHong Zhang vj = bj[j]; /* block col. index of U */ 804ab27746eSHong Zhang u = ba + j*4; 805ab27746eSHong Zhang rtmp_ptr = rtmp + vj*4; 806ab27746eSHong Zhang for (k1=0; k1<4; k1++){ 807ab27746eSHong Zhang *u++ = *rtmp_ptr; 808ab27746eSHong Zhang *rtmp_ptr++ = 0.0; 809ab27746eSHong Zhang } 810ab27746eSHong Zhang } 811ab27746eSHong Zhang 812ab27746eSHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 813ab27746eSHong Zhang il[k] = jmin; 814ab27746eSHong Zhang i = bj[jmin]; 815ab27746eSHong Zhang jl[k] = jl[i]; jl[i] = k; 816ab27746eSHong Zhang } 81749b5e25fSSatish Balay } 81849b5e25fSSatish Balay 81949b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 820ab27746eSHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 821ab27746eSHong Zhang ierr = PetscFree(dk);CHKERRQ(ierr); 822ab27746eSHong Zhang 823ab27746eSHong Zhang C->factor = FACTOR_CHOLESKY; 82449b5e25fSSatish Balay C->assembled = PETSC_TRUE; 8255c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 826b0a32e0cSBarry Smith PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 82749b5e25fSSatish Balay PetscFunctionReturn(0); 82849b5e25fSSatish Balay } 82949b5e25fSSatish Balay 83049b5e25fSSatish Balay /* 8315c0bcdfcSHong Zhang Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 83291602c66SHong Zhang Version for blocks are 1 by 1. 83349b5e25fSSatish Balay */ 834*4a2ae208SSatish Balay #undef __FUNCT__ 835*4a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 8366f9a564bSHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 83749b5e25fSSatish Balay { 83849b5e25fSSatish Balay Mat C = *B; 83949b5e25fSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 84049b5e25fSSatish Balay IS ip = b->row; 841351d0355SHong Zhang int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 842cb718733SHong Zhang int *ai,*aj,*r; 843ab27746eSHong Zhang int k,jmin,jmax,*jl,*il,vj,nexti,ili; 844066653e3SSatish Balay MatScalar *rtmp; 8452d007305SHong Zhang MatScalar *ba = b->a,*aa,ak; 84649b5e25fSSatish Balay MatScalar dk,uikdi; 84749b5e25fSSatish Balay 84849b5e25fSSatish Balay PetscFunctionBegin; 84949b5e25fSSatish Balay ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 850cb718733SHong Zhang if (!a->permute){ 8512d007305SHong Zhang ai = a->i; aj = a->j; aa = a->a; 8522d007305SHong Zhang } else { 8532d007305SHong Zhang ai = a->inew; aj = a->jnew; 85482502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 855cb718733SHong Zhang ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 85682502324SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 8572d007305SHong Zhang ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 8582d007305SHong Zhang 8592d007305SHong Zhang jmin = ai[0]; jmax = ai[mbs]; 8602d007305SHong Zhang for (j=jmin; j<jmax; j++){ 861cb718733SHong Zhang while (r[j] != j){ 8622d007305SHong Zhang k = r[j]; r[j] = r[k]; r[k] = k; 8632d007305SHong Zhang ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 8642d007305SHong Zhang } 8652d007305SHong Zhang } 866ac355199SBarry Smith ierr = PetscFree(r);CHKERRQ(ierr); 8672d007305SHong Zhang } 8682d007305SHong Zhang 86991602c66SHong Zhang /* initialization */ 87049b5e25fSSatish Balay /* il and jl record the first nonzero element in each row of the accessing 87149b5e25fSSatish Balay window U(0:k, k:mbs-1). 87249b5e25fSSatish Balay jl: list of rows to be added to uneliminated rows 87349b5e25fSSatish Balay i>= k: jl(i) is the first row to be added to row i 87449b5e25fSSatish Balay i< k: jl(i) is the row following row i in some list of rows 87549b5e25fSSatish Balay jl(i) = mbs indicates the end of a list 87649b5e25fSSatish Balay il(i): points to the first nonzero element in columns k,...,mbs-1 of 87749b5e25fSSatish Balay row i of U */ 87882502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 87982502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 88028de702eSHong Zhang jl = il + mbs; 88149b5e25fSSatish Balay for (i=0; i<mbs; i++) { 88249b5e25fSSatish Balay rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 88349b5e25fSSatish Balay } 88449b5e25fSSatish Balay 88591602c66SHong Zhang /* for each row k */ 88649b5e25fSSatish Balay for (k = 0; k<mbs; k++){ 88749b5e25fSSatish Balay 88891602c66SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 88949b5e25fSSatish Balay jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 89049b5e25fSSatish Balay if (jmin < jmax) { 89149b5e25fSSatish Balay for (j = jmin; j < jmax; j++){ 892351d0355SHong Zhang vj = rip[aj[j]]; 893ab27746eSHong Zhang rtmp[vj] = aa[j]; 89449b5e25fSSatish Balay } 89549b5e25fSSatish Balay } 89649b5e25fSSatish Balay 89791602c66SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 89849b5e25fSSatish Balay dk = rtmp[k]; 89949b5e25fSSatish Balay i = jl[k]; /* first row to be added to k_th row */ 90049b5e25fSSatish Balay 90149b5e25fSSatish Balay while (i < mbs){ 90249b5e25fSSatish Balay nexti = jl[i]; /* next row to be added to k_th row */ 90349b5e25fSSatish Balay 90491602c66SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 90549b5e25fSSatish Balay ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 90649b5e25fSSatish Balay uikdi = - ba[ili]*ba[i]; 90749b5e25fSSatish Balay dk += uikdi*ba[ili]; 908658e7b3eSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 90949b5e25fSSatish Balay 91091602c66SHong Zhang /* add multiple of row i to k-th row ... */ 91149b5e25fSSatish Balay jmin = ili + 1; jmax = bi[i+1]; 91249b5e25fSSatish Balay if (jmin < jmax){ 91349b5e25fSSatish Balay for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 91491602c66SHong Zhang /* ... add i to row list for next nonzero entry */ 91549b5e25fSSatish Balay il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 91649b5e25fSSatish Balay j = bj[jmin]; 91749b5e25fSSatish Balay jl[i] = jl[j]; jl[j] = i; /* update jl */ 91849b5e25fSSatish Balay } 919ab27746eSHong Zhang i = nexti; 92049b5e25fSSatish Balay } 92149b5e25fSSatish Balay 92291602c66SHong Zhang /* check for zero pivot and save diagoanl element */ 92349b5e25fSSatish Balay if (dk == 0.0){ 92429bbc08cSBarry Smith SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 925671cb588SHong Zhang /* 9268be8c0c7SHong Zhang }else if (PetscRealPart(dk) < 0){ 9278be8c0c7SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk); 928671cb588SHong Zhang */ 92949b5e25fSSatish Balay } 93049b5e25fSSatish Balay 93191602c66SHong Zhang /* save nonzero entries in k-th row of U ... */ 932f3dd7b2dSKris Buschelman ba[k] = 1.0/dk; 93349b5e25fSSatish Balay jmin = bi[k]; jmax = bi[k+1]; 93449b5e25fSSatish Balay if (jmin < jmax) { 93549b5e25fSSatish Balay for (j=jmin; j<jmax; j++){ 936cc0c071aSHong Zhang vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 93749b5e25fSSatish Balay } 93891602c66SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 93949b5e25fSSatish Balay il[k] = jmin; 94049b5e25fSSatish Balay i = bj[jmin]; 94149b5e25fSSatish Balay jl[k] = jl[i]; jl[i] = k; 94249b5e25fSSatish Balay } 94349b5e25fSSatish Balay } 94449b5e25fSSatish Balay 94549b5e25fSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 94649b5e25fSSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 947cb718733SHong Zhang if (a->permute){ 948cb718733SHong Zhang ierr = PetscFree(aa);CHKERRQ(ierr); 949cb718733SHong Zhang } 95049b5e25fSSatish Balay 95149b5e25fSSatish Balay ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 9528be8c0c7SHong Zhang C->factor = FACTOR_CHOLESKY; 95349b5e25fSSatish Balay C->assembled = PETSC_TRUE; 9545c0bcdfcSHong Zhang C->preallocated = PETSC_TRUE; 955b0a32e0cSBarry Smith PetscLogFlops(b->mbs); 95649b5e25fSSatish Balay PetscFunctionReturn(0); 95749b5e25fSSatish Balay } 95849b5e25fSSatish Balay 959671cb588SHong Zhang /* 960671cb588SHong Zhang Version for when blocks are 1 by 1 Using natural ordering 961671cb588SHong Zhang */ 962*4a2ae208SSatish Balay #undef __FUNCT__ 963*4a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 964671cb588SHong Zhang int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 965671cb588SHong Zhang { 966671cb588SHong Zhang Mat C = *B; 967671cb588SHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 968671cb588SHong Zhang int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 9691ad1de41SHong Zhang int *ai,*aj; 970671cb588SHong Zhang int k,jmin,jmax,*jl,*il,vj,nexti,ili; 9711ad1de41SHong Zhang MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 972671cb588SHong Zhang 973671cb588SHong Zhang PetscFunctionBegin; 974671cb588SHong Zhang 975671cb588SHong Zhang /* initialization */ 976671cb588SHong Zhang /* il and jl record the first nonzero element in each row of the accessing 977671cb588SHong Zhang window U(0:k, k:mbs-1). 978671cb588SHong Zhang jl: list of rows to be added to uneliminated rows 979671cb588SHong Zhang i>= k: jl(i) is the first row to be added to row i 980671cb588SHong Zhang i< k: jl(i) is the row following row i in some list of rows 981671cb588SHong Zhang jl(i) = mbs indicates the end of a list 982671cb588SHong Zhang il(i): points to the first nonzero element in columns k,...,mbs-1 of 983671cb588SHong Zhang row i of U */ 98482502324SSatish Balay ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 98582502324SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 98628de702eSHong Zhang jl = il + mbs; 987671cb588SHong Zhang for (i=0; i<mbs; i++) { 988671cb588SHong Zhang rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 989671cb588SHong Zhang } 990671cb588SHong Zhang 991671cb588SHong Zhang ai = a->i; aj = a->j; aa = a->a; 992671cb588SHong Zhang 993671cb588SHong Zhang /* for each row k */ 994671cb588SHong Zhang for (k = 0; k<mbs; k++){ 995671cb588SHong Zhang 996671cb588SHong Zhang /*initialize k-th row with elements nonzero in row perm(k) of A */ 997671cb588SHong Zhang jmin = ai[k]; jmax = ai[k+1]; 998671cb588SHong Zhang if (jmin < jmax) { 999671cb588SHong Zhang for (j = jmin; j < jmax; j++){ 1000671cb588SHong Zhang vj = aj[j]; 1001671cb588SHong Zhang rtmp[vj] = aa[j]; 1002671cb588SHong Zhang } 1003671cb588SHong Zhang } 1004671cb588SHong Zhang 1005671cb588SHong Zhang /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1006671cb588SHong Zhang dk = rtmp[k]; 1007671cb588SHong Zhang i = jl[k]; /* first row to be added to k_th row */ 1008671cb588SHong Zhang 1009671cb588SHong Zhang while (i < mbs){ 1010671cb588SHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 1011671cb588SHong Zhang 1012671cb588SHong Zhang /* compute multiplier, update D(k) and U(i,k) */ 1013671cb588SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1014671cb588SHong Zhang uikdi = - ba[ili]*ba[i]; 1015671cb588SHong Zhang dk += uikdi*ba[ili]; 1016671cb588SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 1017671cb588SHong Zhang 1018671cb588SHong Zhang /* add multiple of row i to k-th row ... */ 1019671cb588SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 1020671cb588SHong Zhang if (jmin < jmax){ 1021671cb588SHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1022671cb588SHong Zhang /* ... add i to row list for next nonzero entry */ 1023671cb588SHong Zhang il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1024671cb588SHong Zhang j = bj[jmin]; 1025671cb588SHong Zhang jl[i] = jl[j]; jl[j] = i; /* update jl */ 1026671cb588SHong Zhang } 1027671cb588SHong Zhang i = nexti; 1028671cb588SHong Zhang } 1029671cb588SHong Zhang 1030671cb588SHong Zhang /* check for zero pivot and save diagoanl element */ 1031671cb588SHong Zhang if (dk == 0.0){ 1032671cb588SHong Zhang SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1033671cb588SHong Zhang /* 1034671cb588SHong Zhang }else if (PetscRealPart(dk) < 0){ 1035671cb588SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk); 1036671cb588SHong Zhang */ 1037671cb588SHong Zhang } 1038671cb588SHong Zhang 1039671cb588SHong Zhang /* save nonzero entries in k-th row of U ... */ 1040671cb588SHong Zhang ba[k] = 1.0/dk; 1041671cb588SHong Zhang jmin = bi[k]; jmax = bi[k+1]; 1042671cb588SHong Zhang if (jmin < jmax) { 1043671cb588SHong Zhang for (j=jmin; j<jmax; j++){ 1044671cb588SHong Zhang vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1045671cb588SHong Zhang } 1046671cb588SHong Zhang /* ... add k to row list for first nonzero entry in k-th row */ 1047671cb588SHong Zhang il[k] = jmin; 1048671cb588SHong Zhang i = bj[jmin]; 1049671cb588SHong Zhang jl[k] = jl[i]; jl[i] = k; 1050671cb588SHong Zhang } 1051671cb588SHong Zhang } 1052671cb588SHong Zhang 1053671cb588SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 1054671cb588SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 1055671cb588SHong Zhang 1056671cb588SHong Zhang C->factor = FACTOR_CHOLESKY; 1057671cb588SHong Zhang C->assembled = PETSC_TRUE; 1058671cb588SHong Zhang C->preallocated = PETSC_TRUE; 1059b0a32e0cSBarry Smith PetscLogFlops(b->mbs); 1060671cb588SHong Zhang PetscFunctionReturn(0); 1061671cb588SHong Zhang } 1062671cb588SHong Zhang 1063*4a2ae208SSatish Balay #undef __FUNCT__ 1064*4a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1065b8b23757SHong Zhang int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 106649b5e25fSSatish Balay { 10674445ddedSHong Zhang int ierr; 106849b5e25fSSatish Balay Mat C; 106949b5e25fSSatish Balay 107049b5e25fSSatish Balay PetscFunctionBegin; 1071b8b23757SHong Zhang ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1072a4ada70bSHong Zhang ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 10734445ddedSHong Zhang ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 107449b5e25fSSatish Balay PetscFunctionReturn(0); 107549b5e25fSSatish Balay } 107649b5e25fSSatish Balay 107749b5e25fSSatish Balay 1078