1*81278733SSatish Balay #include "sbaij.h" 2*81278733SSatish Balay #include "src/inline/ilu.h" 3*81278733SSatish Balay 4*81278733SSatish Balay /* Version for when blocks are 7 by 7 */ 5*81278733SSatish Balay #undef __FUNC__ 6*81278733SSatish Balay #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_7" 7*81278733SSatish Balay int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat A,Mat *B) 8*81278733SSatish Balay { 9*81278733SSatish Balay Mat C = *B; 10*81278733SSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 11*81278733SSatish Balay IS perm = b->row; 12*81278733SSatish Balay int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 13*81278733SSatish Balay int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 14*81278733SSatish Balay MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 15*81278733SSatish Balay MatScalar *u,*d,*w,*wp; 16*81278733SSatish Balay 17*81278733SSatish Balay PetscFunctionBegin; 18*81278733SSatish Balay /* initialization */ 19*81278733SSatish Balay ierr = PetscMalloc(49*mbs*sizeof(MatScalar),&w);CHKERRQ(ierr); 20*81278733SSatish Balay ierr = PetscMemzero(w,49*mbs*sizeof(MatScalar));CHKERRQ(ierr); 21*81278733SSatish Balay ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 22*81278733SSatish Balay jl = il + mbs; 23*81278733SSatish Balay for (i=0; i<mbs; i++) { 24*81278733SSatish Balay jl[i] = mbs; il[0] = 0; 25*81278733SSatish Balay } 26*81278733SSatish Balay ierr = PetscMalloc(98*sizeof(MatScalar),&dk);CHKERRQ(ierr); 27*81278733SSatish Balay uik = dk + 49; 28*81278733SSatish Balay ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 29*81278733SSatish Balay 30*81278733SSatish Balay /* check permutation */ 31*81278733SSatish Balay if (!a->permute){ 32*81278733SSatish Balay ai = a->i; aj = a->j; aa = a->a; 33*81278733SSatish Balay } else { 34*81278733SSatish Balay ai = a->inew; aj = a->jnew; 35*81278733SSatish Balay ierr = PetscMalloc(49*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 36*81278733SSatish Balay ierr = PetscMemcpy(aa,a->a,49*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 37*81278733SSatish Balay ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 38*81278733SSatish Balay ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 39*81278733SSatish Balay 40*81278733SSatish Balay for (i=0; i<mbs; i++){ 41*81278733SSatish Balay jmin = ai[i]; jmax = ai[i+1]; 42*81278733SSatish Balay for (j=jmin; j<jmax; j++){ 43*81278733SSatish Balay while (a2anew[j] != j){ 44*81278733SSatish Balay k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 45*81278733SSatish Balay for (k1=0; k1<49; k1++){ 46*81278733SSatish Balay dk[k1] = aa[k*49+k1]; 47*81278733SSatish Balay aa[k*49+k1] = aa[j*49+k1]; 48*81278733SSatish Balay aa[j*49+k1] = dk[k1]; 49*81278733SSatish Balay } 50*81278733SSatish Balay } 51*81278733SSatish Balay /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 52*81278733SSatish Balay if (i > aj[j]){ 53*81278733SSatish Balay /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 54*81278733SSatish Balay ap = aa + j*49; /* ptr to the beginning of j-th block of aa */ 55*81278733SSatish Balay for (k=0; k<49; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 56*81278733SSatish Balay for (k=0; k<7; k++){ /* j-th block of aa <- dk^T */ 57*81278733SSatish Balay for (k1=0; k1<7; k1++) *ap++ = dk[k + 7*k1]; 58*81278733SSatish Balay } 59*81278733SSatish Balay } 60*81278733SSatish Balay } 61*81278733SSatish Balay } 62*81278733SSatish Balay ierr = PetscFree(a2anew);CHKERRQ(ierr); 63*81278733SSatish Balay } 64*81278733SSatish Balay 65*81278733SSatish Balay /* for each row k */ 66*81278733SSatish Balay for (k = 0; k<mbs; k++){ 67*81278733SSatish Balay 68*81278733SSatish Balay /*initialize k-th row with elements nonzero in row perm(k) of A */ 69*81278733SSatish Balay jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 70*81278733SSatish Balay if (jmin < jmax) { 71*81278733SSatish Balay ap = aa + jmin*49; 72*81278733SSatish Balay for (j = jmin; j < jmax; j++){ 73*81278733SSatish Balay vj = perm_ptr[aj[j]]; /* block col. index */ 74*81278733SSatish Balay wp = w + vj*49; 75*81278733SSatish Balay for (i=0; i<49; i++) *wp++ = *ap++; 76*81278733SSatish Balay } 77*81278733SSatish Balay } 78*81278733SSatish Balay 79*81278733SSatish Balay /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 80*81278733SSatish Balay ierr = PetscMemcpy(dk,w+k*49,49*sizeof(MatScalar));CHKERRQ(ierr); 81*81278733SSatish Balay i = jl[k]; /* first row to be added to k_th row */ 82*81278733SSatish Balay 83*81278733SSatish Balay while (i < mbs){ 84*81278733SSatish Balay nexti = jl[i]; /* next row to be added to k_th row */ 85*81278733SSatish Balay 86*81278733SSatish Balay /* compute multiplier */ 87*81278733SSatish Balay ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 88*81278733SSatish Balay 89*81278733SSatish Balay /* uik = -inv(Di)*U_bar(i,k) */ 90*81278733SSatish Balay d = ba + i*49; 91*81278733SSatish Balay u = ba + ili*49; 92*81278733SSatish Balay 93*81278733SSatish Balay uik[0] = -(d[0]*u[0] + d[7]*u[1]+ d[14]*u[2]+ d[21]*u[3]+ d[28]*u[4]+ d[35]*u[5]+ d[42]*u[6]); 94*81278733SSatish Balay uik[1] = -(d[1]*u[0] + d[8]*u[1]+ d[15]*u[2]+ d[22]*u[3]+ d[29]*u[4]+ d[36]*u[5]+ d[43]*u[6]); 95*81278733SSatish Balay uik[2] = -(d[2]*u[0] + d[9]*u[1]+ d[16]*u[2]+ d[23]*u[3]+ d[30]*u[4]+ d[37]*u[5]+ d[44]*u[6]); 96*81278733SSatish Balay uik[3] = -(d[3]*u[0]+ d[10]*u[1]+ d[17]*u[2]+ d[24]*u[3]+ d[31]*u[4]+ d[38]*u[5]+ d[45]*u[6]); 97*81278733SSatish Balay uik[4] = -(d[4]*u[0]+ d[11]*u[1]+ d[18]*u[2]+ d[25]*u[3]+ d[32]*u[4]+ d[39]*u[5]+ d[46]*u[6]); 98*81278733SSatish Balay uik[5] = -(d[5]*u[0]+ d[12]*u[1]+ d[19]*u[2]+ d[26]*u[3]+ d[33]*u[4]+ d[40]*u[5]+ d[47]*u[6]); 99*81278733SSatish Balay uik[6] = -(d[6]*u[0]+ d[13]*u[1]+ d[20]*u[2]+ d[27]*u[3]+ d[34]*u[4]+ d[41]*u[5]+ d[48]*u[6]); 100*81278733SSatish Balay 101*81278733SSatish Balay uik[7] = -(d[0]*u[7] + d[7]*u[8]+ d[14]*u[9]+ d[21]*u[10]+ d[28]*u[11]+ d[35]*u[12]+ d[42]*u[13]); 102*81278733SSatish Balay uik[8] = -(d[1]*u[7] + d[8]*u[8]+ d[15]*u[9]+ d[22]*u[10]+ d[29]*u[11]+ d[36]*u[12]+ d[43]*u[13]); 103*81278733SSatish Balay uik[9] = -(d[2]*u[7] + d[9]*u[8]+ d[16]*u[9]+ d[23]*u[10]+ d[30]*u[11]+ d[37]*u[12]+ d[44]*u[13]); 104*81278733SSatish Balay uik[10]= -(d[3]*u[7]+ d[10]*u[8]+ d[17]*u[9]+ d[24]*u[10]+ d[31]*u[11]+ d[38]*u[12]+ d[45]*u[13]); 105*81278733SSatish Balay uik[11]= -(d[4]*u[7]+ d[11]*u[8]+ d[18]*u[9]+ d[25]*u[10]+ d[32]*u[11]+ d[39]*u[12]+ d[46]*u[13]); 106*81278733SSatish Balay uik[12]= -(d[5]*u[7]+ d[12]*u[8]+ d[19]*u[9]+ d[26]*u[10]+ d[33]*u[11]+ d[40]*u[12]+ d[47]*u[13]); 107*81278733SSatish Balay uik[13]= -(d[6]*u[7]+ d[13]*u[8]+ d[20]*u[9]+ d[27]*u[10]+ d[34]*u[11]+ d[41]*u[12]+ d[48]*u[13]); 108*81278733SSatish Balay 109*81278733SSatish Balay uik[14]= -(d[0]*u[14] + d[7]*u[15]+ d[14]*u[16]+ d[21]*u[17]+ d[28]*u[18]+ d[35]*u[19]+ d[42]*u[20]); 110*81278733SSatish Balay uik[15]= -(d[1]*u[14] + d[8]*u[15]+ d[15]*u[16]+ d[22]*u[17]+ d[29]*u[18]+ d[36]*u[19]+ d[43]*u[20]); 111*81278733SSatish Balay uik[16]= -(d[2]*u[14] + d[9]*u[15]+ d[16]*u[16]+ d[23]*u[17]+ d[30]*u[18]+ d[37]*u[19]+ d[44]*u[20]); 112*81278733SSatish Balay uik[17]= -(d[3]*u[14]+ d[10]*u[15]+ d[17]*u[16]+ d[24]*u[17]+ d[31]*u[18]+ d[38]*u[19]+ d[45]*u[20]); 113*81278733SSatish Balay uik[18]= -(d[4]*u[14]+ d[11]*u[15]+ d[18]*u[16]+ d[25]*u[17]+ d[32]*u[18]+ d[39]*u[19]+ d[46]*u[20]); 114*81278733SSatish Balay uik[19]= -(d[5]*u[14]+ d[12]*u[15]+ d[19]*u[16]+ d[26]*u[17]+ d[33]*u[18]+ d[40]*u[19]+ d[47]*u[20]); 115*81278733SSatish Balay uik[20]= -(d[6]*u[14]+ d[13]*u[15]+ d[20]*u[16]+ d[27]*u[17]+ d[34]*u[18]+ d[41]*u[19]+ d[48]*u[20]); 116*81278733SSatish Balay 117*81278733SSatish Balay uik[21]= -(d[0]*u[21] + d[7]*u[22]+ d[14]*u[23]+ d[21]*u[24]+ d[28]*u[25]+ d[35]*u[26]+ d[42]*u[27]); 118*81278733SSatish Balay uik[22]= -(d[1]*u[21] + d[8]*u[22]+ d[15]*u[23]+ d[22]*u[24]+ d[29]*u[25]+ d[36]*u[26]+ d[43]*u[27]); 119*81278733SSatish Balay uik[23]= -(d[2]*u[21] + d[9]*u[22]+ d[16]*u[23]+ d[23]*u[24]+ d[30]*u[25]+ d[37]*u[26]+ d[44]*u[27]); 120*81278733SSatish Balay uik[24]= -(d[3]*u[21]+ d[10]*u[22]+ d[17]*u[23]+ d[24]*u[24]+ d[31]*u[25]+ d[38]*u[26]+ d[45]*u[27]); 121*81278733SSatish Balay uik[25]= -(d[4]*u[21]+ d[11]*u[22]+ d[18]*u[23]+ d[25]*u[24]+ d[32]*u[25]+ d[39]*u[26]+ d[46]*u[27]); 122*81278733SSatish Balay uik[26]= -(d[5]*u[21]+ d[12]*u[22]+ d[19]*u[23]+ d[26]*u[24]+ d[33]*u[25]+ d[40]*u[26]+ d[47]*u[27]); 123*81278733SSatish Balay uik[27]= -(d[6]*u[21]+ d[13]*u[22]+ d[20]*u[23]+ d[27]*u[24]+ d[34]*u[25]+ d[41]*u[26]+ d[48]*u[27]); 124*81278733SSatish Balay 125*81278733SSatish Balay uik[28]= -(d[0]*u[28] + d[7]*u[29]+ d[14]*u[30]+ d[21]*u[31]+ d[28]*u[32]+ d[35]*u[33]+ d[42]*u[34]); 126*81278733SSatish Balay uik[29]= -(d[1]*u[28] + d[8]*u[29]+ d[15]*u[30]+ d[22]*u[31]+ d[29]*u[32]+ d[36]*u[33]+ d[43]*u[34]); 127*81278733SSatish Balay uik[30]= -(d[2]*u[28] + d[9]*u[29]+ d[16]*u[30]+ d[23]*u[31]+ d[30]*u[32]+ d[37]*u[33]+ d[44]*u[34]); 128*81278733SSatish Balay uik[31]= -(d[3]*u[28]+ d[10]*u[29]+ d[17]*u[30]+ d[24]*u[31]+ d[31]*u[32]+ d[38]*u[33]+ d[45]*u[34]); 129*81278733SSatish Balay uik[32]= -(d[4]*u[28]+ d[11]*u[29]+ d[18]*u[30]+ d[25]*u[31]+ d[32]*u[32]+ d[39]*u[33]+ d[46]*u[34]); 130*81278733SSatish Balay uik[33]= -(d[5]*u[28]+ d[12]*u[29]+ d[19]*u[30]+ d[26]*u[31]+ d[33]*u[32]+ d[40]*u[33]+ d[47]*u[34]); 131*81278733SSatish Balay uik[34]= -(d[6]*u[28]+ d[13]*u[29]+ d[20]*u[30]+ d[27]*u[31]+ d[34]*u[32]+ d[41]*u[33]+ d[48]*u[34]); 132*81278733SSatish Balay 133*81278733SSatish Balay uik[35]= -(d[0]*u[35] + d[7]*u[36]+ d[14]*u[37]+ d[21]*u[38]+ d[28]*u[39]+ d[35]*u[40]+ d[42]*u[41]); 134*81278733SSatish Balay uik[36]= -(d[1]*u[35] + d[8]*u[36]+ d[15]*u[37]+ d[22]*u[38]+ d[29]*u[39]+ d[36]*u[40]+ d[43]*u[41]); 135*81278733SSatish Balay uik[37]= -(d[2]*u[35] + d[9]*u[36]+ d[16]*u[37]+ d[23]*u[38]+ d[30]*u[39]+ d[37]*u[40]+ d[44]*u[41]); 136*81278733SSatish Balay uik[38]= -(d[3]*u[35]+ d[10]*u[36]+ d[17]*u[37]+ d[24]*u[38]+ d[31]*u[39]+ d[38]*u[40]+ d[45]*u[41]); 137*81278733SSatish Balay uik[39]= -(d[4]*u[35]+ d[11]*u[36]+ d[18]*u[37]+ d[25]*u[38]+ d[32]*u[39]+ d[39]*u[40]+ d[46]*u[41]); 138*81278733SSatish Balay uik[40]= -(d[5]*u[35]+ d[12]*u[36]+ d[19]*u[37]+ d[26]*u[38]+ d[33]*u[39]+ d[40]*u[40]+ d[47]*u[41]); 139*81278733SSatish Balay uik[41]= -(d[6]*u[35]+ d[13]*u[36]+ d[20]*u[37]+ d[27]*u[38]+ d[34]*u[39]+ d[41]*u[40]+ d[48]*u[41]); 140*81278733SSatish Balay 141*81278733SSatish Balay uik[42]= -(d[0]*u[42] + d[7]*u[43]+ d[14]*u[44]+ d[21]*u[45]+ d[28]*u[46]+ d[35]*u[47]+ d[42]*u[48]); 142*81278733SSatish Balay uik[43]= -(d[1]*u[42] + d[8]*u[43]+ d[15]*u[44]+ d[22]*u[45]+ d[29]*u[46]+ d[36]*u[47]+ d[43]*u[48]); 143*81278733SSatish Balay uik[44]= -(d[2]*u[42] + d[9]*u[43]+ d[16]*u[44]+ d[23]*u[45]+ d[30]*u[46]+ d[37]*u[47]+ d[44]*u[48]); 144*81278733SSatish Balay uik[45]= -(d[3]*u[42]+ d[10]*u[43]+ d[17]*u[44]+ d[24]*u[45]+ d[31]*u[46]+ d[38]*u[47]+ d[45]*u[48]); 145*81278733SSatish Balay uik[46]= -(d[4]*u[42]+ d[11]*u[43]+ d[18]*u[44]+ d[25]*u[45]+ d[32]*u[46]+ d[39]*u[47]+ d[46]*u[48]); 146*81278733SSatish Balay uik[47]= -(d[5]*u[42]+ d[12]*u[43]+ d[19]*u[44]+ d[26]*u[45]+ d[33]*u[46]+ d[40]*u[47]+ d[47]*u[48]); 147*81278733SSatish Balay uik[48]= -(d[6]*u[42]+ d[13]*u[43]+ d[20]*u[44]+ d[27]*u[45]+ d[34]*u[46]+ d[41]*u[47]+ d[48]*u[48]); 148*81278733SSatish Balay 149*81278733SSatish Balay /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 150*81278733SSatish Balay dk[0]+= uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6]; 151*81278733SSatish Balay dk[1]+= uik[7]*u[0] + uik[8]*u[1] + uik[9]*u[2]+ uik[10]*u[3]+ uik[11]*u[4]+ uik[12]*u[5]+ uik[13]*u[6]; 152*81278733SSatish Balay dk[2]+= uik[14]*u[0]+ uik[15]*u[1]+ uik[16]*u[2]+ uik[17]*u[3]+ uik[18]*u[4]+ uik[19]*u[5]+ uik[20]*u[6]; 153*81278733SSatish Balay dk[3]+= uik[21]*u[0]+ uik[22]*u[1]+ uik[23]*u[2]+ uik[24]*u[3]+ uik[25]*u[4]+ uik[26]*u[5]+ uik[27]*u[6]; 154*81278733SSatish Balay dk[4]+= uik[28]*u[0]+ uik[29]*u[1]+ uik[30]*u[2]+ uik[31]*u[3]+ uik[32]*u[4]+ uik[33]*u[5]+ uik[34]*u[6]; 155*81278733SSatish Balay dk[5]+= uik[35]*u[0]+ uik[36]*u[1]+ uik[37]*u[2]+ uik[38]*u[3]+ uik[39]*u[4]+ uik[40]*u[5]+ uik[41]*u[6]; 156*81278733SSatish Balay dk[6]+= uik[42]*u[0]+ uik[43]*u[1]+ uik[44]*u[2]+ uik[45]*u[3]+ uik[46]*u[4]+ uik[47]*u[5]+ uik[48]*u[6]; 157*81278733SSatish Balay 158*81278733SSatish Balay dk[7]+= uik[0]*u[7] + uik[1]*u[8] + uik[2]*u[9] + uik[3]*u[10] + uik[4]*u[11] + uik[5]*u[12] + uik[6]*u[13]; 159*81278733SSatish Balay dk[8]+= uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13]; 160*81278733SSatish Balay dk[9]+= uik[14]*u[7]+ uik[15]*u[8]+ uik[16]*u[9]+ uik[17]*u[10]+ uik[18]*u[11]+ uik[19]*u[12]+ uik[20]*u[13]; 161*81278733SSatish Balay dk[10]+=uik[21]*u[7]+ uik[22]*u[8]+ uik[23]*u[9]+ uik[24]*u[10]+ uik[25]*u[11]+ uik[26]*u[12]+ uik[27]*u[13]; 162*81278733SSatish Balay dk[11]+=uik[28]*u[7]+ uik[29]*u[8]+ uik[30]*u[9]+ uik[31]*u[10]+ uik[32]*u[11]+ uik[33]*u[12]+ uik[34]*u[13]; 163*81278733SSatish Balay dk[12]+=uik[35]*u[7]+ uik[36]*u[8]+ uik[37]*u[9]+ uik[38]*u[10]+ uik[39]*u[11]+ uik[40]*u[12]+ uik[41]*u[13]; 164*81278733SSatish Balay dk[13]+=uik[42]*u[7]+ uik[43]*u[8]+ uik[44]*u[9]+ uik[45]*u[10]+ uik[46]*u[11]+ uik[47]*u[12]+ uik[48]*u[13]; 165*81278733SSatish Balay 166*81278733SSatish Balay dk[14]+= uik[0]*u[14] + uik[1]*u[15] + uik[2]*u[16] + uik[3]*u[17] + uik[4]*u[18] + uik[5]*u[19] + uik[6]*u[20]; 167*81278733SSatish Balay dk[15]+= uik[7]*u[14] + uik[8]*u[15] + uik[9]*u[16]+ uik[10]*u[17]+ uik[11]*u[18]+ uik[12]*u[19]+ uik[13]*u[20]; 168*81278733SSatish Balay dk[16]+= uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20]; 169*81278733SSatish Balay dk[17]+= uik[21]*u[14]+ uik[22]*u[15]+ uik[23]*u[16]+ uik[24]*u[17]+ uik[25]*u[18]+ uik[26]*u[19]+ uik[27]*u[20]; 170*81278733SSatish Balay dk[18]+= uik[28]*u[14]+ uik[29]*u[15]+ uik[30]*u[16]+ uik[31]*u[17]+ uik[32]*u[18]+ uik[33]*u[19]+ uik[34]*u[20]; 171*81278733SSatish Balay dk[19]+= uik[35]*u[14]+ uik[36]*u[15]+ uik[37]*u[16]+ uik[38]*u[17]+ uik[39]*u[18]+ uik[40]*u[19]+ uik[41]*u[20]; 172*81278733SSatish Balay dk[20]+= uik[42]*u[14]+ uik[43]*u[15]+ uik[44]*u[16]+ uik[45]*u[17]+ uik[46]*u[18]+ uik[47]*u[19]+ uik[48]*u[20]; 173*81278733SSatish Balay 174*81278733SSatish Balay dk[21]+= uik[0]*u[21] + uik[1]*u[22] + uik[2]*u[23] + uik[3]*u[24] + uik[4]*u[25] + uik[5]*u[26] + uik[6]*u[27]; 175*81278733SSatish Balay dk[22]+= uik[7]*u[21] + uik[8]*u[22] + uik[9]*u[23]+ uik[10]*u[24]+ uik[11]*u[25]+ uik[12]*u[26]+ uik[13]*u[27]; 176*81278733SSatish Balay dk[23]+= uik[14]*u[21]+ uik[15]*u[22]+ uik[16]*u[23]+ uik[17]*u[24]+ uik[18]*u[25]+ uik[19]*u[26]+ uik[20]*u[27]; 177*81278733SSatish Balay dk[24]+= uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27]; 178*81278733SSatish Balay dk[25]+= uik[28]*u[21]+ uik[29]*u[22]+ uik[30]*u[23]+ uik[31]*u[24]+ uik[32]*u[25]+ uik[33]*u[26]+ uik[34]*u[27]; 179*81278733SSatish Balay dk[26]+= uik[35]*u[21]+ uik[36]*u[22]+ uik[37]*u[23]+ uik[38]*u[24]+ uik[39]*u[25]+ uik[40]*u[26]+ uik[41]*u[27]; 180*81278733SSatish Balay dk[27]+= uik[42]*u[21]+ uik[43]*u[22]+ uik[44]*u[23]+ uik[45]*u[24]+ uik[46]*u[25]+ uik[47]*u[26]+ uik[48]*u[27]; 181*81278733SSatish Balay 182*81278733SSatish Balay dk[28]+= uik[0]*u[28] + uik[1]*u[29] + uik[2]*u[30] + uik[3]*u[31] + uik[4]*u[32] + uik[5]*u[33] + uik[6]*u[34]; 183*81278733SSatish Balay dk[29]+= uik[7]*u[28] + uik[8]*u[29] + uik[9]*u[30]+ uik[10]*u[31]+ uik[11]*u[32]+ uik[12]*u[33]+ uik[13]*u[34]; 184*81278733SSatish Balay dk[30]+= uik[14]*u[28]+ uik[15]*u[29]+ uik[16]*u[30]+ uik[17]*u[31]+ uik[18]*u[32]+ uik[19]*u[33]+ uik[20]*u[34]; 185*81278733SSatish Balay dk[31]+= uik[21]*u[28]+ uik[22]*u[29]+ uik[23]*u[30]+ uik[24]*u[31]+ uik[25]*u[32]+ uik[26]*u[33]+ uik[27]*u[34]; 186*81278733SSatish Balay dk[32]+= uik[28]*u[28]+ uik[29]*u[29]+ uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34]; 187*81278733SSatish Balay dk[33]+= uik[35]*u[28]+ uik[36]*u[29]+ uik[37]*u[30]+ uik[38]*u[31]+ uik[39]*u[32]+ uik[40]*u[33]+ uik[41]*u[34]; 188*81278733SSatish Balay dk[34]+= uik[42]*u[28]+ uik[43]*u[29]+ uik[44]*u[30]+ uik[45]*u[31]+ uik[46]*u[32]+ uik[47]*u[33]+ uik[48]*u[34]; 189*81278733SSatish Balay 190*81278733SSatish Balay dk[35]+= uik[0]*u[35] + uik[1]*u[36] + uik[2]*u[37] + uik[3]*u[38] + uik[4]*u[39] + uik[5]*u[40] + uik[6]*u[41]; 191*81278733SSatish Balay dk[36]+= uik[7]*u[35] + uik[8]*u[36] + uik[9]*u[37]+ uik[10]*u[38]+ uik[11]*u[39]+ uik[12]*u[40]+ uik[13]*u[41]; 192*81278733SSatish Balay dk[37]+= uik[14]*u[35]+ uik[15]*u[36]+ uik[16]*u[37]+ uik[17]*u[38]+ uik[18]*u[39]+ uik[19]*u[40]+ uik[20]*u[41]; 193*81278733SSatish Balay dk[38]+= uik[21]*u[35]+ uik[22]*u[36]+ uik[23]*u[37]+ uik[24]*u[38]+ uik[25]*u[39]+ uik[26]*u[40]+ uik[27]*u[41]; 194*81278733SSatish Balay dk[39]+= uik[28]*u[35]+ uik[29]*u[36]+ uik[30]*u[37]+ uik[31]*u[38]+ uik[32]*u[39]+ uik[33]*u[40]+ uik[34]*u[41]; 195*81278733SSatish Balay dk[40]+= uik[35]*u[35]+ uik[36]*u[36]+ uik[37]*u[37]+ uik[38]*u[38]+ uik[39]*u[39]+ uik[40]*u[40]+ uik[41]*u[41]; 196*81278733SSatish Balay dk[41]+= uik[42]*u[35]+ uik[43]*u[36]+ uik[44]*u[37]+ uik[45]*u[38]+ uik[46]*u[39]+ uik[47]*u[40]+ uik[48]*u[41]; 197*81278733SSatish Balay 198*81278733SSatish Balay dk[42]+= uik[0]*u[42] + uik[1]*u[43] + uik[2]*u[44] + uik[3]*u[45] + uik[4]*u[46] + uik[5]*u[47] + uik[6]*u[48]; 199*81278733SSatish Balay dk[43]+= uik[7]*u[42] + uik[8]*u[43] + uik[9]*u[44]+ uik[10]*u[45]+ uik[11]*u[46]+ uik[12]*u[47]+ uik[13]*u[48]; 200*81278733SSatish Balay dk[44]+= uik[14]*u[42]+ uik[15]*u[43]+ uik[16]*u[44]+ uik[17]*u[45]+ uik[18]*u[46]+ uik[19]*u[47]+ uik[20]*u[48]; 201*81278733SSatish Balay dk[45]+= uik[21]*u[42]+ uik[22]*u[43]+ uik[23]*u[44]+ uik[24]*u[45]+ uik[25]*u[46]+ uik[26]*u[47]+ uik[27]*u[48]; 202*81278733SSatish Balay dk[46]+= uik[28]*u[42]+ uik[29]*u[43]+ uik[30]*u[44]+ uik[31]*u[45]+ uik[32]*u[46]+ uik[33]*u[47]+ uik[34]*u[48]; 203*81278733SSatish Balay dk[47]+= uik[35]*u[42]+ uik[36]*u[43]+ uik[37]*u[44]+ uik[38]*u[45]+ uik[39]*u[46]+ uik[40]*u[47]+ uik[41]*u[48]; 204*81278733SSatish Balay dk[48]+= uik[42]*u[42]+ uik[43]*u[43]+ uik[44]*u[44]+ uik[45]*u[45]+ uik[46]*u[46]+ uik[47]*u[47]+ uik[48]*u[48]; 205*81278733SSatish Balay 206*81278733SSatish Balay /* update -U(i,k) */ 207*81278733SSatish Balay ierr = PetscMemcpy(ba+ili*49,uik,49*sizeof(MatScalar));CHKERRQ(ierr); 208*81278733SSatish Balay 209*81278733SSatish Balay /* add multiple of row i to k-th row ... */ 210*81278733SSatish Balay jmin = ili + 1; jmax = bi[i+1]; 211*81278733SSatish Balay if (jmin < jmax){ 212*81278733SSatish Balay for (j=jmin; j<jmax; j++) { 213*81278733SSatish Balay /* w += -U(i,k)^T * U_bar(i,j) */ 214*81278733SSatish Balay wp = w + bj[j]*49; 215*81278733SSatish Balay u = ba + j*49; 216*81278733SSatish Balay 217*81278733SSatish Balay wp[0]+= uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3] + uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6]; 218*81278733SSatish Balay wp[1]+= uik[7]*u[0] + uik[8]*u[1] + uik[9]*u[2]+ uik[10]*u[3]+ uik[11]*u[4]+ uik[12]*u[5]+ uik[13]*u[6]; 219*81278733SSatish Balay wp[2]+= uik[14]*u[0]+ uik[15]*u[1]+ uik[16]*u[2]+ uik[17]*u[3]+ uik[18]*u[4]+ uik[19]*u[5]+ uik[20]*u[6]; 220*81278733SSatish Balay wp[3]+= uik[21]*u[0]+ uik[22]*u[1]+ uik[23]*u[2]+ uik[24]*u[3]+ uik[25]*u[4]+ uik[26]*u[5]+ uik[27]*u[6]; 221*81278733SSatish Balay wp[4]+= uik[28]*u[0]+ uik[29]*u[1]+ uik[30]*u[2]+ uik[31]*u[3]+ uik[32]*u[4]+ uik[33]*u[5]+ uik[34]*u[6]; 222*81278733SSatish Balay wp[5]+= uik[35]*u[0]+ uik[36]*u[1]+ uik[37]*u[2]+ uik[38]*u[3]+ uik[39]*u[4]+ uik[40]*u[5]+ uik[41]*u[6]; 223*81278733SSatish Balay wp[6]+= uik[42]*u[0]+ uik[43]*u[1]+ uik[44]*u[2]+ uik[45]*u[3]+ uik[46]*u[4]+ uik[47]*u[5]+ uik[48]*u[6]; 224*81278733SSatish Balay 225*81278733SSatish Balay wp[7]+= uik[0]*u[7] + uik[1]*u[8] + uik[2]*u[9] + uik[3]*u[10] + uik[4]*u[11] + uik[5]*u[12] + uik[6]*u[13]; 226*81278733SSatish Balay wp[8]+= uik[7]*u[7] + uik[8]*u[8] + uik[9]*u[9]+ uik[10]*u[10]+ uik[11]*u[11]+ uik[12]*u[12]+ uik[13]*u[13]; 227*81278733SSatish Balay wp[9]+= uik[14]*u[7]+ uik[15]*u[8]+ uik[16]*u[9]+ uik[17]*u[10]+ uik[18]*u[11]+ uik[19]*u[12]+ uik[20]*u[13]; 228*81278733SSatish Balay wp[10]+=uik[21]*u[7]+ uik[22]*u[8]+ uik[23]*u[9]+ uik[24]*u[10]+ uik[25]*u[11]+ uik[26]*u[12]+ uik[27]*u[13]; 229*81278733SSatish Balay wp[11]+=uik[28]*u[7]+ uik[29]*u[8]+ uik[30]*u[9]+ uik[31]*u[10]+ uik[32]*u[11]+ uik[33]*u[12]+ uik[34]*u[13]; 230*81278733SSatish Balay wp[12]+=uik[35]*u[7]+ uik[36]*u[8]+ uik[37]*u[9]+ uik[38]*u[10]+ uik[39]*u[11]+ uik[40]*u[12]+ uik[41]*u[13]; 231*81278733SSatish Balay wp[13]+=uik[42]*u[7]+ uik[43]*u[8]+ uik[44]*u[9]+ uik[45]*u[10]+ uik[46]*u[11]+ uik[47]*u[12]+ uik[48]*u[13]; 232*81278733SSatish Balay 233*81278733SSatish Balay wp[14]+= uik[0]*u[14] + uik[1]*u[15] + uik[2]*u[16] + uik[3]*u[17] + uik[4]*u[18] + uik[5]*u[19] + uik[6]*u[20]; 234*81278733SSatish Balay wp[15]+= uik[7]*u[14] + uik[8]*u[15] + uik[9]*u[16]+ uik[10]*u[17]+ uik[11]*u[18]+ uik[12]*u[19]+ uik[13]*u[20]; 235*81278733SSatish Balay wp[16]+= uik[14]*u[14]+ uik[15]*u[15]+ uik[16]*u[16]+ uik[17]*u[17]+ uik[18]*u[18]+ uik[19]*u[19]+ uik[20]*u[20]; 236*81278733SSatish Balay wp[17]+= uik[21]*u[14]+ uik[22]*u[15]+ uik[23]*u[16]+ uik[24]*u[17]+ uik[25]*u[18]+ uik[26]*u[19]+ uik[27]*u[20]; 237*81278733SSatish Balay wp[18]+= uik[28]*u[14]+ uik[29]*u[15]+ uik[30]*u[16]+ uik[31]*u[17]+ uik[32]*u[18]+ uik[33]*u[19]+ uik[34]*u[20]; 238*81278733SSatish Balay wp[19]+= uik[35]*u[14]+ uik[36]*u[15]+ uik[37]*u[16]+ uik[38]*u[17]+ uik[39]*u[18]+ uik[40]*u[19]+ uik[41]*u[20]; 239*81278733SSatish Balay wp[20]+= uik[42]*u[14]+ uik[43]*u[15]+ uik[44]*u[16]+ uik[45]*u[17]+ uik[46]*u[18]+ uik[47]*u[19]+ uik[48]*u[20]; 240*81278733SSatish Balay 241*81278733SSatish Balay wp[21]+= uik[0]*u[21] + uik[1]*u[22] + uik[2]*u[23] + uik[3]*u[24] + uik[4]*u[25] + uik[5]*u[26] + uik[6]*u[27]; 242*81278733SSatish Balay wp[22]+= uik[7]*u[21] + uik[8]*u[22] + uik[9]*u[23]+ uik[10]*u[24]+ uik[11]*u[25]+ uik[12]*u[26]+ uik[13]*u[27]; 243*81278733SSatish Balay wp[23]+= uik[14]*u[21]+ uik[15]*u[22]+ uik[16]*u[23]+ uik[17]*u[24]+ uik[18]*u[25]+ uik[19]*u[26]+ uik[20]*u[27]; 244*81278733SSatish Balay wp[24]+= uik[21]*u[21]+ uik[22]*u[22]+ uik[23]*u[23]+ uik[24]*u[24]+ uik[25]*u[25]+ uik[26]*u[26]+ uik[27]*u[27]; 245*81278733SSatish Balay wp[25]+= uik[28]*u[21]+ uik[29]*u[22]+ uik[30]*u[23]+ uik[31]*u[24]+ uik[32]*u[25]+ uik[33]*u[26]+ uik[34]*u[27]; 246*81278733SSatish Balay wp[26]+= uik[35]*u[21]+ uik[36]*u[22]+ uik[37]*u[23]+ uik[38]*u[24]+ uik[39]*u[25]+ uik[40]*u[26]+ uik[41]*u[27]; 247*81278733SSatish Balay wp[27]+= uik[42]*u[21]+ uik[43]*u[22]+ uik[44]*u[23]+ uik[45]*u[24]+ uik[46]*u[25]+ uik[47]*u[26]+ uik[48]*u[27]; 248*81278733SSatish Balay 249*81278733SSatish Balay wp[28]+= uik[0]*u[28] + uik[1]*u[29] + uik[2]*u[30] + uik[3]*u[31] + uik[4]*u[32] + uik[5]*u[33] + uik[6]*u[34]; 250*81278733SSatish Balay wp[29]+= uik[7]*u[28] + uik[8]*u[29] + uik[9]*u[30]+ uik[10]*u[31]+ uik[11]*u[32]+ uik[12]*u[33]+ uik[13]*u[34]; 251*81278733SSatish Balay wp[30]+= uik[14]*u[28]+ uik[15]*u[29]+ uik[16]*u[30]+ uik[17]*u[31]+ uik[18]*u[32]+ uik[19]*u[33]+ uik[20]*u[34]; 252*81278733SSatish Balay wp[31]+= uik[21]*u[28]+ uik[22]*u[29]+ uik[23]*u[30]+ uik[24]*u[31]+ uik[25]*u[32]+ uik[26]*u[33]+ uik[27]*u[34]; 253*81278733SSatish Balay wp[32]+= uik[28]*u[28]+ uik[29]*u[29]+ uik[30]*u[30]+ uik[31]*u[31]+ uik[32]*u[32]+ uik[33]*u[33]+ uik[34]*u[34]; 254*81278733SSatish Balay wp[33]+= uik[35]*u[28]+ uik[36]*u[29]+ uik[37]*u[30]+ uik[38]*u[31]+ uik[39]*u[32]+ uik[40]*u[33]+ uik[41]*u[34]; 255*81278733SSatish Balay wp[34]+= uik[42]*u[28]+ uik[43]*u[29]+ uik[44]*u[30]+ uik[45]*u[31]+ uik[46]*u[32]+ uik[47]*u[33]+ uik[48]*u[34]; 256*81278733SSatish Balay 257*81278733SSatish Balay wp[35]+= uik[0]*u[35] + uik[1]*u[36] + uik[2]*u[37] + uik[3]*u[38] + uik[4]*u[39] + uik[5]*u[40] + uik[6]*u[41]; 258*81278733SSatish Balay wp[36]+= uik[7]*u[35] + uik[8]*u[36] + uik[9]*u[37]+ uik[10]*u[38]+ uik[11]*u[39]+ uik[12]*u[40]+ uik[13]*u[41]; 259*81278733SSatish Balay wp[37]+= uik[14]*u[35]+ uik[15]*u[36]+ uik[16]*u[37]+ uik[17]*u[38]+ uik[18]*u[39]+ uik[19]*u[40]+ uik[20]*u[41]; 260*81278733SSatish Balay wp[38]+= uik[21]*u[35]+ uik[22]*u[36]+ uik[23]*u[37]+ uik[24]*u[38]+ uik[25]*u[39]+ uik[26]*u[40]+ uik[27]*u[41]; 261*81278733SSatish Balay wp[39]+= uik[28]*u[35]+ uik[29]*u[36]+ uik[30]*u[37]+ uik[31]*u[38]+ uik[32]*u[39]+ uik[33]*u[40]+ uik[34]*u[41]; 262*81278733SSatish Balay wp[40]+= uik[35]*u[35]+ uik[36]*u[36]+ uik[37]*u[37]+ uik[38]*u[38]+ uik[39]*u[39]+ uik[40]*u[40]+ uik[41]*u[41]; 263*81278733SSatish Balay wp[41]+= uik[42]*u[35]+ uik[43]*u[36]+ uik[44]*u[37]+ uik[45]*u[38]+ uik[46]*u[39]+ uik[47]*u[40]+ uik[48]*u[41]; 264*81278733SSatish Balay 265*81278733SSatish Balay wp[42]+= uik[0]*u[42] + uik[1]*u[43] + uik[2]*u[44] + uik[3]*u[45] + uik[4]*u[46] + uik[5]*u[47] + uik[6]*u[48]; 266*81278733SSatish Balay wp[43]+= uik[7]*u[42] + uik[8]*u[43] + uik[9]*u[44]+ uik[10]*u[45]+ uik[11]*u[46]+ uik[12]*u[47]+ uik[13]*u[48]; 267*81278733SSatish Balay wp[44]+= uik[14]*u[42]+ uik[15]*u[43]+ uik[16]*u[44]+ uik[17]*u[45]+ uik[18]*u[46]+ uik[19]*u[47]+ uik[20]*u[48]; 268*81278733SSatish Balay wp[45]+= uik[21]*u[42]+ uik[22]*u[43]+ uik[23]*u[44]+ uik[24]*u[45]+ uik[25]*u[46]+ uik[26]*u[47]+ uik[27]*u[48]; 269*81278733SSatish Balay wp[46]+= uik[28]*u[42]+ uik[29]*u[43]+ uik[30]*u[44]+ uik[31]*u[45]+ uik[32]*u[46]+ uik[33]*u[47]+ uik[34]*u[48]; 270*81278733SSatish Balay wp[47]+= uik[35]*u[42]+ uik[36]*u[43]+ uik[37]*u[44]+ uik[38]*u[45]+ uik[39]*u[46]+ uik[40]*u[47]+ uik[41]*u[48]; 271*81278733SSatish Balay wp[48]+= uik[42]*u[42]+ uik[43]*u[43]+ uik[44]*u[44]+ uik[45]*u[45]+ uik[46]*u[46]+ uik[47]*u[47]+ uik[48]*u[48]; 272*81278733SSatish Balay } 273*81278733SSatish Balay 274*81278733SSatish Balay /* ... add i to row list for next nonzero entry */ 275*81278733SSatish Balay il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 276*81278733SSatish Balay j = bj[jmin]; 277*81278733SSatish Balay jl[i] = jl[j]; jl[j] = i; /* update jl */ 278*81278733SSatish Balay } 279*81278733SSatish Balay i = nexti; 280*81278733SSatish Balay } 281*81278733SSatish Balay 282*81278733SSatish Balay /* save nonzero entries in k-th row of U ... */ 283*81278733SSatish Balay 284*81278733SSatish Balay /* invert diagonal block */ 285*81278733SSatish Balay d = ba+k*49; 286*81278733SSatish Balay ierr = PetscMemcpy(d,dk,49*sizeof(MatScalar));CHKERRQ(ierr); 287*81278733SSatish Balay ierr = Kernel_A_gets_inverse_A_7(d);CHKERRQ(ierr); 288*81278733SSatish Balay 289*81278733SSatish Balay jmin = bi[k]; jmax = bi[k+1]; 290*81278733SSatish Balay if (jmin < jmax) { 291*81278733SSatish Balay for (j=jmin; j<jmax; j++){ 292*81278733SSatish Balay vj = bj[j]; /* block col. index of U */ 293*81278733SSatish Balay u = ba + j*49; 294*81278733SSatish Balay wp = w + vj*49; 295*81278733SSatish Balay for (k1=0; k1<49; k1++){ 296*81278733SSatish Balay *u++ = *wp; 297*81278733SSatish Balay *wp++ = 0.0; 298*81278733SSatish Balay } 299*81278733SSatish Balay } 300*81278733SSatish Balay 301*81278733SSatish Balay /* ... add k to row list for first nonzero entry in k-th row */ 302*81278733SSatish Balay il[k] = jmin; 303*81278733SSatish Balay i = bj[jmin]; 304*81278733SSatish Balay jl[k] = jl[i]; jl[i] = k; 305*81278733SSatish Balay } 306*81278733SSatish Balay } 307*81278733SSatish Balay 308*81278733SSatish Balay ierr = PetscFree(w);CHKERRQ(ierr); 309*81278733SSatish Balay ierr = PetscFree(il);CHKERRQ(ierr); 310*81278733SSatish Balay ierr = PetscFree(dk);CHKERRQ(ierr); 311*81278733SSatish Balay if (a->permute){ 312*81278733SSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 313*81278733SSatish Balay } 314*81278733SSatish Balay 315*81278733SSatish Balay ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 316*81278733SSatish Balay C->factor = FACTOR_CHOLESKY; 317*81278733SSatish Balay C->assembled = PETSC_TRUE; 318*81278733SSatish Balay C->preallocated = PETSC_TRUE; 319*81278733SSatish Balay PetscLogFlops(1.3333*343*b->mbs); /* from inverting diagonal blocks */ 320*81278733SSatish Balay PetscFunctionReturn(0); 321*81278733SSatish Balay } 322