1be1d678aSKris Buschelman #define PETSCMAT_DLL 21c2a3de1SBarry Smith 37c4f633dSBarry Smith #include "../src/mat/impls/sbaij/seq/sbaij.h" 4c60f0209SBarry Smith #include "../src/mat/blockinvert.h" 581278733SSatish Balay 681278733SSatish Balay /* 781278733SSatish Balay Version for when blocks are 7 by 7 Using natural ordering 881278733SSatish Balay */ 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering" 110481f469SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 1281278733SSatish Balay { 1381278733SSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 14dfbe8321SBarry Smith PetscErrorCode ierr; 1513f74950SBarry Smith PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 1613f74950SBarry Smith PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 1781278733SSatish Balay MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 181b3064deSBarry Smith MatScalar *u,*d,*w,*wp,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12; 191b3064deSBarry Smith MatScalar u13,u14,u15,u16,u17,u18,u19,u20,u21,u22,u23,u24,u25,u26,u27; 201b3064deSBarry Smith MatScalar u28,u29,u30,u31,u32,u33,u34,u35,u36,u37,u38,u39,u40,u41; 211b3064deSBarry Smith MatScalar u42,u43,u44,u45,u46,u47,u48; 2262bba022SBarry Smith PetscReal shift = info->shiftinblocks; 2381278733SSatish Balay 2481278733SSatish Balay PetscFunctionBegin; 2581278733SSatish Balay /* initialization */ 2681278733SSatish Balay ierr = PetscMalloc(49*mbs*sizeof(MatScalar),&w);CHKERRQ(ierr); 2781278733SSatish Balay ierr = PetscMemzero(w,49*mbs*sizeof(MatScalar));CHKERRQ(ierr); 28d8c74875SBarry Smith ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2981278733SSatish Balay for (i=0; i<mbs; i++) { 3081278733SSatish Balay jl[i] = mbs; il[0] = 0; 3181278733SSatish Balay } 32d8c74875SBarry Smith ierr = PetscMalloc2(49,MatScalar,&dk,49,MatScalar,&uik);CHKERRQ(ierr); 3381278733SSatish Balay ai = a->i; aj = a->j; aa = a->a; 3481278733SSatish Balay 3581278733SSatish Balay /* for each row k */ 3681278733SSatish Balay for (k = 0; k<mbs; k++){ 3781278733SSatish Balay 3881278733SSatish Balay /*initialize k-th row with elements nonzero in row k of A */ 3981278733SSatish Balay jmin = ai[k]; jmax = ai[k+1]; 4081278733SSatish Balay if (jmin < jmax) { 4181278733SSatish Balay ap = aa + jmin*49; 4281278733SSatish Balay for (j = jmin; j < jmax; j++){ 4381278733SSatish Balay vj = aj[j]; /* block col. index */ 4481278733SSatish Balay wp = w + vj*49; 4581278733SSatish Balay for (i=0; i<49; i++) *wp++ = *ap++; 4681278733SSatish Balay } 4781278733SSatish Balay } 4881278733SSatish Balay 4981278733SSatish Balay /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 5081278733SSatish Balay ierr = PetscMemcpy(dk,w+k*49,49*sizeof(MatScalar));CHKERRQ(ierr); 5181278733SSatish Balay i = jl[k]; /* first row to be added to k_th row */ 5281278733SSatish Balay 5381278733SSatish Balay while (i < mbs){ 5481278733SSatish Balay nexti = jl[i]; /* next row to be added to k_th row */ 5581278733SSatish Balay 5681278733SSatish Balay /* compute multiplier */ 5781278733SSatish Balay ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 5881278733SSatish Balay 5981278733SSatish Balay /* uik = -inv(Di)*U_bar(i,k) */ 6081278733SSatish Balay d = ba + i*49; 6181278733SSatish Balay u = ba + ili*49; 6281278733SSatish Balay 631b3064deSBarry Smith u0 = u[0]; u1 = u[1]; u2 = u[2]; u3 = u[3]; u4 = u[4]; u5 = u[5]; u6 = u[6]; 641b3064deSBarry Smith u7 = u[7]; u8 = u[8]; u9 = u[9]; u10 = u[10]; u11 = u[11]; u12 = u[12]; u13 = u[13]; 651b3064deSBarry Smith u14 = u[14]; u15 = u[15]; u16 = u[16]; u17 = u[17]; u18 = u[18]; u19 = u[19]; u20 = u[20]; 661b3064deSBarry Smith u21 = u[21]; u22 = u[22]; u23 = u[23]; u24 = u[24]; u25 = u[25]; u26 = u[26]; u27 = u[27]; 671b3064deSBarry Smith u28 = u[28]; u29 = u[29]; u30 = u[30]; u31 = u[31]; u32 = u[32]; u33 = u[33]; u34 = u[34]; 681b3064deSBarry Smith u35 = u[35]; u36 = u[36]; u37 = u[37]; u38 = u[38]; u39 = u[39]; u40 = u[40]; u41 = u[41]; u42 = u[42]; 691b3064deSBarry Smith u43 = u[43]; u44 = u[44]; u45 = u[45]; u46 = u[46]; u47 = u[47]; u48 = u[48]; 7081278733SSatish Balay 711b3064deSBarry Smith uik[0] = -(d[0]*u0 + d[7]*u1+ d[14]*u2+ d[21]*u3+ d[28]*u4+ d[35]*u5+ d[42]*u6); 721b3064deSBarry Smith uik[1] = -(d[1]*u0 + d[8]*u1+ d[15]*u2+ d[22]*u3+ d[29]*u4+ d[36]*u5+ d[43]*u6); 731b3064deSBarry Smith uik[2] = -(d[2]*u0 + d[9]*u1+ d[16]*u2+ d[23]*u3+ d[30]*u4+ d[37]*u5+ d[44]*u6); 741b3064deSBarry Smith uik[3] = -(d[3]*u0+ d[10]*u1+ d[17]*u2+ d[24]*u3+ d[31]*u4+ d[38]*u5+ d[45]*u6); 751b3064deSBarry Smith uik[4] = -(d[4]*u0+ d[11]*u1+ d[18]*u2+ d[25]*u3+ d[32]*u4+ d[39]*u5+ d[46]*u6); 761b3064deSBarry Smith uik[5] = -(d[5]*u0+ d[12]*u1+ d[19]*u2+ d[26]*u3+ d[33]*u4+ d[40]*u5+ d[47]*u6); 771b3064deSBarry Smith uik[6] = -(d[6]*u0+ d[13]*u1+ d[20]*u2+ d[27]*u3+ d[34]*u4+ d[41]*u5+ d[48]*u6); 7881278733SSatish Balay 791b3064deSBarry Smith uik[7] = -(d[0]*u7 + d[7]*u8+ d[14]*u9+ d[21]*u10+ d[28]*u11+ d[35]*u12+ d[42]*u13); 801b3064deSBarry Smith uik[8] = -(d[1]*u7 + d[8]*u8+ d[15]*u9+ d[22]*u10+ d[29]*u11+ d[36]*u12+ d[43]*u13); 811b3064deSBarry Smith uik[9] = -(d[2]*u7 + d[9]*u8+ d[16]*u9+ d[23]*u10+ d[30]*u11+ d[37]*u12+ d[44]*u13); 821b3064deSBarry Smith uik[10]= -(d[3]*u7+ d[10]*u8+ d[17]*u9+ d[24]*u10+ d[31]*u11+ d[38]*u12+ d[45]*u13); 831b3064deSBarry Smith uik[11]= -(d[4]*u7+ d[11]*u8+ d[18]*u9+ d[25]*u10+ d[32]*u11+ d[39]*u12+ d[46]*u13); 841b3064deSBarry Smith uik[12]= -(d[5]*u7+ d[12]*u8+ d[19]*u9+ d[26]*u10+ d[33]*u11+ d[40]*u12+ d[47]*u13); 851b3064deSBarry Smith uik[13]= -(d[6]*u7+ d[13]*u8+ d[20]*u9+ d[27]*u10+ d[34]*u11+ d[41]*u12+ d[48]*u13); 8681278733SSatish Balay 871b3064deSBarry Smith uik[14]= -(d[0]*u14 + d[7]*u15+ d[14]*u16+ d[21]*u17+ d[28]*u18+ d[35]*u19+ d[42]*u20); 881b3064deSBarry Smith uik[15]= -(d[1]*u14 + d[8]*u15+ d[15]*u16+ d[22]*u17+ d[29]*u18+ d[36]*u19+ d[43]*u20); 891b3064deSBarry Smith uik[16]= -(d[2]*u14 + d[9]*u15+ d[16]*u16+ d[23]*u17+ d[30]*u18+ d[37]*u19+ d[44]*u20); 901b3064deSBarry Smith uik[17]= -(d[3]*u14+ d[10]*u15+ d[17]*u16+ d[24]*u17+ d[31]*u18+ d[38]*u19+ d[45]*u20); 911b3064deSBarry Smith uik[18]= -(d[4]*u14+ d[11]*u15+ d[18]*u16+ d[25]*u17+ d[32]*u18+ d[39]*u19+ d[46]*u20); 921b3064deSBarry Smith uik[19]= -(d[5]*u14+ d[12]*u15+ d[19]*u16+ d[26]*u17+ d[33]*u18+ d[40]*u19+ d[47]*u20); 931b3064deSBarry Smith uik[20]= -(d[6]*u14+ d[13]*u15+ d[20]*u16+ d[27]*u17+ d[34]*u18+ d[41]*u19+ d[48]*u20); 9481278733SSatish Balay 951b3064deSBarry Smith uik[21]= -(d[0]*u21 + d[7]*u22+ d[14]*u23+ d[21]*u24+ d[28]*u25+ d[35]*u26+ d[42]*u27); 961b3064deSBarry Smith uik[22]= -(d[1]*u21 + d[8]*u22+ d[15]*u23+ d[22]*u24+ d[29]*u25+ d[36]*u26+ d[43]*u27); 971b3064deSBarry Smith uik[23]= -(d[2]*u21 + d[9]*u22+ d[16]*u23+ d[23]*u24+ d[30]*u25+ d[37]*u26+ d[44]*u27); 981b3064deSBarry Smith uik[24]= -(d[3]*u21+ d[10]*u22+ d[17]*u23+ d[24]*u24+ d[31]*u25+ d[38]*u26+ d[45]*u27); 991b3064deSBarry Smith uik[25]= -(d[4]*u21+ d[11]*u22+ d[18]*u23+ d[25]*u24+ d[32]*u25+ d[39]*u26+ d[46]*u27); 1001b3064deSBarry Smith uik[26]= -(d[5]*u21+ d[12]*u22+ d[19]*u23+ d[26]*u24+ d[33]*u25+ d[40]*u26+ d[47]*u27); 1011b3064deSBarry Smith uik[27]= -(d[6]*u21+ d[13]*u22+ d[20]*u23+ d[27]*u24+ d[34]*u25+ d[41]*u26+ d[48]*u27); 10281278733SSatish Balay 1031b3064deSBarry Smith uik[28]= -(d[0]*u28 + d[7]*u29+ d[14]*u30+ d[21]*u31+ d[28]*u32+ d[35]*u33+ d[42]*u34); 1041b3064deSBarry Smith uik[29]= -(d[1]*u28 + d[8]*u29+ d[15]*u30+ d[22]*u31+ d[29]*u32+ d[36]*u33+ d[43]*u34); 1051b3064deSBarry Smith uik[30]= -(d[2]*u28 + d[9]*u29+ d[16]*u30+ d[23]*u31+ d[30]*u32+ d[37]*u33+ d[44]*u34); 1061b3064deSBarry Smith uik[31]= -(d[3]*u28+ d[10]*u29+ d[17]*u30+ d[24]*u31+ d[31]*u32+ d[38]*u33+ d[45]*u34); 1071b3064deSBarry Smith uik[32]= -(d[4]*u28+ d[11]*u29+ d[18]*u30+ d[25]*u31+ d[32]*u32+ d[39]*u33+ d[46]*u34); 1081b3064deSBarry Smith uik[33]= -(d[5]*u28+ d[12]*u29+ d[19]*u30+ d[26]*u31+ d[33]*u32+ d[40]*u33+ d[47]*u34); 1091b3064deSBarry Smith uik[34]= -(d[6]*u28+ d[13]*u29+ d[20]*u30+ d[27]*u31+ d[34]*u32+ d[41]*u33+ d[48]*u34); 11081278733SSatish Balay 1111b3064deSBarry Smith uik[35]= -(d[0]*u35 + d[7]*u36+ d[14]*u37+ d[21]*u38+ d[28]*u39+ d[35]*u40+ d[42]*u41); 1121b3064deSBarry Smith uik[36]= -(d[1]*u35 + d[8]*u36+ d[15]*u37+ d[22]*u38+ d[29]*u39+ d[36]*u40+ d[43]*u41); 1131b3064deSBarry Smith uik[37]= -(d[2]*u35 + d[9]*u36+ d[16]*u37+ d[23]*u38+ d[30]*u39+ d[37]*u40+ d[44]*u41); 1141b3064deSBarry Smith uik[38]= -(d[3]*u35+ d[10]*u36+ d[17]*u37+ d[24]*u38+ d[31]*u39+ d[38]*u40+ d[45]*u41); 1151b3064deSBarry Smith uik[39]= -(d[4]*u35+ d[11]*u36+ d[18]*u37+ d[25]*u38+ d[32]*u39+ d[39]*u40+ d[46]*u41); 1161b3064deSBarry Smith uik[40]= -(d[5]*u35+ d[12]*u36+ d[19]*u37+ d[26]*u38+ d[33]*u39+ d[40]*u40+ d[47]*u41); 1171b3064deSBarry Smith uik[41]= -(d[6]*u35+ d[13]*u36+ d[20]*u37+ d[27]*u38+ d[34]*u39+ d[41]*u40+ d[48]*u41); 1181b3064deSBarry Smith 1191b3064deSBarry Smith uik[42]= -(d[0]*u42 + d[7]*u43+ d[14]*u44+ d[21]*u45+ d[28]*u46+ d[35]*u47+ d[42]*u48); 1201b3064deSBarry Smith uik[43]= -(d[1]*u42 + d[8]*u43+ d[15]*u44+ d[22]*u45+ d[29]*u46+ d[36]*u47+ d[43]*u48); 1211b3064deSBarry Smith uik[44]= -(d[2]*u42 + d[9]*u43+ d[16]*u44+ d[23]*u45+ d[30]*u46+ d[37]*u47+ d[44]*u48); 1221b3064deSBarry Smith uik[45]= -(d[3]*u42+ d[10]*u43+ d[17]*u44+ d[24]*u45+ d[31]*u46+ d[38]*u47+ d[45]*u48); 1231b3064deSBarry Smith uik[46]= -(d[4]*u42+ d[11]*u43+ d[18]*u44+ d[25]*u45+ d[32]*u46+ d[39]*u47+ d[46]*u48); 1241b3064deSBarry Smith uik[47]= -(d[5]*u42+ d[12]*u43+ d[19]*u44+ d[26]*u45+ d[33]*u46+ d[40]*u47+ d[47]*u48); 1251b3064deSBarry Smith uik[48]= -(d[6]*u42+ d[13]*u43+ d[20]*u44+ d[27]*u45+ d[34]*u46+ d[41]*u47+ d[48]*u48); 12681278733SSatish Balay 12781278733SSatish Balay /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 1281b3064deSBarry Smith dk[0]+= uik[0]*u0 + uik[1]*u1 + uik[2]*u2 + uik[3]*u3 + uik[4]*u4 + uik[5]*u5 + uik[6]*u6; 1291b3064deSBarry Smith dk[1]+= uik[7]*u0 + uik[8]*u1 + uik[9]*u2+ uik[10]*u3+ uik[11]*u4+ uik[12]*u5+ uik[13]*u6; 1301b3064deSBarry Smith dk[2]+= uik[14]*u0+ uik[15]*u1+ uik[16]*u2+ uik[17]*u3+ uik[18]*u4+ uik[19]*u5+ uik[20]*u6; 1311b3064deSBarry Smith dk[3]+= uik[21]*u0+ uik[22]*u1+ uik[23]*u2+ uik[24]*u3+ uik[25]*u4+ uik[26]*u5+ uik[27]*u6; 1321b3064deSBarry Smith dk[4]+= uik[28]*u0+ uik[29]*u1+ uik[30]*u2+ uik[31]*u3+ uik[32]*u4+ uik[33]*u5+ uik[34]*u6; 1331b3064deSBarry Smith dk[5]+= uik[35]*u0+ uik[36]*u1+ uik[37]*u2+ uik[38]*u3+ uik[39]*u4+ uik[40]*u5+ uik[41]*u6; 1341b3064deSBarry Smith dk[6]+= uik[42]*u0+ uik[43]*u1+ uik[44]*u2+ uik[45]*u3+ uik[46]*u4+ uik[47]*u5+ uik[48]*u6; 13581278733SSatish Balay 1361b3064deSBarry Smith dk[7]+= uik[0]*u7 + uik[1]*u8 + uik[2]*u9 + uik[3]*u10 + uik[4]*u11 + uik[5]*u12 + uik[6]*u13; 1371b3064deSBarry Smith dk[8]+= uik[7]*u7 + uik[8]*u8 + uik[9]*u9+ uik[10]*u10+ uik[11]*u11+ uik[12]*u12+ uik[13]*u13; 1381b3064deSBarry Smith dk[9]+= uik[14]*u7+ uik[15]*u8+ uik[16]*u9+ uik[17]*u10+ uik[18]*u11+ uik[19]*u12+ uik[20]*u13; 1391b3064deSBarry Smith dk[10]+=uik[21]*u7+ uik[22]*u8+ uik[23]*u9+ uik[24]*u10+ uik[25]*u11+ uik[26]*u12+ uik[27]*u13; 1401b3064deSBarry Smith dk[11]+=uik[28]*u7+ uik[29]*u8+ uik[30]*u9+ uik[31]*u10+ uik[32]*u11+ uik[33]*u12+ uik[34]*u13; 1411b3064deSBarry Smith dk[12]+=uik[35]*u7+ uik[36]*u8+ uik[37]*u9+ uik[38]*u10+ uik[39]*u11+ uik[40]*u12+ uik[41]*u13; 1421b3064deSBarry Smith dk[13]+=uik[42]*u7+ uik[43]*u8+ uik[44]*u9+ uik[45]*u10+ uik[46]*u11+ uik[47]*u12+ uik[48]*u13; 14381278733SSatish Balay 1441b3064deSBarry Smith dk[14]+= uik[0]*u14 + uik[1]*u15 + uik[2]*u16 + uik[3]*u17 + uik[4]*u18 + uik[5]*u19 + uik[6]*u20; 1451b3064deSBarry Smith dk[15]+= uik[7]*u14 + uik[8]*u15 + uik[9]*u16+ uik[10]*u17+ uik[11]*u18+ uik[12]*u19+ uik[13]*u20; 1461b3064deSBarry Smith dk[16]+= uik[14]*u14+ uik[15]*u15+ uik[16]*u16+ uik[17]*u17+ uik[18]*u18+ uik[19]*u19+ uik[20]*u20; 1471b3064deSBarry Smith dk[17]+= uik[21]*u14+ uik[22]*u15+ uik[23]*u16+ uik[24]*u17+ uik[25]*u18+ uik[26]*u19+ uik[27]*u20; 1481b3064deSBarry Smith dk[18]+= uik[28]*u14+ uik[29]*u15+ uik[30]*u16+ uik[31]*u17+ uik[32]*u18+ uik[33]*u19+ uik[34]*u20; 1491b3064deSBarry Smith dk[19]+= uik[35]*u14+ uik[36]*u15+ uik[37]*u16+ uik[38]*u17+ uik[39]*u18+ uik[40]*u19+ uik[41]*u20; 1501b3064deSBarry Smith dk[20]+= uik[42]*u14+ uik[43]*u15+ uik[44]*u16+ uik[45]*u17+ uik[46]*u18+ uik[47]*u19+ uik[48]*u20; 15181278733SSatish Balay 1521b3064deSBarry Smith dk[21]+= uik[0]*u21 + uik[1]*u22 + uik[2]*u23 + uik[3]*u24 + uik[4]*u25 + uik[5]*u26 + uik[6]*u27; 1531b3064deSBarry Smith dk[22]+= uik[7]*u21 + uik[8]*u22 + uik[9]*u23+ uik[10]*u24+ uik[11]*u25+ uik[12]*u26+ uik[13]*u27; 1541b3064deSBarry Smith dk[23]+= uik[14]*u21+ uik[15]*u22+ uik[16]*u23+ uik[17]*u24+ uik[18]*u25+ uik[19]*u26+ uik[20]*u27; 1551b3064deSBarry Smith dk[24]+= uik[21]*u21+ uik[22]*u22+ uik[23]*u23+ uik[24]*u24+ uik[25]*u25+ uik[26]*u26+ uik[27]*u27; 1561b3064deSBarry Smith dk[25]+= uik[28]*u21+ uik[29]*u22+ uik[30]*u23+ uik[31]*u24+ uik[32]*u25+ uik[33]*u26+ uik[34]*u27; 1571b3064deSBarry Smith dk[26]+= uik[35]*u21+ uik[36]*u22+ uik[37]*u23+ uik[38]*u24+ uik[39]*u25+ uik[40]*u26+ uik[41]*u27; 1581b3064deSBarry Smith dk[27]+= uik[42]*u21+ uik[43]*u22+ uik[44]*u23+ uik[45]*u24+ uik[46]*u25+ uik[47]*u26+ uik[48]*u27; 15981278733SSatish Balay 1601b3064deSBarry Smith dk[28]+= uik[0]*u28 + uik[1]*u29 + uik[2]*u30 + uik[3]*u31 + uik[4]*u32 + uik[5]*u33 + uik[6]*u34; 1611b3064deSBarry Smith dk[29]+= uik[7]*u28 + uik[8]*u29 + uik[9]*u30+ uik[10]*u31+ uik[11]*u32+ uik[12]*u33+ uik[13]*u34; 1621b3064deSBarry Smith dk[30]+= uik[14]*u28+ uik[15]*u29+ uik[16]*u30+ uik[17]*u31+ uik[18]*u32+ uik[19]*u33+ uik[20]*u34; 1631b3064deSBarry Smith dk[31]+= uik[21]*u28+ uik[22]*u29+ uik[23]*u30+ uik[24]*u31+ uik[25]*u32+ uik[26]*u33+ uik[27]*u34; 1641b3064deSBarry Smith dk[32]+= uik[28]*u28+ uik[29]*u29+ uik[30]*u30+ uik[31]*u31+ uik[32]*u32+ uik[33]*u33+ uik[34]*u34; 1651b3064deSBarry Smith dk[33]+= uik[35]*u28+ uik[36]*u29+ uik[37]*u30+ uik[38]*u31+ uik[39]*u32+ uik[40]*u33+ uik[41]*u34; 1661b3064deSBarry Smith dk[34]+= uik[42]*u28+ uik[43]*u29+ uik[44]*u30+ uik[45]*u31+ uik[46]*u32+ uik[47]*u33+ uik[48]*u34; 16781278733SSatish Balay 1681b3064deSBarry Smith dk[35]+= uik[0]*u35 + uik[1]*u36 + uik[2]*u37 + uik[3]*u38 + uik[4]*u39 + uik[5]*u40 + uik[6]*u41; 1691b3064deSBarry Smith dk[36]+= uik[7]*u35 + uik[8]*u36 + uik[9]*u37+ uik[10]*u38+ uik[11]*u39+ uik[12]*u40+ uik[13]*u41; 1701b3064deSBarry Smith dk[37]+= uik[14]*u35+ uik[15]*u36+ uik[16]*u37+ uik[17]*u38+ uik[18]*u39+ uik[19]*u40+ uik[20]*u41; 1711b3064deSBarry Smith dk[38]+= uik[21]*u35+ uik[22]*u36+ uik[23]*u37+ uik[24]*u38+ uik[25]*u39+ uik[26]*u40+ uik[27]*u41; 1721b3064deSBarry Smith dk[39]+= uik[28]*u35+ uik[29]*u36+ uik[30]*u37+ uik[31]*u38+ uik[32]*u39+ uik[33]*u40+ uik[34]*u41; 1731b3064deSBarry Smith dk[40]+= uik[35]*u35+ uik[36]*u36+ uik[37]*u37+ uik[38]*u38+ uik[39]*u39+ uik[40]*u40+ uik[41]*u41; 1741b3064deSBarry Smith dk[41]+= uik[42]*u35+ uik[43]*u36+ uik[44]*u37+ uik[45]*u38+ uik[46]*u39+ uik[47]*u40+ uik[48]*u41; 17581278733SSatish Balay 1761b3064deSBarry Smith dk[42]+= uik[0]*u42 + uik[1]*u43 + uik[2]*u44 + uik[3]*u45 + uik[4]*u46 + uik[5]*u47 + uik[6]*u48; 1771b3064deSBarry Smith dk[43]+= uik[7]*u42 + uik[8]*u43 + uik[9]*u44+ uik[10]*u45+ uik[11]*u46+ uik[12]*u47+ uik[13]*u48; 1781b3064deSBarry Smith dk[44]+= uik[14]*u42+ uik[15]*u43+ uik[16]*u44+ uik[17]*u45+ uik[18]*u46+ uik[19]*u47+ uik[20]*u48; 1791b3064deSBarry Smith dk[45]+= uik[21]*u42+ uik[22]*u43+ uik[23]*u44+ uik[24]*u45+ uik[25]*u46+ uik[26]*u47+ uik[27]*u48; 1801b3064deSBarry Smith dk[46]+= uik[28]*u42+ uik[29]*u43+ uik[30]*u44+ uik[31]*u45+ uik[32]*u46+ uik[33]*u47+ uik[34]*u48; 1811b3064deSBarry Smith dk[47]+= uik[35]*u42+ uik[36]*u43+ uik[37]*u44+ uik[38]*u45+ uik[39]*u46+ uik[40]*u47+ uik[41]*u48; 1821b3064deSBarry Smith dk[48]+= uik[42]*u42+ uik[43]*u43+ uik[44]*u44+ uik[45]*u45+ uik[46]*u46+ uik[47]*u47+ uik[48]*u48; 18381278733SSatish Balay 184dc0b31edSSatish Balay ierr = PetscLogFlops(343.0*4.0);CHKERRQ(ierr); 185187a9f4bSHong Zhang 18681278733SSatish Balay /* update -U(i,k) */ 18781278733SSatish Balay ierr = PetscMemcpy(ba+ili*49,uik,49*sizeof(MatScalar));CHKERRQ(ierr); 18881278733SSatish Balay 18981278733SSatish Balay /* add multiple of row i to k-th row ... */ 19081278733SSatish Balay jmin = ili + 1; jmax = bi[i+1]; 19181278733SSatish Balay if (jmin < jmax){ 19281278733SSatish Balay for (j=jmin; j<jmax; j++) { 19381278733SSatish Balay /* w += -U(i,k)^T * U_bar(i,j) */ 19481278733SSatish Balay wp = w + bj[j]*49; 19581278733SSatish Balay u = ba + j*49; 19681278733SSatish Balay 1971b3064deSBarry Smith u0 = u[0]; u1 = u[1]; u2 = u[2]; u3 = u[3]; u4 = u[4]; u5 = u[5]; u6 = u[6]; 1981b3064deSBarry Smith u7 = u[7]; u8 = u[8]; u9 = u[9]; u10 = u[10]; u11 = u[11]; u12 = u[12]; u13 = u[13]; 1991b3064deSBarry Smith u14 = u[14]; u15 = u[15]; u16 = u[16]; u17 = u[17]; u18 = u[18]; u19 = u[19]; u20 = u[20]; 2001b3064deSBarry Smith u21 = u[21]; u22 = u[22]; u23 = u[23]; u24 = u[24]; u25 = u[25]; u26 = u[26]; u27 = u[27]; 2011b3064deSBarry Smith u28 = u[28]; u29 = u[29]; u30 = u[30]; u31 = u[31]; u32 = u[32]; u33 = u[33]; u34 = u[34]; 2021b3064deSBarry Smith u35 = u[35]; u36 = u[36]; u37 = u[37]; u38 = u[38]; u39 = u[39]; u40 = u[40]; u41 = u[41]; u42 = u[42]; 2031b3064deSBarry Smith u43 = u[43]; u44 = u[44]; u45 = u[45]; u46 = u[46]; u47 = u[47]; u48 = u[48]; 20481278733SSatish Balay 2051b3064deSBarry Smith wp[0]+= uik[0]*u0 + uik[1]*u1 + uik[2]*u2 + uik[3]*u3 + uik[4]*u4 + uik[5]*u5 + uik[6]*u6; 2061b3064deSBarry Smith wp[1]+= uik[7]*u0 + uik[8]*u1 + uik[9]*u2+ uik[10]*u3+ uik[11]*u4+ uik[12]*u5+ uik[13]*u6; 2071b3064deSBarry Smith wp[2]+= uik[14]*u0+ uik[15]*u1+ uik[16]*u2+ uik[17]*u3+ uik[18]*u4+ uik[19]*u5+ uik[20]*u6; 2081b3064deSBarry Smith wp[3]+= uik[21]*u0+ uik[22]*u1+ uik[23]*u2+ uik[24]*u3+ uik[25]*u4+ uik[26]*u5+ uik[27]*u6; 2091b3064deSBarry Smith wp[4]+= uik[28]*u0+ uik[29]*u1+ uik[30]*u2+ uik[31]*u3+ uik[32]*u4+ uik[33]*u5+ uik[34]*u6; 2101b3064deSBarry Smith wp[5]+= uik[35]*u0+ uik[36]*u1+ uik[37]*u2+ uik[38]*u3+ uik[39]*u4+ uik[40]*u5+ uik[41]*u6; 2111b3064deSBarry Smith wp[6]+= uik[42]*u0+ uik[43]*u1+ uik[44]*u2+ uik[45]*u3+ uik[46]*u4+ uik[47]*u5+ uik[48]*u6; 21281278733SSatish Balay 2131b3064deSBarry Smith wp[7]+= uik[0]*u7 + uik[1]*u8 + uik[2]*u9 + uik[3]*u10 + uik[4]*u11 + uik[5]*u12 + uik[6]*u13; 2141b3064deSBarry Smith wp[8]+= uik[7]*u7 + uik[8]*u8 + uik[9]*u9+ uik[10]*u10+ uik[11]*u11+ uik[12]*u12+ uik[13]*u13; 2151b3064deSBarry Smith wp[9]+= uik[14]*u7+ uik[15]*u8+ uik[16]*u9+ uik[17]*u10+ uik[18]*u11+ uik[19]*u12+ uik[20]*u13; 2161b3064deSBarry Smith wp[10]+=uik[21]*u7+ uik[22]*u8+ uik[23]*u9+ uik[24]*u10+ uik[25]*u11+ uik[26]*u12+ uik[27]*u13; 2171b3064deSBarry Smith wp[11]+=uik[28]*u7+ uik[29]*u8+ uik[30]*u9+ uik[31]*u10+ uik[32]*u11+ uik[33]*u12+ uik[34]*u13; 2181b3064deSBarry Smith wp[12]+=uik[35]*u7+ uik[36]*u8+ uik[37]*u9+ uik[38]*u10+ uik[39]*u11+ uik[40]*u12+ uik[41]*u13; 2191b3064deSBarry Smith wp[13]+=uik[42]*u7+ uik[43]*u8+ uik[44]*u9+ uik[45]*u10+ uik[46]*u11+ uik[47]*u12+ uik[48]*u13; 22081278733SSatish Balay 2211b3064deSBarry Smith wp[14]+= uik[0]*u14 + uik[1]*u15 + uik[2]*u16 + uik[3]*u17 + uik[4]*u18 + uik[5]*u19 + uik[6]*u20; 2221b3064deSBarry Smith wp[15]+= uik[7]*u14 + uik[8]*u15 + uik[9]*u16+ uik[10]*u17+ uik[11]*u18+ uik[12]*u19+ uik[13]*u20; 2231b3064deSBarry Smith wp[16]+= uik[14]*u14+ uik[15]*u15+ uik[16]*u16+ uik[17]*u17+ uik[18]*u18+ uik[19]*u19+ uik[20]*u20; 2241b3064deSBarry Smith wp[17]+= uik[21]*u14+ uik[22]*u15+ uik[23]*u16+ uik[24]*u17+ uik[25]*u18+ uik[26]*u19+ uik[27]*u20; 2251b3064deSBarry Smith wp[18]+= uik[28]*u14+ uik[29]*u15+ uik[30]*u16+ uik[31]*u17+ uik[32]*u18+ uik[33]*u19+ uik[34]*u20; 2261b3064deSBarry Smith wp[19]+= uik[35]*u14+ uik[36]*u15+ uik[37]*u16+ uik[38]*u17+ uik[39]*u18+ uik[40]*u19+ uik[41]*u20; 2271b3064deSBarry Smith wp[20]+= uik[42]*u14+ uik[43]*u15+ uik[44]*u16+ uik[45]*u17+ uik[46]*u18+ uik[47]*u19+ uik[48]*u20; 22881278733SSatish Balay 2291b3064deSBarry Smith wp[21]+= uik[0]*u21 + uik[1]*u22 + uik[2]*u23 + uik[3]*u24 + uik[4]*u25 + uik[5]*u26 + uik[6]*u27; 2301b3064deSBarry Smith wp[22]+= uik[7]*u21 + uik[8]*u22 + uik[9]*u23+ uik[10]*u24+ uik[11]*u25+ uik[12]*u26+ uik[13]*u27; 2311b3064deSBarry Smith wp[23]+= uik[14]*u21+ uik[15]*u22+ uik[16]*u23+ uik[17]*u24+ uik[18]*u25+ uik[19]*u26+ uik[20]*u27; 2321b3064deSBarry Smith wp[24]+= uik[21]*u21+ uik[22]*u22+ uik[23]*u23+ uik[24]*u24+ uik[25]*u25+ uik[26]*u26+ uik[27]*u27; 2331b3064deSBarry Smith wp[25]+= uik[28]*u21+ uik[29]*u22+ uik[30]*u23+ uik[31]*u24+ uik[32]*u25+ uik[33]*u26+ uik[34]*u27; 2341b3064deSBarry Smith wp[26]+= uik[35]*u21+ uik[36]*u22+ uik[37]*u23+ uik[38]*u24+ uik[39]*u25+ uik[40]*u26+ uik[41]*u27; 2351b3064deSBarry Smith wp[27]+= uik[42]*u21+ uik[43]*u22+ uik[44]*u23+ uik[45]*u24+ uik[46]*u25+ uik[47]*u26+ uik[48]*u27; 23681278733SSatish Balay 2371b3064deSBarry Smith wp[28]+= uik[0]*u28 + uik[1]*u29 + uik[2]*u30 + uik[3]*u31 + uik[4]*u32 + uik[5]*u33 + uik[6]*u34; 2381b3064deSBarry Smith wp[29]+= uik[7]*u28 + uik[8]*u29 + uik[9]*u30+ uik[10]*u31+ uik[11]*u32+ uik[12]*u33+ uik[13]*u34; 2391b3064deSBarry Smith wp[30]+= uik[14]*u28+ uik[15]*u29+ uik[16]*u30+ uik[17]*u31+ uik[18]*u32+ uik[19]*u33+ uik[20]*u34; 2401b3064deSBarry Smith wp[31]+= uik[21]*u28+ uik[22]*u29+ uik[23]*u30+ uik[24]*u31+ uik[25]*u32+ uik[26]*u33+ uik[27]*u34; 2411b3064deSBarry Smith wp[32]+= uik[28]*u28+ uik[29]*u29+ uik[30]*u30+ uik[31]*u31+ uik[32]*u32+ uik[33]*u33+ uik[34]*u34; 2421b3064deSBarry Smith wp[33]+= uik[35]*u28+ uik[36]*u29+ uik[37]*u30+ uik[38]*u31+ uik[39]*u32+ uik[40]*u33+ uik[41]*u34; 2431b3064deSBarry Smith wp[34]+= uik[42]*u28+ uik[43]*u29+ uik[44]*u30+ uik[45]*u31+ uik[46]*u32+ uik[47]*u33+ uik[48]*u34; 24481278733SSatish Balay 2451b3064deSBarry Smith wp[35]+= uik[0]*u35 + uik[1]*u36 + uik[2]*u37 + uik[3]*u38 + uik[4]*u39 + uik[5]*u40 + uik[6]*u41; 2461b3064deSBarry Smith wp[36]+= uik[7]*u35 + uik[8]*u36 + uik[9]*u37+ uik[10]*u38+ uik[11]*u39+ uik[12]*u40+ uik[13]*u41; 2471b3064deSBarry Smith wp[37]+= uik[14]*u35+ uik[15]*u36+ uik[16]*u37+ uik[17]*u38+ uik[18]*u39+ uik[19]*u40+ uik[20]*u41; 2481b3064deSBarry Smith wp[38]+= uik[21]*u35+ uik[22]*u36+ uik[23]*u37+ uik[24]*u38+ uik[25]*u39+ uik[26]*u40+ uik[27]*u41; 2491b3064deSBarry Smith wp[39]+= uik[28]*u35+ uik[29]*u36+ uik[30]*u37+ uik[31]*u38+ uik[32]*u39+ uik[33]*u40+ uik[34]*u41; 2501b3064deSBarry Smith wp[40]+= uik[35]*u35+ uik[36]*u36+ uik[37]*u37+ uik[38]*u38+ uik[39]*u39+ uik[40]*u40+ uik[41]*u41; 2511b3064deSBarry Smith wp[41]+= uik[42]*u35+ uik[43]*u36+ uik[44]*u37+ uik[45]*u38+ uik[46]*u39+ uik[47]*u40+ uik[48]*u41; 2521b3064deSBarry Smith 2531b3064deSBarry Smith wp[42]+= uik[0]*u42 + uik[1]*u43 + uik[2]*u44 + uik[3]*u45 + uik[4]*u46 + uik[5]*u47 + uik[6]*u48; 2541b3064deSBarry Smith wp[43]+= uik[7]*u42 + uik[8]*u43 + uik[9]*u44+ uik[10]*u45+ uik[11]*u46+ uik[12]*u47+ uik[13]*u48; 2551b3064deSBarry Smith wp[44]+= uik[14]*u42+ uik[15]*u43+ uik[16]*u44+ uik[17]*u45+ uik[18]*u46+ uik[19]*u47+ uik[20]*u48; 2561b3064deSBarry Smith wp[45]+= uik[21]*u42+ uik[22]*u43+ uik[23]*u44+ uik[24]*u45+ uik[25]*u46+ uik[26]*u47+ uik[27]*u48; 2571b3064deSBarry Smith wp[46]+= uik[28]*u42+ uik[29]*u43+ uik[30]*u44+ uik[31]*u45+ uik[32]*u46+ uik[33]*u47+ uik[34]*u48; 2581b3064deSBarry Smith wp[47]+= uik[35]*u42+ uik[36]*u43+ uik[37]*u44+ uik[38]*u45+ uik[39]*u46+ uik[40]*u47+ uik[41]*u48; 2591b3064deSBarry Smith wp[48]+= uik[42]*u42+ uik[43]*u43+ uik[44]*u44+ uik[45]*u45+ uik[46]*u46+ uik[47]*u47+ uik[48]*u48; 26081278733SSatish Balay } 261dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*343.0*(jmax-jmin));CHKERRQ(ierr); 26281278733SSatish Balay 26381278733SSatish Balay /* ... add i to row list for next nonzero entry */ 26481278733SSatish Balay il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 26581278733SSatish Balay j = bj[jmin]; 26681278733SSatish Balay jl[i] = jl[j]; jl[j] = i; /* update jl */ 26781278733SSatish Balay } 26881278733SSatish Balay i = nexti; 26981278733SSatish Balay } 27081278733SSatish Balay 27181278733SSatish Balay /* save nonzero entries in k-th row of U ... */ 27281278733SSatish Balay 27381278733SSatish Balay /* invert diagonal block */ 27481278733SSatish Balay d = ba+k*49; 27581278733SSatish Balay ierr = PetscMemcpy(d,dk,49*sizeof(MatScalar));CHKERRQ(ierr); 27662bba022SBarry Smith ierr = Kernel_A_gets_inverse_A_7(d,shift);CHKERRQ(ierr); 27781278733SSatish Balay 27881278733SSatish Balay jmin = bi[k]; jmax = bi[k+1]; 27981278733SSatish Balay if (jmin < jmax) { 28081278733SSatish Balay for (j=jmin; j<jmax; j++){ 28181278733SSatish Balay vj = bj[j]; /* block col. index of U */ 28281278733SSatish Balay u = ba + j*49; 28381278733SSatish Balay wp = w + vj*49; 28481278733SSatish Balay for (k1=0; k1<49; k1++){ 28581278733SSatish Balay *u++ = *wp; 28681278733SSatish Balay *wp++ = 0.0; 28781278733SSatish Balay } 28881278733SSatish Balay } 28981278733SSatish Balay 29081278733SSatish Balay /* ... add k to row list for first nonzero entry in k-th row */ 29181278733SSatish Balay il[k] = jmin; 29281278733SSatish Balay i = bj[jmin]; 29381278733SSatish Balay jl[k] = jl[i]; jl[i] = k; 29481278733SSatish Balay } 29581278733SSatish Balay } 29681278733SSatish Balay 29781278733SSatish Balay ierr = PetscFree(w);CHKERRQ(ierr); 298d8c74875SBarry Smith ierr = PetscFree2(il,jl);CHKERRQ(ierr); 299d8c74875SBarry Smith ierr = PetscFree2(dk,uik);CHKERRQ(ierr); 30081278733SSatish Balay 301*4f79d315SHong Zhang C->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace; 302*4f79d315SHong Zhang C->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace; 303*4f79d315SHong Zhang C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace; 304*4f79d315SHong Zhang C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace; 30581278733SSatish Balay C->assembled = PETSC_TRUE; 30681278733SSatish Balay C->preallocated = PETSC_TRUE; 307efee365bSSatish Balay ierr = PetscLogFlops(1.3333*343*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 30881278733SSatish Balay PetscFunctionReturn(0); 30927e0cc20SSatish Balay } 310