14e2b4712SSatish Balay #ifdef PETSC_RCS_HEADER 2744db789SBarry Smith static char vcid[] = "$Id: baijfact2.c,v 1.23 1999/01/24 19:57:49 bsmith Exp bsmith $"; 34e2b4712SSatish Balay #endif 44e2b4712SSatish Balay /* 54e2b4712SSatish Balay Factorization code for BAIJ format. 64e2b4712SSatish Balay */ 74e2b4712SSatish Balay 84e2b4712SSatish Balay #include "src/mat/impls/baij/seq/baij.h" 94e2b4712SSatish Balay #include "src/vec/vecimpl.h" 104e2b4712SSatish Balay #include "src/inline/ilu.h" 1174c49faeSBarry Smith #include "src/inline/dot.h" 124e2b4712SSatish Balay 134e2b4712SSatish Balay /* ----------------------------------------------------------- */ 144e2b4712SSatish Balay #undef __FUNC__ 154e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_N" 164e2b4712SSatish Balay int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 174e2b4712SSatish Balay { 184e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 194e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 204e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j; 214e2b4712SSatish Balay int nz,bs=a->bs,bs2=a->bs2,*rout,*cout; 223f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 233f1db9ecSBarry Smith Scalar *x,*b,*sum,*tmp,*lsum; 244e2b4712SSatish Balay 254e2b4712SSatish Balay PetscFunctionBegin; 26e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27e1311b90SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 284e2b4712SSatish Balay tmp = a->solve_work; 294e2b4712SSatish Balay 304e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 314e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 324e2b4712SSatish Balay 334e2b4712SSatish Balay /* forward solve the lower triangular */ 344e2b4712SSatish Balay PetscMemcpy(tmp,b + bs*(*r++), bs*sizeof(Scalar)); 354e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 364e2b4712SSatish Balay v = aa + bs2*ai[i]; 374e2b4712SSatish Balay vi = aj + ai[i]; 384e2b4712SSatish Balay nz = a->diag[i] - ai[i]; 394e2b4712SSatish Balay sum = tmp + bs*i; 404e2b4712SSatish Balay PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar)); 414e2b4712SSatish Balay while (nz--) { 424e2b4712SSatish Balay Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++)); 434e2b4712SSatish Balay v += bs2; 444e2b4712SSatish Balay } 454e2b4712SSatish Balay } 464e2b4712SSatish Balay /* backward solve the upper triangular */ 474e2b4712SSatish Balay lsum = a->solve_work + a->n; 484e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 494e2b4712SSatish Balay v = aa + bs2*(a->diag[i] + 1); 504e2b4712SSatish Balay vi = aj + a->diag[i] + 1; 514e2b4712SSatish Balay nz = ai[i+1] - a->diag[i] - 1; 524e2b4712SSatish Balay PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar)); 534e2b4712SSatish Balay while (nz--) { 544e2b4712SSatish Balay Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++)); 554e2b4712SSatish Balay v += bs2; 564e2b4712SSatish Balay } 574e2b4712SSatish Balay Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs); 584e2b4712SSatish Balay PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar)); 594e2b4712SSatish Balay } 604e2b4712SSatish Balay 614e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 624e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 63e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 64e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 6542015d63SBarry Smith PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n); 664e2b4712SSatish Balay PetscFunctionReturn(0); 674e2b4712SSatish Balay } 684e2b4712SSatish Balay 694e2b4712SSatish Balay #undef __FUNC__ 704e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_7" 714e2b4712SSatish Balay int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx) 724e2b4712SSatish Balay { 734e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 744e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 754e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 764e2b4712SSatish Balay int *diag = a->diag; 773f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 783f1db9ecSBarry Smith Scalar sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 793f1db9ecSBarry Smith Scalar *x,*b,*tmp; 804e2b4712SSatish Balay 814e2b4712SSatish Balay PetscFunctionBegin; 82e1311b90SBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 83e1311b90SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 844e2b4712SSatish Balay tmp = a->solve_work; 854e2b4712SSatish Balay 864e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 874e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 884e2b4712SSatish Balay 894e2b4712SSatish Balay /* forward solve the lower triangular */ 904e2b4712SSatish Balay idx = 7*(*r++); 914e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; 924e2b4712SSatish Balay tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 934e2b4712SSatish Balay tmp[5] = b[5+idx]; tmp[6] = b[6+idx]; 944e2b4712SSatish Balay 954e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 964e2b4712SSatish Balay v = aa + 49*ai[i]; 974e2b4712SSatish Balay vi = aj + ai[i]; 984e2b4712SSatish Balay nz = diag[i] - ai[i]; 994e2b4712SSatish Balay idx = 7*(*r++); 1004e2b4712SSatish Balay sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 1014e2b4712SSatish Balay sum5 = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx]; 1024e2b4712SSatish Balay while (nz--) { 1034e2b4712SSatish Balay idx = 7*(*vi++); 1044e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 1054e2b4712SSatish Balay x4 = tmp[3+idx];x5 = tmp[4+idx]; 1064e2b4712SSatish Balay x6 = tmp[5+idx];x7 = tmp[6+idx]; 1074e2b4712SSatish Balay sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1084e2b4712SSatish Balay sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1094e2b4712SSatish Balay sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1104e2b4712SSatish Balay sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1114e2b4712SSatish Balay sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1124e2b4712SSatish Balay sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1134e2b4712SSatish Balay sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1144e2b4712SSatish Balay v += 49; 1154e2b4712SSatish Balay } 1164e2b4712SSatish Balay idx = 7*i; 1174e2b4712SSatish Balay tmp[idx] = sum1;tmp[1+idx] = sum2; 1184e2b4712SSatish Balay tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 1194e2b4712SSatish Balay tmp[5+idx] = sum6;tmp[6+idx] = sum7; 1204e2b4712SSatish Balay } 1214e2b4712SSatish Balay /* backward solve the upper triangular */ 1224e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 1234e2b4712SSatish Balay v = aa + 49*diag[i] + 49; 1244e2b4712SSatish Balay vi = aj + diag[i] + 1; 1254e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 1264e2b4712SSatish Balay idt = 7*i; 1274e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; 1284e2b4712SSatish Balay sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 1294e2b4712SSatish Balay sum6 = tmp[5+idt];sum7 = tmp[6+idt]; 1304e2b4712SSatish Balay while (nz--) { 1314e2b4712SSatish Balay idx = 7*(*vi++); 1324e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 1334e2b4712SSatish Balay x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 1344e2b4712SSatish Balay x6 = tmp[5+idx]; x7 = tmp[6+idx]; 1354e2b4712SSatish Balay sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1364e2b4712SSatish Balay sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1374e2b4712SSatish Balay sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1384e2b4712SSatish Balay sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1394e2b4712SSatish Balay sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1404e2b4712SSatish Balay sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1414e2b4712SSatish Balay sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1424e2b4712SSatish Balay v += 49; 1434e2b4712SSatish Balay } 1444e2b4712SSatish Balay idc = 7*(*c--); 1454e2b4712SSatish Balay v = aa + 49*diag[i]; 1464e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1+v[7]*sum2+v[14]*sum3+ 1474e2b4712SSatish Balay v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7; 1484e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+ 1494e2b4712SSatish Balay v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7; 1504e2b4712SSatish Balay x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+ 1514e2b4712SSatish Balay v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7; 1524e2b4712SSatish Balay x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+ 1534e2b4712SSatish Balay v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7; 1544e2b4712SSatish Balay x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+ 1554e2b4712SSatish Balay v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7; 1564e2b4712SSatish Balay x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+ 1574e2b4712SSatish Balay v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7; 1584e2b4712SSatish Balay x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+ 1594e2b4712SSatish Balay v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7; 1604e2b4712SSatish Balay } 1614e2b4712SSatish Balay 1624e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 1634e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 164e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 165e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 16642015d63SBarry Smith PLogFlops(2*49*(a->nz) - 7*a->n); 1674e2b4712SSatish Balay PetscFunctionReturn(0); 1684e2b4712SSatish Balay } 1694e2b4712SSatish Balay 1704e2b4712SSatish Balay #undef __FUNC__ 171*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering" 172*15091d37SBarry Smith int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 173*15091d37SBarry Smith { 174*15091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 175*15091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 176*15091d37SBarry Smith int ierr,*diag = a->diag,jdx; 177*15091d37SBarry Smith MatScalar *aa=a->a,*v; 178*15091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 179*15091d37SBarry Smith 180*15091d37SBarry Smith PetscFunctionBegin; 181*15091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 182*15091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 183*15091d37SBarry Smith /* forward solve the lower triangular */ 184*15091d37SBarry Smith idx = 0; 185*15091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 186*15091d37SBarry Smith x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 187*15091d37SBarry Smith x[6] = b[6+idx]; 188*15091d37SBarry Smith for ( i=1; i<n; i++ ) { 189*15091d37SBarry Smith v = aa + 49*ai[i]; 190*15091d37SBarry Smith vi = aj + ai[i]; 191*15091d37SBarry Smith nz = diag[i] - ai[i]; 192*15091d37SBarry Smith idx = 7*i; 193*15091d37SBarry Smith sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 194*15091d37SBarry Smith sum4 = b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx]; 195*15091d37SBarry Smith sum7 = b[6+idx]; 196*15091d37SBarry Smith while (nz--) { 197*15091d37SBarry Smith jdx = 7*(*vi++); 198*15091d37SBarry Smith x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 199*15091d37SBarry Smith x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 200*15091d37SBarry Smith x7 = x[6+jdx]; 201*15091d37SBarry Smith sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 202*15091d37SBarry Smith sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 203*15091d37SBarry Smith sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 204*15091d37SBarry Smith sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 205*15091d37SBarry Smith sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 206*15091d37SBarry Smith sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 207*15091d37SBarry Smith sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 208*15091d37SBarry Smith v += 49; 209*15091d37SBarry Smith } 210*15091d37SBarry Smith x[idx] = sum1; 211*15091d37SBarry Smith x[1+idx] = sum2; 212*15091d37SBarry Smith x[2+idx] = sum3; 213*15091d37SBarry Smith x[3+idx] = sum4; 214*15091d37SBarry Smith x[4+idx] = sum5; 215*15091d37SBarry Smith x[5+idx] = sum6; 216*15091d37SBarry Smith x[6+idx] = sum7; 217*15091d37SBarry Smith } 218*15091d37SBarry Smith /* backward solve the upper triangular */ 219*15091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 220*15091d37SBarry Smith v = aa + 49*diag[i] + 49; 221*15091d37SBarry Smith vi = aj + diag[i] + 1; 222*15091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 223*15091d37SBarry Smith idt = 7*i; 224*15091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 225*15091d37SBarry Smith sum3 = x[2+idt]; sum4 = x[3+idt]; 226*15091d37SBarry Smith sum5 = x[4+idt]; sum6 = x[5+idt]; 227*15091d37SBarry Smith sum7 = x[6+idt]; 228*15091d37SBarry Smith while (nz--) { 229*15091d37SBarry Smith idx = 7*(*vi++); 230*15091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 231*15091d37SBarry Smith x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 232*15091d37SBarry Smith x7 = x[6+idx]; 233*15091d37SBarry Smith sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 234*15091d37SBarry Smith sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 235*15091d37SBarry Smith sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 236*15091d37SBarry Smith sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 237*15091d37SBarry Smith sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 238*15091d37SBarry Smith sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 239*15091d37SBarry Smith sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 240*15091d37SBarry Smith v += 49; 241*15091d37SBarry Smith } 242*15091d37SBarry Smith v = aa + 49*diag[i]; 243*15091d37SBarry Smith x[idt] = v[0]*sum1 + v[7]*sum2 + v[14]*sum3 + v[21]*sum4 244*15091d37SBarry Smith + v[28]*sum5 + v[35]*sum6 + v[42]*sum7; 245*15091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[8]*sum2 + v[15]*sum3 + v[22]*sum4 246*15091d37SBarry Smith + v[29]*sum5 + v[36]*sum6 + v[43]*sum7; 247*15091d37SBarry Smith x[2+idt] = v[2]*sum1 + v[9]*sum2 + v[16]*sum3 + v[23]*sum4 248*15091d37SBarry Smith + v[30]*sum5 + v[37]*sum6 + v[44]*sum7; 249*15091d37SBarry Smith x[3+idt] = v[3]*sum1 + v[10]*sum2 + v[17]*sum3 + v[24]*sum4 250*15091d37SBarry Smith + v[31]*sum5 + v[38]*sum6 + v[45]*sum7; 251*15091d37SBarry Smith x[4+idt] = v[4]*sum1 + v[11]*sum2 + v[18]*sum3 + v[25]*sum4 252*15091d37SBarry Smith + v[32]*sum5 + v[39]*sum6 + v[46]*sum7; 253*15091d37SBarry Smith x[5+idt] = v[5]*sum1 + v[12]*sum2 + v[19]*sum3 + v[26]*sum4 254*15091d37SBarry Smith + v[33]*sum5 + v[40]*sum6 + v[47]*sum7; 255*15091d37SBarry Smith x[6+idt] = v[6]*sum1 + v[13]*sum2 + v[20]*sum3 + v[27]*sum4 256*15091d37SBarry Smith + v[34]*sum5 + v[41]*sum6 + v[48]*sum7; 257*15091d37SBarry Smith } 258*15091d37SBarry Smith 259*15091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 260*15091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 261*15091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 262*15091d37SBarry Smith PetscFunctionReturn(0); 263*15091d37SBarry Smith } 264*15091d37SBarry Smith 265*15091d37SBarry Smith #undef __FUNC__ 266*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6" 267*15091d37SBarry Smith int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx) 268*15091d37SBarry Smith { 269*15091d37SBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 270*15091d37SBarry Smith IS iscol=a->col,isrow=a->row; 271*15091d37SBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 272*15091d37SBarry Smith int *diag = a->diag; 273*15091d37SBarry Smith MatScalar *aa=a->a,*v; 274*15091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6,*tmp; 275*15091d37SBarry Smith 276*15091d37SBarry Smith PetscFunctionBegin; 277*15091d37SBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 278*15091d37SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 279*15091d37SBarry Smith tmp = a->solve_work; 280*15091d37SBarry Smith 281*15091d37SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 282*15091d37SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 283*15091d37SBarry Smith 284*15091d37SBarry Smith /* forward solve the lower triangular */ 285*15091d37SBarry Smith idx = 6*(*r++); 286*15091d37SBarry Smith tmp[0] = b[idx]; tmp[1] = b[1+idx]; 287*15091d37SBarry Smith tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 288*15091d37SBarry Smith tmp[4] = b[4+idx]; tmp[5] = b[5+idx]; 289*15091d37SBarry Smith for ( i=1; i<n; i++ ) { 290*15091d37SBarry Smith v = aa + 36*ai[i]; 291*15091d37SBarry Smith vi = aj + ai[i]; 292*15091d37SBarry Smith nz = diag[i] - ai[i]; 293*15091d37SBarry Smith idx = 6*(*r++); 294*15091d37SBarry Smith sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 295*15091d37SBarry Smith sum5 = b[4+idx]; sum6 = b[5+idx]; 296*15091d37SBarry Smith while (nz--) { 297*15091d37SBarry Smith idx = 6*(*vi++); 298*15091d37SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 299*15091d37SBarry Smith x4 = tmp[3+idx]; x5 = tmp[4+idx]; x6 = tmp[5+idx]; 300*15091d37SBarry Smith sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 301*15091d37SBarry Smith sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 302*15091d37SBarry Smith sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 303*15091d37SBarry Smith sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 304*15091d37SBarry Smith sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 305*15091d37SBarry Smith sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 306*15091d37SBarry Smith v += 36; 307*15091d37SBarry Smith } 308*15091d37SBarry Smith idx = 6*i; 309*15091d37SBarry Smith tmp[idx] = sum1;tmp[1+idx] = sum2; 310*15091d37SBarry Smith tmp[2+idx] = sum3;tmp[3+idx] = sum4; 311*15091d37SBarry Smith tmp[4+idx] = sum5;tmp[5+idx] = sum6; 312*15091d37SBarry Smith } 313*15091d37SBarry Smith /* backward solve the upper triangular */ 314*15091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 315*15091d37SBarry Smith v = aa + 36*diag[i] + 36; 316*15091d37SBarry Smith vi = aj + diag[i] + 1; 317*15091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 318*15091d37SBarry Smith idt = 6*i; 319*15091d37SBarry Smith sum1 = tmp[idt]; sum2 = tmp[1+idt]; 320*15091d37SBarry Smith sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 321*15091d37SBarry Smith sum5 = tmp[4+idt];sum6 = tmp[5+idt]; 322*15091d37SBarry Smith while (nz--) { 323*15091d37SBarry Smith idx = 6*(*vi++); 324*15091d37SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; 325*15091d37SBarry Smith x3 = tmp[2+idx]; x4 = tmp[3+idx]; 326*15091d37SBarry Smith x5 = tmp[4+idx]; x6 = tmp[5+idx]; 327*15091d37SBarry Smith sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 328*15091d37SBarry Smith sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 329*15091d37SBarry Smith sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 330*15091d37SBarry Smith sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 331*15091d37SBarry Smith sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 332*15091d37SBarry Smith sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 333*15091d37SBarry Smith v += 36; 334*15091d37SBarry Smith } 335*15091d37SBarry Smith idc = 6*(*c--); 336*15091d37SBarry Smith v = aa + 36*diag[i]; 337*15091d37SBarry Smith x[idc] = tmp[idt] = v[0]*sum1+v[6]*sum2+v[12]*sum3+ 338*15091d37SBarry Smith v[18]*sum4+v[24]*sum5+v[30]*sum6; 339*15091d37SBarry Smith x[1+idc] = tmp[1+idt] = v[1]*sum1+v[7]*sum2+v[13]*sum3+ 340*15091d37SBarry Smith v[19]*sum4+v[25]*sum5+v[31]*sum6; 341*15091d37SBarry Smith x[2+idc] = tmp[2+idt] = v[2]*sum1+v[8]*sum2+v[14]*sum3+ 342*15091d37SBarry Smith v[20]*sum4+v[26]*sum5+v[32]*sum6; 343*15091d37SBarry Smith x[3+idc] = tmp[3+idt] = v[3]*sum1+v[9]*sum2+v[15]*sum3+ 344*15091d37SBarry Smith v[21]*sum4+v[27]*sum5+v[33]*sum6; 345*15091d37SBarry Smith x[4+idc] = tmp[4+idt] = v[4]*sum1+v[10]*sum2+v[16]*sum3+ 346*15091d37SBarry Smith v[22]*sum4+v[28]*sum5+v[34]*sum6; 347*15091d37SBarry Smith x[5+idc] = tmp[5+idt] = v[5]*sum1+v[11]*sum2+v[17]*sum3+ 348*15091d37SBarry Smith v[23]*sum4+v[29]*sum5+v[35]*sum6; 349*15091d37SBarry Smith } 350*15091d37SBarry Smith 351*15091d37SBarry Smith ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 352*15091d37SBarry Smith ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 353*15091d37SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 354*15091d37SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 355*15091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 356*15091d37SBarry Smith PetscFunctionReturn(0); 357*15091d37SBarry Smith } 358*15091d37SBarry Smith 359*15091d37SBarry Smith #undef __FUNC__ 360*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering" 361*15091d37SBarry Smith int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 362*15091d37SBarry Smith { 363*15091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 364*15091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 365*15091d37SBarry Smith int ierr,*diag = a->diag,jdx; 366*15091d37SBarry Smith MatScalar *aa=a->a,*v; 367*15091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6; 368*15091d37SBarry Smith 369*15091d37SBarry Smith PetscFunctionBegin; 370*15091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 371*15091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 372*15091d37SBarry Smith /* forward solve the lower triangular */ 373*15091d37SBarry Smith idx = 0; 374*15091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 375*15091d37SBarry Smith x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 376*15091d37SBarry Smith for ( i=1; i<n; i++ ) { 377*15091d37SBarry Smith v = aa + 36*ai[i]; 378*15091d37SBarry Smith vi = aj + ai[i]; 379*15091d37SBarry Smith nz = diag[i] - ai[i]; 380*15091d37SBarry Smith idx = 6*i; 381*15091d37SBarry Smith sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 382*15091d37SBarry Smith sum4 = b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx]; 383*15091d37SBarry Smith while (nz--) { 384*15091d37SBarry Smith jdx = 6*(*vi++); 385*15091d37SBarry Smith x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 386*15091d37SBarry Smith x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 387*15091d37SBarry Smith sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 388*15091d37SBarry Smith sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 389*15091d37SBarry Smith sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 390*15091d37SBarry Smith sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 391*15091d37SBarry Smith sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 392*15091d37SBarry Smith sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 393*15091d37SBarry Smith v += 36; 394*15091d37SBarry Smith } 395*15091d37SBarry Smith x[idx] = sum1; 396*15091d37SBarry Smith x[1+idx] = sum2; 397*15091d37SBarry Smith x[2+idx] = sum3; 398*15091d37SBarry Smith x[3+idx] = sum4; 399*15091d37SBarry Smith x[4+idx] = sum5; 400*15091d37SBarry Smith x[5+idx] = sum6; 401*15091d37SBarry Smith } 402*15091d37SBarry Smith /* backward solve the upper triangular */ 403*15091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 404*15091d37SBarry Smith v = aa + 36*diag[i] + 36; 405*15091d37SBarry Smith vi = aj + diag[i] + 1; 406*15091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 407*15091d37SBarry Smith idt = 6*i; 408*15091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 409*15091d37SBarry Smith sum3 = x[2+idt]; sum4 = x[3+idt]; 410*15091d37SBarry Smith sum5 = x[4+idt]; sum6 = x[5+idt]; 411*15091d37SBarry Smith while (nz--) { 412*15091d37SBarry Smith idx = 6*(*vi++); 413*15091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 414*15091d37SBarry Smith x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 415*15091d37SBarry Smith sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 416*15091d37SBarry Smith sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 417*15091d37SBarry Smith sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 418*15091d37SBarry Smith sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 419*15091d37SBarry Smith sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 420*15091d37SBarry Smith sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 421*15091d37SBarry Smith v += 36; 422*15091d37SBarry Smith } 423*15091d37SBarry Smith v = aa + 36*diag[i]; 424*15091d37SBarry Smith x[idt] = v[0]*sum1 + v[6]*sum2 + v[12]*sum3 + v[18]*sum4 + v[24]*sum5 + v[30]*sum6; 425*15091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[7]*sum2 + v[13]*sum3 + v[19]*sum4 + v[25]*sum5 + v[31]*sum6; 426*15091d37SBarry Smith x[2+idt] = v[2]*sum1 + v[8]*sum2 + v[14]*sum3 + v[20]*sum4 + v[26]*sum5 + v[32]*sum6; 427*15091d37SBarry Smith x[3+idt] = v[3]*sum1 + v[9]*sum2 + v[15]*sum3 + v[21]*sum4 + v[27]*sum5 + v[33]*sum6; 428*15091d37SBarry Smith x[4+idt] = v[4]*sum1 + v[10]*sum2 + v[16]*sum3 + v[22]*sum4 + v[28]*sum5 + v[34]*sum6; 429*15091d37SBarry Smith x[5+idt] = v[5]*sum1 + v[11]*sum2 + v[17]*sum3 + v[23]*sum4 + v[29]*sum5 + v[35]*sum6; 430*15091d37SBarry Smith } 431*15091d37SBarry Smith 432*15091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 433*15091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 434*15091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 435*15091d37SBarry Smith PetscFunctionReturn(0); 436*15091d37SBarry Smith } 437*15091d37SBarry Smith 438*15091d37SBarry Smith #undef __FUNC__ 4394e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_5" 4404e2b4712SSatish Balay int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx) 4414e2b4712SSatish Balay { 4424e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 4434e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 4444e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 4454e2b4712SSatish Balay int *diag = a->diag; 4463f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 4473f1db9ecSBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp; 4484e2b4712SSatish Balay 4494e2b4712SSatish Balay PetscFunctionBegin; 450e1311b90SBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 451e1311b90SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 4524e2b4712SSatish Balay tmp = a->solve_work; 4534e2b4712SSatish Balay 4544e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 4554e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 4564e2b4712SSatish Balay 4574e2b4712SSatish Balay /* forward solve the lower triangular */ 4584e2b4712SSatish Balay idx = 5*(*r++); 4594e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; 4604e2b4712SSatish Balay tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 4614e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 4624e2b4712SSatish Balay v = aa + 25*ai[i]; 4634e2b4712SSatish Balay vi = aj + ai[i]; 4644e2b4712SSatish Balay nz = diag[i] - ai[i]; 4654e2b4712SSatish Balay idx = 5*(*r++); 4664e2b4712SSatish Balay sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 4674e2b4712SSatish Balay sum5 = b[4+idx]; 4684e2b4712SSatish Balay while (nz--) { 4694e2b4712SSatish Balay idx = 5*(*vi++); 4704e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 4714e2b4712SSatish Balay x4 = tmp[3+idx];x5 = tmp[4+idx]; 4724e2b4712SSatish Balay sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 4734e2b4712SSatish Balay sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 4744e2b4712SSatish Balay sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 4754e2b4712SSatish Balay sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 4764e2b4712SSatish Balay sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 4774e2b4712SSatish Balay v += 25; 4784e2b4712SSatish Balay } 4794e2b4712SSatish Balay idx = 5*i; 4804e2b4712SSatish Balay tmp[idx] = sum1;tmp[1+idx] = sum2; 4814e2b4712SSatish Balay tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 4824e2b4712SSatish Balay } 4834e2b4712SSatish Balay /* backward solve the upper triangular */ 4844e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 4854e2b4712SSatish Balay v = aa + 25*diag[i] + 25; 4864e2b4712SSatish Balay vi = aj + diag[i] + 1; 4874e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 4884e2b4712SSatish Balay idt = 5*i; 4894e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; 4904e2b4712SSatish Balay sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 4914e2b4712SSatish Balay while (nz--) { 4924e2b4712SSatish Balay idx = 5*(*vi++); 4934e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 4944e2b4712SSatish Balay x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 4954e2b4712SSatish Balay sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 4964e2b4712SSatish Balay sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 4974e2b4712SSatish Balay sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 4984e2b4712SSatish Balay sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 4994e2b4712SSatish Balay sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 5004e2b4712SSatish Balay v += 25; 5014e2b4712SSatish Balay } 5024e2b4712SSatish Balay idc = 5*(*c--); 5034e2b4712SSatish Balay v = aa + 25*diag[i]; 5044e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1+v[5]*sum2+v[10]*sum3+ 5054e2b4712SSatish Balay v[15]*sum4+v[20]*sum5; 5064e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+ 5074e2b4712SSatish Balay v[16]*sum4+v[21]*sum5; 5084e2b4712SSatish Balay x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+ 5094e2b4712SSatish Balay v[17]*sum4+v[22]*sum5; 5104e2b4712SSatish Balay x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+ 5114e2b4712SSatish Balay v[18]*sum4+v[23]*sum5; 5124e2b4712SSatish Balay x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+ 5134e2b4712SSatish Balay v[19]*sum4+v[24]*sum5; 5144e2b4712SSatish Balay } 5154e2b4712SSatish Balay 5164e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 5174e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 518e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 519e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 52042015d63SBarry Smith PLogFlops(2*25*(a->nz) - 5*a->n); 5214e2b4712SSatish Balay PetscFunctionReturn(0); 5224e2b4712SSatish Balay } 5234e2b4712SSatish Balay 5244e2b4712SSatish Balay #undef __FUNC__ 525*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering" 526*15091d37SBarry Smith int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 527*15091d37SBarry Smith { 528*15091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 529*15091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 530*15091d37SBarry Smith int ierr,*diag = a->diag,jdx; 531*15091d37SBarry Smith MatScalar *aa=a->a,*v; 532*15091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;; 533*15091d37SBarry Smith 534*15091d37SBarry Smith PetscFunctionBegin; 535*15091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 536*15091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 537*15091d37SBarry Smith /* forward solve the lower triangular */ 538*15091d37SBarry Smith idx = 0; 539*15091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx]; 540*15091d37SBarry Smith for ( i=1; i<n; i++ ) { 541*15091d37SBarry Smith v = aa + 25*ai[i]; 542*15091d37SBarry Smith vi = aj + ai[i]; 543*15091d37SBarry Smith nz = diag[i] - ai[i]; 544*15091d37SBarry Smith idx = 5*i; 545*15091d37SBarry Smith sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx]; 546*15091d37SBarry Smith while (nz--) { 547*15091d37SBarry Smith jdx = 5*(*vi++); 548*15091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx]; 549*15091d37SBarry Smith sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 550*15091d37SBarry Smith sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 551*15091d37SBarry Smith sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 552*15091d37SBarry Smith sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 553*15091d37SBarry Smith sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 554*15091d37SBarry Smith v += 25; 555*15091d37SBarry Smith } 556*15091d37SBarry Smith x[idx] = sum1; 557*15091d37SBarry Smith x[1+idx] = sum2; 558*15091d37SBarry Smith x[2+idx] = sum3; 559*15091d37SBarry Smith x[3+idx] = sum4; 560*15091d37SBarry Smith x[4+idx] = sum5; 561*15091d37SBarry Smith } 562*15091d37SBarry Smith /* backward solve the upper triangular */ 563*15091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 564*15091d37SBarry Smith v = aa + 25*diag[i] + 25; 565*15091d37SBarry Smith vi = aj + diag[i] + 1; 566*15091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 567*15091d37SBarry Smith idt = 5*i; 568*15091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 569*15091d37SBarry Smith sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt]; 570*15091d37SBarry Smith while (nz--) { 571*15091d37SBarry Smith idx = 5*(*vi++); 572*15091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 573*15091d37SBarry Smith sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 574*15091d37SBarry Smith sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 575*15091d37SBarry Smith sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 576*15091d37SBarry Smith sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 577*15091d37SBarry Smith sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 578*15091d37SBarry Smith v += 25; 579*15091d37SBarry Smith } 580*15091d37SBarry Smith v = aa + 25*diag[i]; 581*15091d37SBarry Smith x[idt] = v[0]*sum1 + v[5]*sum2 + v[10]*sum3 + v[15]*sum4 + v[20]*sum5; 582*15091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3 + v[16]*sum4 + v[21]*sum5; 583*15091d37SBarry Smith x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3 + v[17]*sum4 + v[22]*sum5; 584*15091d37SBarry Smith x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3 + v[18]*sum4 + v[23]*sum5; 585*15091d37SBarry Smith x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3 + v[19]*sum4 + v[24]*sum5; 586*15091d37SBarry Smith } 587*15091d37SBarry Smith 588*15091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 589*15091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 590*15091d37SBarry Smith PLogFlops(2*25*(a->nz) - 5*a->n); 591*15091d37SBarry Smith PetscFunctionReturn(0); 592*15091d37SBarry Smith } 593*15091d37SBarry Smith 594*15091d37SBarry Smith #undef __FUNC__ 5954e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4" 5964e2b4712SSatish Balay int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx) 5974e2b4712SSatish Balay { 5984e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 5994e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 6004e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 6014e2b4712SSatish Balay int *diag = a->diag; 6023f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 6033f1db9ecSBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp; 6044e2b4712SSatish Balay 6054e2b4712SSatish Balay PetscFunctionBegin; 606e1311b90SBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 607e1311b90SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 6084e2b4712SSatish Balay tmp = a->solve_work; 6094e2b4712SSatish Balay 6104e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 6114e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 6124e2b4712SSatish Balay 6134e2b4712SSatish Balay /* forward solve the lower triangular */ 6144e2b4712SSatish Balay idx = 4*(*r++); 6154e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; 6164e2b4712SSatish Balay tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 6174e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 6184e2b4712SSatish Balay v = aa + 16*ai[i]; 6194e2b4712SSatish Balay vi = aj + ai[i]; 6204e2b4712SSatish Balay nz = diag[i] - ai[i]; 6214e2b4712SSatish Balay idx = 4*(*r++); 6224e2b4712SSatish Balay sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 6234e2b4712SSatish Balay while (nz--) { 6244e2b4712SSatish Balay idx = 4*(*vi++); 6254e2b4712SSatish Balay x1 = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx]; 6264e2b4712SSatish Balay sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 6274e2b4712SSatish Balay sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 6284e2b4712SSatish Balay sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 6294e2b4712SSatish Balay sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 6304e2b4712SSatish Balay v += 16; 6314e2b4712SSatish Balay } 6324e2b4712SSatish Balay idx = 4*i; 6334e2b4712SSatish Balay tmp[idx] = sum1;tmp[1+idx] = sum2; 6344e2b4712SSatish Balay tmp[2+idx] = sum3;tmp[3+idx] = sum4; 6354e2b4712SSatish Balay } 6364e2b4712SSatish Balay /* backward solve the upper triangular */ 6374e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 6384e2b4712SSatish Balay v = aa + 16*diag[i] + 16; 6394e2b4712SSatish Balay vi = aj + diag[i] + 1; 6404e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 6414e2b4712SSatish Balay idt = 4*i; 6424e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; 6434e2b4712SSatish Balay sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 6444e2b4712SSatish Balay while (nz--) { 6454e2b4712SSatish Balay idx = 4*(*vi++); 6464e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 6474e2b4712SSatish Balay x3 = tmp[2+idx]; x4 = tmp[3+idx]; 6484e2b4712SSatish Balay sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 6494e2b4712SSatish Balay sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 6504e2b4712SSatish Balay sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 6514e2b4712SSatish Balay sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 6524e2b4712SSatish Balay v += 16; 6534e2b4712SSatish Balay } 6544e2b4712SSatish Balay idc = 4*(*c--); 6554e2b4712SSatish Balay v = aa + 16*diag[i]; 6564e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4; 6574e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4; 6584e2b4712SSatish Balay x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4; 6594e2b4712SSatish Balay x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4; 6604e2b4712SSatish Balay } 6614e2b4712SSatish Balay 6624e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 6634e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 664e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 665e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 66642015d63SBarry Smith PLogFlops(2*16*(a->nz) - 4*a->n); 6674e2b4712SSatish Balay PetscFunctionReturn(0); 6684e2b4712SSatish Balay } 6690ef38995SBarry Smith 6700ef38995SBarry Smith 6714e2b4712SSatish Balay /* 6724e2b4712SSatish Balay Special case where the matrix was ILU(0) factored in the natural 6734e2b4712SSatish Balay ordering. This eliminates the need for the column and row permutation. 6744e2b4712SSatish Balay */ 6754e2b4712SSatish Balay #undef __FUNC__ 6764e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering" 6774e2b4712SSatish Balay int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 6784e2b4712SSatish Balay { 6794e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 68030d4dcafSBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 68130d4dcafSBarry Smith int ierr,*diag = a->diag; 6823f1db9ecSBarry Smith MatScalar *aa=a->a; 68330d4dcafSBarry Smith Scalar *x,*b; 6844e2b4712SSatish Balay 6854e2b4712SSatish Balay PetscFunctionBegin; 686e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 687e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6884e2b4712SSatish Balay 6892853dc0eSBarry Smith #if defined(USE_FORTRAN_KERNEL_SOLVEBAIJBLAS) 6902853dc0eSBarry Smith { 6912853dc0eSBarry Smith static Scalar w[2000]; /* very BAD need to fix */ 6922853dc0eSBarry Smith fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w); 6932853dc0eSBarry Smith } 6942853dc0eSBarry Smith #elif defined(USE_FORTRAN_KERNEL_SOLVEBAIJ) 6952853dc0eSBarry Smith { 6962853dc0eSBarry Smith static Scalar w[2000]; /* very BAD need to fix */ 6972853dc0eSBarry Smith fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w); 6982853dc0eSBarry Smith } 6992853dc0eSBarry Smith #elif defined(USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL) 7002853dc0eSBarry Smith fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b); 701e1293385SBarry Smith #else 70230d4dcafSBarry Smith { 7033f1db9ecSBarry Smith Scalar sum1,sum2,sum3,sum4,x1,x2,x3,x4; 7043f1db9ecSBarry Smith MatScalar *v; 70530d4dcafSBarry Smith int jdx,idt,idx,nz,*vi,i; 706e1293385SBarry Smith 7074e2b4712SSatish Balay /* forward solve the lower triangular */ 7084e2b4712SSatish Balay idx = 0; 709e1293385SBarry Smith x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3]; 7104e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 7114e2b4712SSatish Balay v = aa + 16*ai[i]; 7124e2b4712SSatish Balay vi = aj + ai[i]; 7134e2b4712SSatish Balay nz = diag[i] - ai[i]; 714e1293385SBarry Smith idx += 4; 7154e2b4712SSatish Balay sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 7164e2b4712SSatish Balay while (nz--) { 7174e2b4712SSatish Balay jdx = 4*(*vi++); 7184e2b4712SSatish Balay x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; 7194e2b4712SSatish Balay sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 7204e2b4712SSatish Balay sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 7214e2b4712SSatish Balay sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 7224e2b4712SSatish Balay sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 7234e2b4712SSatish Balay v += 16; 7244e2b4712SSatish Balay } 7254e2b4712SSatish Balay x[idx] = sum1; 7264e2b4712SSatish Balay x[1+idx] = sum2; 7274e2b4712SSatish Balay x[2+idx] = sum3; 7284e2b4712SSatish Balay x[3+idx] = sum4; 7294e2b4712SSatish Balay } 7304e2b4712SSatish Balay /* backward solve the upper triangular */ 7314e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 7324e2b4712SSatish Balay v = aa + 16*diag[i] + 16; 7334e2b4712SSatish Balay vi = aj + diag[i] + 1; 7344e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 7354e2b4712SSatish Balay idt = 4*i; 7364e2b4712SSatish Balay sum1 = x[idt]; sum2 = x[1+idt]; 7374e2b4712SSatish Balay sum3 = x[2+idt];sum4 = x[3+idt]; 7384e2b4712SSatish Balay while (nz--) { 7394e2b4712SSatish Balay idx = 4*(*vi++); 7404e2b4712SSatish Balay x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; 7414e2b4712SSatish Balay sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 7424e2b4712SSatish Balay sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 7434e2b4712SSatish Balay sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 7444e2b4712SSatish Balay sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 7454e2b4712SSatish Balay v += 16; 7464e2b4712SSatish Balay } 7474e2b4712SSatish Balay v = aa + 16*diag[i]; 7484e2b4712SSatish Balay x[idt] = v[0]*sum1 + v[4]*sum2 + v[8]*sum3 + v[12]*sum4; 7494e2b4712SSatish Balay x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3 + v[13]*sum4; 7504e2b4712SSatish Balay x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4; 7514e2b4712SSatish Balay x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4; 7524e2b4712SSatish Balay } 75330d4dcafSBarry Smith } 754e1293385SBarry Smith #endif 7554e2b4712SSatish Balay 756e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 757e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 75842015d63SBarry Smith PLogFlops(2*16*(a->nz) - 4*a->n); 7594e2b4712SSatish Balay PetscFunctionReturn(0); 7604e2b4712SSatish Balay } 7614e2b4712SSatish Balay 762667159a5SBarry Smith #undef __FUNC__ 7634e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_3" 7644e2b4712SSatish Balay int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx) 7654e2b4712SSatish Balay { 7664e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 7674e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 7684e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 7694e2b4712SSatish Balay int *diag = a->diag; 7703f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 7713f1db9ecSBarry Smith Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp; 7724e2b4712SSatish Balay 7734e2b4712SSatish Balay PetscFunctionBegin; 774e1311b90SBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 775e1311b90SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 7764e2b4712SSatish Balay tmp = a->solve_work; 7774e2b4712SSatish Balay 7784e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 7794e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 7804e2b4712SSatish Balay 7814e2b4712SSatish Balay /* forward solve the lower triangular */ 7824e2b4712SSatish Balay idx = 3*(*r++); 7834e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx]; 7844e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 7854e2b4712SSatish Balay v = aa + 9*ai[i]; 7864e2b4712SSatish Balay vi = aj + ai[i]; 7874e2b4712SSatish Balay nz = diag[i] - ai[i]; 7884e2b4712SSatish Balay idx = 3*(*r++); 7894e2b4712SSatish Balay sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 7904e2b4712SSatish Balay while (nz--) { 7914e2b4712SSatish Balay idx = 3*(*vi++); 7924e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 7934e2b4712SSatish Balay sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 7944e2b4712SSatish Balay sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 7954e2b4712SSatish Balay sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 7964e2b4712SSatish Balay v += 9; 7974e2b4712SSatish Balay } 7984e2b4712SSatish Balay idx = 3*i; 7994e2b4712SSatish Balay tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3; 8004e2b4712SSatish Balay } 8014e2b4712SSatish Balay /* backward solve the upper triangular */ 8024e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 8034e2b4712SSatish Balay v = aa + 9*diag[i] + 9; 8044e2b4712SSatish Balay vi = aj + diag[i] + 1; 8054e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 8064e2b4712SSatish Balay idt = 3*i; 8074e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt]; 8084e2b4712SSatish Balay while (nz--) { 8094e2b4712SSatish Balay idx = 3*(*vi++); 8104e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 8114e2b4712SSatish Balay sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 8124e2b4712SSatish Balay sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 8134e2b4712SSatish Balay sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 8144e2b4712SSatish Balay v += 9; 8154e2b4712SSatish Balay } 8164e2b4712SSatish Balay idc = 3*(*c--); 8174e2b4712SSatish Balay v = aa + 9*diag[i]; 8184e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 8194e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 8204e2b4712SSatish Balay x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 8214e2b4712SSatish Balay } 8224e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 8234e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 824e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 825e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 82642015d63SBarry Smith PLogFlops(2*9*(a->nz) - 3*a->n); 8274e2b4712SSatish Balay PetscFunctionReturn(0); 8284e2b4712SSatish Balay } 8294e2b4712SSatish Balay 830*15091d37SBarry Smith /* 831*15091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 832*15091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 833*15091d37SBarry Smith */ 834*15091d37SBarry Smith #undef __FUNC__ 835*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering" 836*15091d37SBarry Smith int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 837*15091d37SBarry Smith { 838*15091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 839*15091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 840*15091d37SBarry Smith int ierr,*diag = a->diag; 841*15091d37SBarry Smith MatScalar *aa=a->a, *v; 842*15091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3; 843*15091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 844*15091d37SBarry Smith 845*15091d37SBarry Smith PetscFunctionBegin; 846*15091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 847*15091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 848*15091d37SBarry Smith 849*15091d37SBarry Smith 850*15091d37SBarry Smith /* forward solve the lower triangular */ 851*15091d37SBarry Smith idx = 0; 852*15091d37SBarry Smith x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; 853*15091d37SBarry Smith for ( i=1; i<n; i++ ) { 854*15091d37SBarry Smith v = aa + 9*ai[i]; 855*15091d37SBarry Smith vi = aj + ai[i]; 856*15091d37SBarry Smith nz = diag[i] - ai[i]; 857*15091d37SBarry Smith idx += 3; 858*15091d37SBarry Smith sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx]; 859*15091d37SBarry Smith while (nz--) { 860*15091d37SBarry Smith jdx = 3*(*vi++); 861*15091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx]; 862*15091d37SBarry Smith sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 863*15091d37SBarry Smith sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 864*15091d37SBarry Smith sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 865*15091d37SBarry Smith v += 9; 866*15091d37SBarry Smith } 867*15091d37SBarry Smith x[idx] = sum1; 868*15091d37SBarry Smith x[1+idx] = sum2; 869*15091d37SBarry Smith x[2+idx] = sum3; 870*15091d37SBarry Smith } 871*15091d37SBarry Smith /* backward solve the upper triangular */ 872*15091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 873*15091d37SBarry Smith v = aa + 9*diag[i] + 9; 874*15091d37SBarry Smith vi = aj + diag[i] + 1; 875*15091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 876*15091d37SBarry Smith idt = 3*i; 877*15091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 878*15091d37SBarry Smith sum3 = x[2+idt]; 879*15091d37SBarry Smith while (nz--) { 880*15091d37SBarry Smith idx = 3*(*vi++); 881*15091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 882*15091d37SBarry Smith sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 883*15091d37SBarry Smith sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 884*15091d37SBarry Smith sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 885*15091d37SBarry Smith v += 9; 886*15091d37SBarry Smith } 887*15091d37SBarry Smith v = aa + 9*diag[i]; 888*15091d37SBarry Smith x[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 889*15091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 890*15091d37SBarry Smith x[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 891*15091d37SBarry Smith } 892*15091d37SBarry Smith 893*15091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 894*15091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 895*15091d37SBarry Smith PLogFlops(2*9*(a->nz) - 3*a->n); 896*15091d37SBarry Smith PetscFunctionReturn(0); 897*15091d37SBarry Smith } 898*15091d37SBarry Smith 8994e2b4712SSatish Balay #undef __FUNC__ 9004e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_2" 9014e2b4712SSatish Balay int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx) 9024e2b4712SSatish Balay { 9034e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 9044e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 9054e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 9064e2b4712SSatish Balay int *diag = a->diag; 9073f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 9083f1db9ecSBarry Smith Scalar *x,*b,sum1,sum2,x1,x2,*tmp; 9094e2b4712SSatish Balay 9104e2b4712SSatish Balay PetscFunctionBegin; 911e1311b90SBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 912e1311b90SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 9134e2b4712SSatish Balay tmp = a->solve_work; 9144e2b4712SSatish Balay 9154e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 9164e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 9174e2b4712SSatish Balay 9184e2b4712SSatish Balay /* forward solve the lower triangular */ 9194e2b4712SSatish Balay idx = 2*(*r++); 9204e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; 9214e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 9224e2b4712SSatish Balay v = aa + 4*ai[i]; 9234e2b4712SSatish Balay vi = aj + ai[i]; 9244e2b4712SSatish Balay nz = diag[i] - ai[i]; 9254e2b4712SSatish Balay idx = 2*(*r++); 9264e2b4712SSatish Balay sum1 = b[idx]; sum2 = b[1+idx]; 9274e2b4712SSatish Balay while (nz--) { 9284e2b4712SSatish Balay idx = 2*(*vi++); 9294e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 9304e2b4712SSatish Balay sum1 -= v[0]*x1 + v[2]*x2; 9314e2b4712SSatish Balay sum2 -= v[1]*x1 + v[3]*x2; 9324e2b4712SSatish Balay v += 4; 9334e2b4712SSatish Balay } 9344e2b4712SSatish Balay idx = 2*i; 9354e2b4712SSatish Balay tmp[idx] = sum1; tmp[1+idx] = sum2; 9364e2b4712SSatish Balay } 9374e2b4712SSatish Balay /* backward solve the upper triangular */ 9384e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 9394e2b4712SSatish Balay v = aa + 4*diag[i] + 4; 9404e2b4712SSatish Balay vi = aj + diag[i] + 1; 9414e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 9424e2b4712SSatish Balay idt = 2*i; 9434e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; 9444e2b4712SSatish Balay while (nz--) { 9454e2b4712SSatish Balay idx = 2*(*vi++); 9464e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 9474e2b4712SSatish Balay sum1 -= v[0]*x1 + v[2]*x2; 9484e2b4712SSatish Balay sum2 -= v[1]*x1 + v[3]*x2; 9494e2b4712SSatish Balay v += 4; 9504e2b4712SSatish Balay } 9514e2b4712SSatish Balay idc = 2*(*c--); 9524e2b4712SSatish Balay v = aa + 4*diag[i]; 9534e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1 + v[2]*sum2; 9544e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2; 9554e2b4712SSatish Balay } 9564e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 9574e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 958e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 959e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 96042015d63SBarry Smith PLogFlops(2*4*(a->nz) - 2*a->n); 9614e2b4712SSatish Balay PetscFunctionReturn(0); 9624e2b4712SSatish Balay } 9634e2b4712SSatish Balay 964*15091d37SBarry Smith /* 965*15091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 966*15091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 967*15091d37SBarry Smith */ 968*15091d37SBarry Smith #undef __FUNC__ 969*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering" 970*15091d37SBarry Smith int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 971*15091d37SBarry Smith { 972*15091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 973*15091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 974*15091d37SBarry Smith int ierr,*diag = a->diag; 975*15091d37SBarry Smith MatScalar *aa=a->a,*v; 976*15091d37SBarry Smith Scalar *x,*b,sum1,sum2,x1,x2; 977*15091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 978*15091d37SBarry Smith 979*15091d37SBarry Smith PetscFunctionBegin; 980*15091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 981*15091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 982*15091d37SBarry Smith 983*15091d37SBarry Smith /* forward solve the lower triangular */ 984*15091d37SBarry Smith idx = 0; 985*15091d37SBarry Smith x[0] = b[0]; x[1] = b[1]; 986*15091d37SBarry Smith for ( i=1; i<n; i++ ) { 987*15091d37SBarry Smith v = aa + 4*ai[i]; 988*15091d37SBarry Smith vi = aj + ai[i]; 989*15091d37SBarry Smith nz = diag[i] - ai[i]; 990*15091d37SBarry Smith idx += 2; 991*15091d37SBarry Smith sum1 = b[idx];sum2 = b[1+idx]; 992*15091d37SBarry Smith while (nz--) { 993*15091d37SBarry Smith jdx = 2*(*vi++); 994*15091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx]; 995*15091d37SBarry Smith sum1 -= v[0]*x1 + v[2]*x2; 996*15091d37SBarry Smith sum2 -= v[1]*x1 + v[3]*x2; 997*15091d37SBarry Smith v += 4; 998*15091d37SBarry Smith } 999*15091d37SBarry Smith x[idx] = sum1; 1000*15091d37SBarry Smith x[1+idx] = sum2; 1001*15091d37SBarry Smith } 1002*15091d37SBarry Smith /* backward solve the upper triangular */ 1003*15091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 1004*15091d37SBarry Smith v = aa + 4*diag[i] + 4; 1005*15091d37SBarry Smith vi = aj + diag[i] + 1; 1006*15091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 1007*15091d37SBarry Smith idt = 2*i; 1008*15091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 1009*15091d37SBarry Smith while (nz--) { 1010*15091d37SBarry Smith idx = 2*(*vi++); 1011*15091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 1012*15091d37SBarry Smith sum1 -= v[0]*x1 + v[2]*x2; 1013*15091d37SBarry Smith sum2 -= v[1]*x1 + v[3]*x2; 1014*15091d37SBarry Smith v += 4; 1015*15091d37SBarry Smith } 1016*15091d37SBarry Smith v = aa + 4*diag[i]; 1017*15091d37SBarry Smith x[idt] = v[0]*sum1 + v[2]*sum2; 1018*15091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[3]*sum2; 1019*15091d37SBarry Smith } 1020*15091d37SBarry Smith 1021*15091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1022*15091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1023*15091d37SBarry Smith PLogFlops(2*4*(a->nz) - 2*a->n); 1024*15091d37SBarry Smith PetscFunctionReturn(0); 1025*15091d37SBarry Smith } 1026*15091d37SBarry Smith 10274e2b4712SSatish Balay #undef __FUNC__ 10284e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_1" 10294e2b4712SSatish Balay int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx) 10304e2b4712SSatish Balay { 10314e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 10324e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 10334e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout; 10344e2b4712SSatish Balay int *diag = a->diag; 10353f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 10363f1db9ecSBarry Smith Scalar *x,*b,sum1,*tmp; 10374e2b4712SSatish Balay 10384e2b4712SSatish Balay PetscFunctionBegin; 10394e2b4712SSatish Balay if (!n) PetscFunctionReturn(0); 10404e2b4712SSatish Balay 1041e1311b90SBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 1042e1311b90SBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 10434e2b4712SSatish Balay tmp = a->solve_work; 10444e2b4712SSatish Balay 10454e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 10464e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 10474e2b4712SSatish Balay 10484e2b4712SSatish Balay /* forward solve the lower triangular */ 10494e2b4712SSatish Balay tmp[0] = b[*r++]; 10504e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 10514e2b4712SSatish Balay v = aa + ai[i]; 10524e2b4712SSatish Balay vi = aj + ai[i]; 10534e2b4712SSatish Balay nz = diag[i] - ai[i]; 10544e2b4712SSatish Balay sum1 = b[*r++]; 10554e2b4712SSatish Balay while (nz--) { 10564e2b4712SSatish Balay sum1 -= (*v++)*tmp[*vi++]; 10574e2b4712SSatish Balay } 10584e2b4712SSatish Balay tmp[i] = sum1; 10594e2b4712SSatish Balay } 10604e2b4712SSatish Balay /* backward solve the upper triangular */ 10614e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 10624e2b4712SSatish Balay v = aa + diag[i] + 1; 10634e2b4712SSatish Balay vi = aj + diag[i] + 1; 10644e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 10654e2b4712SSatish Balay sum1 = tmp[i]; 10664e2b4712SSatish Balay while (nz--) { 10674e2b4712SSatish Balay sum1 -= (*v++)*tmp[*vi++]; 10684e2b4712SSatish Balay } 10694e2b4712SSatish Balay x[*c--] = tmp[i] = aa[diag[i]]*sum1; 10704e2b4712SSatish Balay } 10714e2b4712SSatish Balay 10724e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 10734e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 1074e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1075e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 10764e2b4712SSatish Balay PLogFlops(2*1*(a->nz) - a->n); 10774e2b4712SSatish Balay PetscFunctionReturn(0); 10784e2b4712SSatish Balay } 1079*15091d37SBarry Smith /* 1080*15091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 1081*15091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 1082*15091d37SBarry Smith */ 1083*15091d37SBarry Smith #undef __FUNC__ 1084*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering" 1085*15091d37SBarry Smith int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1086*15091d37SBarry Smith { 1087*15091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1088*15091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 1089*15091d37SBarry Smith int ierr,*diag = a->diag; 1090*15091d37SBarry Smith MatScalar *aa=a->a; 1091*15091d37SBarry Smith Scalar *x,*b; 1092*15091d37SBarry Smith Scalar sum1,x1; 1093*15091d37SBarry Smith MatScalar *v; 1094*15091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 1095*15091d37SBarry Smith 1096*15091d37SBarry Smith PetscFunctionBegin; 1097*15091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1098*15091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1099*15091d37SBarry Smith 1100*15091d37SBarry Smith /* forward solve the lower triangular */ 1101*15091d37SBarry Smith idx = 0; 1102*15091d37SBarry Smith x[0] = b[0]; 1103*15091d37SBarry Smith for ( i=1; i<n; i++ ) { 1104*15091d37SBarry Smith v = aa + ai[i]; 1105*15091d37SBarry Smith vi = aj + ai[i]; 1106*15091d37SBarry Smith nz = diag[i] - ai[i]; 1107*15091d37SBarry Smith idx += 1; 1108*15091d37SBarry Smith sum1 = b[idx]; 1109*15091d37SBarry Smith while (nz--) { 1110*15091d37SBarry Smith jdx = *vi++; 1111*15091d37SBarry Smith x1 = x[jdx]; 1112*15091d37SBarry Smith sum1 -= v[0]*x1; 1113*15091d37SBarry Smith v += 1; 1114*15091d37SBarry Smith } 1115*15091d37SBarry Smith x[idx] = sum1; 1116*15091d37SBarry Smith } 1117*15091d37SBarry Smith /* backward solve the upper triangular */ 1118*15091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 1119*15091d37SBarry Smith v = aa + diag[i] + 1; 1120*15091d37SBarry Smith vi = aj + diag[i] + 1; 1121*15091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 1122*15091d37SBarry Smith idt = i; 1123*15091d37SBarry Smith sum1 = x[idt]; 1124*15091d37SBarry Smith while (nz--) { 1125*15091d37SBarry Smith idx = *vi++; 1126*15091d37SBarry Smith x1 = x[idx]; 1127*15091d37SBarry Smith sum1 -= v[0]*x1; 1128*15091d37SBarry Smith v += 1; 1129*15091d37SBarry Smith } 1130*15091d37SBarry Smith v = aa + diag[i]; 1131*15091d37SBarry Smith x[idt] = v[0]*sum1; 1132*15091d37SBarry Smith } 1133*15091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1134*15091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1135*15091d37SBarry Smith PLogFlops(2*(a->nz) - a->n); 1136*15091d37SBarry Smith PetscFunctionReturn(0); 1137*15091d37SBarry Smith } 11384e2b4712SSatish Balay 11394e2b4712SSatish Balay /* ----------------------------------------------------------------*/ 11404e2b4712SSatish Balay /* 11414e2b4712SSatish Balay This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 11424e2b4712SSatish Balay except that the data structure of Mat_SeqAIJ is slightly different. 11434e2b4712SSatish Balay Not a good example of code reuse. 11444e2b4712SSatish Balay */ 1145435faa5fSBarry Smith extern int MatMissingDiag_SeqBAIJ(Mat); 1146435faa5fSBarry Smith 11474e2b4712SSatish Balay #undef __FUNC__ 11484e2b4712SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ" 1149435faa5fSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 11504e2b4712SSatish Balay { 11514e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 11524e2b4712SSatish Balay IS isicol; 11534e2b4712SSatish Balay int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 11544e2b4712SSatish Balay int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 1155335d9088SBarry Smith int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 1156435faa5fSBarry Smith int incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill; 11574e2b4712SSatish Balay PetscTruth col_identity, row_identity; 1158435faa5fSBarry Smith double f; 11594e2b4712SSatish Balay 11604e2b4712SSatish Balay PetscFunctionBegin; 1161435faa5fSBarry Smith if (info) { 1162435faa5fSBarry Smith f = info->fill; 1163335d9088SBarry Smith levels = (int) info->levels; 1164335d9088SBarry Smith diagonal_fill = (int) info->diagonal_fill; 1165435faa5fSBarry Smith } else { 1166435faa5fSBarry Smith f = 1.0; 1167435faa5fSBarry Smith levels = 0; 1168435faa5fSBarry Smith diagonal_fill = 0; 1169435faa5fSBarry Smith } 1170e51c0b9cSSatish Balay ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 1171e51c0b9cSSatish Balay 11724e2b4712SSatish Balay /* special case that simply copies fill pattern */ 11734e2b4712SSatish Balay PetscValidHeaderSpecific(isrow,IS_COOKIE); 11744e2b4712SSatish Balay PetscValidHeaderSpecific(iscol,IS_COOKIE); 1175667159a5SBarry Smith ierr = ISIdentity(isrow,&row_identity); CHKERRQ(ierr); 1176667159a5SBarry Smith ierr = ISIdentity(iscol,&col_identity); CHKERRQ(ierr); 11774e2b4712SSatish Balay if (levels == 0 && row_identity && col_identity) { 1178e1293385SBarry Smith ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact); CHKERRQ(ierr); 11794e2b4712SSatish Balay (*fact)->factor = FACTOR_LU; 11804e2b4712SSatish Balay b = (Mat_SeqBAIJ *) (*fact)->data; 11814e2b4712SSatish Balay if (!b->diag) { 11824e2b4712SSatish Balay ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr); 11834e2b4712SSatish Balay } 1184435faa5fSBarry Smith ierr = MatMissingDiag_SeqBAIJ(*fact); CHKERRQ(ierr); 11854e2b4712SSatish Balay b->row = isrow; 11864e2b4712SSatish Balay b->col = iscol; 1187e51c0b9cSSatish Balay b->icol = isicol; 11884e2b4712SSatish Balay b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 11894e2b4712SSatish Balay /* 1190*15091d37SBarry Smith Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1191*15091d37SBarry Smith for ILU(0) factorization with natural ordering 11924e2b4712SSatish Balay */ 1193*15091d37SBarry Smith switch (b->bs) { 1194*15091d37SBarry Smith case 2: 1195*15091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1196*15091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1197*15091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1198*15091d37SBarry Smith break; 1199*15091d37SBarry Smith case 3: 1200*15091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1201*15091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1202*15091d37SBarry Smith PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1203*15091d37SBarry Smith break; 1204*15091d37SBarry Smith case 4: 1205f830108cSBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1206f830108cSBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1207*15091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1208*15091d37SBarry Smith break; 1209*15091d37SBarry Smith case 5: 1210667159a5SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1211667159a5SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1212*15091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1213*15091d37SBarry Smith break; 1214*15091d37SBarry Smith case 6: 1215*15091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1216*15091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1217*15091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1218*15091d37SBarry Smith break; 1219*15091d37SBarry Smith case 7: 1220*15091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1221*15091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1222*15091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1223*15091d37SBarry Smith break; 12244e2b4712SSatish Balay } 12254e2b4712SSatish Balay PetscFunctionReturn(0); 12264e2b4712SSatish Balay } 12274e2b4712SSatish Balay 12284e2b4712SSatish Balay ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 12294e2b4712SSatish Balay ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 12304e2b4712SSatish Balay 12314e2b4712SSatish Balay /* get new row pointers */ 12324e2b4712SSatish Balay ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 12334e2b4712SSatish Balay ainew[0] = 0; 12344e2b4712SSatish Balay /* don't know how many column pointers are needed so estimate */ 12354e2b4712SSatish Balay jmax = (int) (f*ai[n] + 1); 12364e2b4712SSatish Balay ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 12374e2b4712SSatish Balay /* ajfill is level of fill for each fill entry */ 12384e2b4712SSatish Balay ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill); 12394e2b4712SSatish Balay /* fill is a linked list of nonzeros in active row */ 12404e2b4712SSatish Balay fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill); 12414e2b4712SSatish Balay /* im is level for each filled value */ 12424e2b4712SSatish Balay im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im); 12434e2b4712SSatish Balay /* dloc is location of diagonal in factor */ 12444e2b4712SSatish Balay dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc); 12454e2b4712SSatish Balay dloc[0] = 0; 12464e2b4712SSatish Balay for ( prow=0; prow<n; prow++ ) { 1247435faa5fSBarry Smith 1248435faa5fSBarry Smith /* copy prow into linked list */ 12494e2b4712SSatish Balay nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 12504e2b4712SSatish Balay if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 12514e2b4712SSatish Balay xi = aj + ai[r[prow]]; 12524e2b4712SSatish Balay fill[n] = n; 1253435faa5fSBarry Smith fill[prow] = -1; /* marker for diagonal entry */ 12544e2b4712SSatish Balay while (nz--) { 12554e2b4712SSatish Balay fm = n; 12564e2b4712SSatish Balay idx = ic[*xi++]; 12574e2b4712SSatish Balay do { 12584e2b4712SSatish Balay m = fm; 12594e2b4712SSatish Balay fm = fill[m]; 12604e2b4712SSatish Balay } while (fm < idx); 12614e2b4712SSatish Balay fill[m] = idx; 12624e2b4712SSatish Balay fill[idx] = fm; 12634e2b4712SSatish Balay im[idx] = 0; 12644e2b4712SSatish Balay } 1265435faa5fSBarry Smith 1266435faa5fSBarry Smith /* make sure diagonal entry is included */ 1267435faa5fSBarry Smith if (diagonal_fill && fill[prow] == -1) { 1268435faa5fSBarry Smith fm = n; 1269435faa5fSBarry Smith while (fill[fm] < prow) fm = fill[fm]; 1270435faa5fSBarry Smith fill[prow] = fill[fm]; /* insert diagonal into linked list */ 1271435faa5fSBarry Smith fill[fm] = prow; 1272435faa5fSBarry Smith im[prow] = 0; 1273435faa5fSBarry Smith nzf++; 1274335d9088SBarry Smith dcount++; 1275435faa5fSBarry Smith } 1276435faa5fSBarry Smith 12774e2b4712SSatish Balay nzi = 0; 12784e2b4712SSatish Balay row = fill[n]; 12794e2b4712SSatish Balay while ( row < prow ) { 12804e2b4712SSatish Balay incrlev = im[row] + 1; 12814e2b4712SSatish Balay nz = dloc[row]; 1282435faa5fSBarry Smith xi = ajnew + ainew[row] + nz + 1; 12834e2b4712SSatish Balay flev = ajfill + ainew[row] + nz + 1; 12844e2b4712SSatish Balay nnz = ainew[row+1] - ainew[row] - nz - 1; 12854e2b4712SSatish Balay fm = row; 12864e2b4712SSatish Balay while (nnz-- > 0) { 12874e2b4712SSatish Balay idx = *xi++; 12884e2b4712SSatish Balay if (*flev + incrlev > levels) { 12894e2b4712SSatish Balay flev++; 12904e2b4712SSatish Balay continue; 12914e2b4712SSatish Balay } 12924e2b4712SSatish Balay do { 12934e2b4712SSatish Balay m = fm; 12944e2b4712SSatish Balay fm = fill[m]; 12954e2b4712SSatish Balay } while (fm < idx); 12964e2b4712SSatish Balay if (fm != idx) { 12974e2b4712SSatish Balay im[idx] = *flev + incrlev; 12984e2b4712SSatish Balay fill[m] = idx; 12994e2b4712SSatish Balay fill[idx] = fm; 13004e2b4712SSatish Balay fm = idx; 13014e2b4712SSatish Balay nzf++; 1302ecf371e4SBarry Smith } else { 13034e2b4712SSatish Balay if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 13044e2b4712SSatish Balay } 13054e2b4712SSatish Balay flev++; 13064e2b4712SSatish Balay } 13074e2b4712SSatish Balay row = fill[row]; 13084e2b4712SSatish Balay nzi++; 13094e2b4712SSatish Balay } 13104e2b4712SSatish Balay /* copy new filled row into permanent storage */ 13114e2b4712SSatish Balay ainew[prow+1] = ainew[prow] + nzf; 13124e2b4712SSatish Balay if (ainew[prow+1] > jmax) { 1313ecf371e4SBarry Smith 1314ecf371e4SBarry Smith /* estimate how much additional space we will need */ 1315ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1316ecf371e4SBarry Smith /* just double the memory each time */ 1317ecf371e4SBarry Smith int maxadd = jmax; 1318ecf371e4SBarry Smith /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */ 13194e2b4712SSatish Balay if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 13204e2b4712SSatish Balay jmax += maxadd; 1321ecf371e4SBarry Smith 1322ecf371e4SBarry Smith /* allocate a longer ajnew and ajfill */ 13234e2b4712SSatish Balay xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 13244e2b4712SSatish Balay PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int)); 13254e2b4712SSatish Balay PetscFree(ajnew); 13264e2b4712SSatish Balay ajnew = xi; 13274e2b4712SSatish Balay xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 13284e2b4712SSatish Balay PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int)); 13294e2b4712SSatish Balay PetscFree(ajfill); 13304e2b4712SSatish Balay ajfill = xi; 1331ecf371e4SBarry Smith realloc++; /* count how many reallocations are needed */ 13324e2b4712SSatish Balay } 13334e2b4712SSatish Balay xi = ajnew + ainew[prow]; 13344e2b4712SSatish Balay flev = ajfill + ainew[prow]; 13354e2b4712SSatish Balay dloc[prow] = nzi; 13364e2b4712SSatish Balay fm = fill[n]; 13374e2b4712SSatish Balay while (nzf--) { 13384e2b4712SSatish Balay *xi++ = fm; 13394e2b4712SSatish Balay *flev++ = im[fm]; 13404e2b4712SSatish Balay fm = fill[fm]; 13414e2b4712SSatish Balay } 1342435faa5fSBarry Smith /* make sure row has diagonal entry */ 1343435faa5fSBarry Smith if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1344435faa5fSBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1345435faa5fSBarry Smith try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1346435faa5fSBarry Smith } 13474e2b4712SSatish Balay } 13484e2b4712SSatish Balay PetscFree(ajfill); 13494e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 13504e2b4712SSatish Balay ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 13514e2b4712SSatish Balay PetscFree(fill); PetscFree(im); 13524e2b4712SSatish Balay 13534e2b4712SSatish Balay { 13544e2b4712SSatish Balay double af = ((double)ainew[n])/((double)ai[n]); 1355981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 13564e2b4712SSatish Balay realloc,f,af); 1357981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af); 1358981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af); 1359981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n"); 1360335d9088SBarry Smith if (diagonal_fill) { 1361335d9088SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount); 1362335d9088SBarry Smith } 13634e2b4712SSatish Balay } 13644e2b4712SSatish Balay 13654e2b4712SSatish Balay /* put together the new matrix */ 13664e2b4712SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1367fa6007c9SSatish Balay PLogObjectParent(*fact,isicol); 13684e2b4712SSatish Balay b = (Mat_SeqBAIJ *) (*fact)->data; 13694e2b4712SSatish Balay PetscFree(b->imax); 13704e2b4712SSatish Balay b->singlemalloc = 0; 13713f1db9ecSBarry Smith len = bs2*ainew[n]*sizeof(MatScalar); 13724e2b4712SSatish Balay /* the next line frees the default space generated by the Create() */ 13734e2b4712SSatish Balay PetscFree(b->a); PetscFree(b->ilen); 13743f1db9ecSBarry Smith b->a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a); 13754e2b4712SSatish Balay b->j = ajnew; 13764e2b4712SSatish Balay b->i = ainew; 13774e2b4712SSatish Balay for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 13784e2b4712SSatish Balay b->diag = dloc; 13794e2b4712SSatish Balay b->ilen = 0; 13804e2b4712SSatish Balay b->imax = 0; 13814e2b4712SSatish Balay b->row = isrow; 13824e2b4712SSatish Balay b->col = iscol; 1383e51c0b9cSSatish Balay b->icol = isicol; 13844e2b4712SSatish Balay b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 13854e2b4712SSatish Balay /* In b structure: Free imax, ilen, old a, old j. 13864e2b4712SSatish Balay Allocate dloc, solve_work, new a, new j */ 13874e2b4712SSatish Balay PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar)); 13884e2b4712SSatish Balay b->maxnz = b->nz = ainew[n]; 13894e2b4712SSatish Balay (*fact)->factor = FACTOR_LU; 13904e2b4712SSatish Balay 13914e2b4712SSatish Balay (*fact)->info.factor_mallocs = realloc; 13924e2b4712SSatish Balay (*fact)->info.fill_ratio_given = f; 13934e2b4712SSatish Balay (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 13944e2b4712SSatish Balay 13954e2b4712SSatish Balay PetscFunctionReturn(0); 13964e2b4712SSatish Balay } 13974e2b4712SSatish Balay 13984e2b4712SSatish Balay 13994e2b4712SSatish Balay 14004e2b4712SSatish Balay 1401