14e2b4712SSatish Balay #ifdef PETSC_RCS_HEADER 2*606d414cSSatish Balay static char vcid[] = "$Id: baijfact2.c,v 1.28 1999/05/12 03:29:44 bsmith Exp balay $"; 34e2b4712SSatish Balay #endif 44e2b4712SSatish Balay /* 54e2b4712SSatish Balay Factorization code for BAIJ format. 64e2b4712SSatish Balay */ 74e2b4712SSatish Balay 84e2b4712SSatish Balay #include "src/mat/impls/baij/seq/baij.h" 94e2b4712SSatish Balay #include "src/vec/vecimpl.h" 104e2b4712SSatish Balay #include "src/inline/ilu.h" 1174c49faeSBarry Smith #include "src/inline/dot.h" 124e2b4712SSatish Balay 134e2b4712SSatish Balay /* ----------------------------------------------------------- */ 144e2b4712SSatish Balay #undef __FUNC__ 154e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_N" 164e2b4712SSatish Balay int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 174e2b4712SSatish Balay { 184e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 194e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 204e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j; 214e2b4712SSatish Balay int nz,bs=a->bs,bs2=a->bs2,*rout,*cout; 223f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 233f1db9ecSBarry Smith Scalar *x,*b,*sum,*tmp,*lsum; 244e2b4712SSatish Balay 254e2b4712SSatish Balay PetscFunctionBegin; 26e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 284e2b4712SSatish Balay tmp = a->solve_work; 294e2b4712SSatish Balay 304e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 314e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 324e2b4712SSatish Balay 334e2b4712SSatish Balay /* forward solve the lower triangular */ 34549d3d68SSatish Balay ierr = PetscMemcpy(tmp,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr); 354e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 364e2b4712SSatish Balay v = aa + bs2*ai[i]; 374e2b4712SSatish Balay vi = aj + ai[i]; 384e2b4712SSatish Balay nz = a->diag[i] - ai[i]; 394e2b4712SSatish Balay sum = tmp + bs*i; 40962fb4a0SBarry Smith ierr = PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr); 414e2b4712SSatish Balay while (nz--) { 424e2b4712SSatish Balay Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++)); 434e2b4712SSatish Balay v += bs2; 444e2b4712SSatish Balay } 454e2b4712SSatish Balay } 464e2b4712SSatish Balay /* backward solve the upper triangular */ 474e2b4712SSatish Balay lsum = a->solve_work + a->n; 484e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 494e2b4712SSatish Balay v = aa + bs2*(a->diag[i] + 1); 504e2b4712SSatish Balay vi = aj + a->diag[i] + 1; 514e2b4712SSatish Balay nz = ai[i+1] - a->diag[i] - 1; 52549d3d68SSatish Balay ierr = PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr); 534e2b4712SSatish Balay while (nz--) { 544e2b4712SSatish Balay Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++)); 554e2b4712SSatish Balay v += bs2; 564e2b4712SSatish Balay } 574e2b4712SSatish Balay Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs); 58549d3d68SSatish Balay ierr = PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr); 594e2b4712SSatish Balay } 604e2b4712SSatish Balay 614e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 624e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 63e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 64e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6542015d63SBarry Smith PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n); 664e2b4712SSatish Balay PetscFunctionReturn(0); 674e2b4712SSatish Balay } 684e2b4712SSatish Balay 694e2b4712SSatish Balay #undef __FUNC__ 704e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_7" 714e2b4712SSatish Balay int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx) 724e2b4712SSatish Balay { 734e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 744e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 754e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 764e2b4712SSatish Balay int *diag = a->diag; 773f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 783f1db9ecSBarry Smith Scalar sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 793f1db9ecSBarry Smith Scalar *x,*b,*tmp; 804e2b4712SSatish Balay 814e2b4712SSatish Balay PetscFunctionBegin; 82e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 83e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 844e2b4712SSatish Balay tmp = a->solve_work; 854e2b4712SSatish Balay 864e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 874e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 884e2b4712SSatish Balay 894e2b4712SSatish Balay /* forward solve the lower triangular */ 904e2b4712SSatish Balay idx = 7*(*r++); 914e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; 924e2b4712SSatish Balay tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 934e2b4712SSatish Balay tmp[5] = b[5+idx]; tmp[6] = b[6+idx]; 944e2b4712SSatish Balay 954e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 964e2b4712SSatish Balay v = aa + 49*ai[i]; 974e2b4712SSatish Balay vi = aj + ai[i]; 984e2b4712SSatish Balay nz = diag[i] - ai[i]; 994e2b4712SSatish Balay idx = 7*(*r++); 1004e2b4712SSatish Balay sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 1014e2b4712SSatish Balay sum5 = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx]; 1024e2b4712SSatish Balay while (nz--) { 1034e2b4712SSatish Balay idx = 7*(*vi++); 1044e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 1054e2b4712SSatish Balay x4 = tmp[3+idx];x5 = tmp[4+idx]; 1064e2b4712SSatish Balay x6 = tmp[5+idx];x7 = tmp[6+idx]; 1074e2b4712SSatish Balay sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1084e2b4712SSatish Balay sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1094e2b4712SSatish Balay sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1104e2b4712SSatish Balay sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1114e2b4712SSatish Balay sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1124e2b4712SSatish Balay sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1134e2b4712SSatish Balay sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1144e2b4712SSatish Balay v += 49; 1154e2b4712SSatish Balay } 1164e2b4712SSatish Balay idx = 7*i; 1174e2b4712SSatish Balay tmp[idx] = sum1;tmp[1+idx] = sum2; 1184e2b4712SSatish Balay tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 1194e2b4712SSatish Balay tmp[5+idx] = sum6;tmp[6+idx] = sum7; 1204e2b4712SSatish Balay } 1214e2b4712SSatish Balay /* backward solve the upper triangular */ 1224e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 1234e2b4712SSatish Balay v = aa + 49*diag[i] + 49; 1244e2b4712SSatish Balay vi = aj + diag[i] + 1; 1254e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 1264e2b4712SSatish Balay idt = 7*i; 1274e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; 1284e2b4712SSatish Balay sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 1294e2b4712SSatish Balay sum6 = tmp[5+idt];sum7 = tmp[6+idt]; 1304e2b4712SSatish Balay while (nz--) { 1314e2b4712SSatish Balay idx = 7*(*vi++); 1324e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 1334e2b4712SSatish Balay x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 1344e2b4712SSatish Balay x6 = tmp[5+idx]; x7 = tmp[6+idx]; 1354e2b4712SSatish Balay sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1364e2b4712SSatish Balay sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1374e2b4712SSatish Balay sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1384e2b4712SSatish Balay sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1394e2b4712SSatish Balay sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1404e2b4712SSatish Balay sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1414e2b4712SSatish Balay sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1424e2b4712SSatish Balay v += 49; 1434e2b4712SSatish Balay } 1444e2b4712SSatish Balay idc = 7*(*c--); 1454e2b4712SSatish Balay v = aa + 49*diag[i]; 1464e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1+v[7]*sum2+v[14]*sum3+ 1474e2b4712SSatish Balay v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7; 1484e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+ 1494e2b4712SSatish Balay v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7; 1504e2b4712SSatish Balay x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+ 1514e2b4712SSatish Balay v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7; 1524e2b4712SSatish Balay x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+ 1534e2b4712SSatish Balay v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7; 1544e2b4712SSatish Balay x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+ 1554e2b4712SSatish Balay v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7; 1564e2b4712SSatish Balay x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+ 1574e2b4712SSatish Balay v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7; 1584e2b4712SSatish Balay x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+ 1594e2b4712SSatish Balay v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7; 1604e2b4712SSatish Balay } 1614e2b4712SSatish Balay 1624e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1634e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 164e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 165e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 16642015d63SBarry Smith PLogFlops(2*49*(a->nz) - 7*a->n); 1674e2b4712SSatish Balay PetscFunctionReturn(0); 1684e2b4712SSatish Balay } 1694e2b4712SSatish Balay 1704e2b4712SSatish Balay #undef __FUNC__ 17115091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering" 17215091d37SBarry Smith int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 17315091d37SBarry Smith { 17415091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 17515091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 17615091d37SBarry Smith int ierr,*diag = a->diag,jdx; 17715091d37SBarry Smith MatScalar *aa=a->a,*v; 17815091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 17915091d37SBarry Smith 18015091d37SBarry Smith PetscFunctionBegin; 18115091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 18215091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 18315091d37SBarry Smith /* forward solve the lower triangular */ 18415091d37SBarry Smith idx = 0; 18515091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 18615091d37SBarry Smith x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 18715091d37SBarry Smith x[6] = b[6+idx]; 18815091d37SBarry Smith for ( i=1; i<n; i++ ) { 18915091d37SBarry Smith v = aa + 49*ai[i]; 19015091d37SBarry Smith vi = aj + ai[i]; 19115091d37SBarry Smith nz = diag[i] - ai[i]; 19215091d37SBarry Smith idx = 7*i; 19315091d37SBarry Smith sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 19415091d37SBarry Smith sum4 = b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx]; 19515091d37SBarry Smith sum7 = b[6+idx]; 19615091d37SBarry Smith while (nz--) { 19715091d37SBarry Smith jdx = 7*(*vi++); 19815091d37SBarry Smith x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 19915091d37SBarry Smith x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 20015091d37SBarry Smith x7 = x[6+jdx]; 20115091d37SBarry Smith sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 20215091d37SBarry Smith sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 20315091d37SBarry Smith sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 20415091d37SBarry Smith sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 20515091d37SBarry Smith sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 20615091d37SBarry Smith sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 20715091d37SBarry Smith sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 20815091d37SBarry Smith v += 49; 20915091d37SBarry Smith } 21015091d37SBarry Smith x[idx] = sum1; 21115091d37SBarry Smith x[1+idx] = sum2; 21215091d37SBarry Smith x[2+idx] = sum3; 21315091d37SBarry Smith x[3+idx] = sum4; 21415091d37SBarry Smith x[4+idx] = sum5; 21515091d37SBarry Smith x[5+idx] = sum6; 21615091d37SBarry Smith x[6+idx] = sum7; 21715091d37SBarry Smith } 21815091d37SBarry Smith /* backward solve the upper triangular */ 21915091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 22015091d37SBarry Smith v = aa + 49*diag[i] + 49; 22115091d37SBarry Smith vi = aj + diag[i] + 1; 22215091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 22315091d37SBarry Smith idt = 7*i; 22415091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 22515091d37SBarry Smith sum3 = x[2+idt]; sum4 = x[3+idt]; 22615091d37SBarry Smith sum5 = x[4+idt]; sum6 = x[5+idt]; 22715091d37SBarry Smith sum7 = x[6+idt]; 22815091d37SBarry Smith while (nz--) { 22915091d37SBarry Smith idx = 7*(*vi++); 23015091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 23115091d37SBarry Smith x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 23215091d37SBarry Smith x7 = x[6+idx]; 23315091d37SBarry Smith sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 23415091d37SBarry Smith sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 23515091d37SBarry Smith sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 23615091d37SBarry Smith sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 23715091d37SBarry Smith sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 23815091d37SBarry Smith sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 23915091d37SBarry Smith sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 24015091d37SBarry Smith v += 49; 24115091d37SBarry Smith } 24215091d37SBarry Smith v = aa + 49*diag[i]; 24315091d37SBarry Smith x[idt] = v[0]*sum1 + v[7]*sum2 + v[14]*sum3 + v[21]*sum4 24415091d37SBarry Smith + v[28]*sum5 + v[35]*sum6 + v[42]*sum7; 24515091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[8]*sum2 + v[15]*sum3 + v[22]*sum4 24615091d37SBarry Smith + v[29]*sum5 + v[36]*sum6 + v[43]*sum7; 24715091d37SBarry Smith x[2+idt] = v[2]*sum1 + v[9]*sum2 + v[16]*sum3 + v[23]*sum4 24815091d37SBarry Smith + v[30]*sum5 + v[37]*sum6 + v[44]*sum7; 24915091d37SBarry Smith x[3+idt] = v[3]*sum1 + v[10]*sum2 + v[17]*sum3 + v[24]*sum4 25015091d37SBarry Smith + v[31]*sum5 + v[38]*sum6 + v[45]*sum7; 25115091d37SBarry Smith x[4+idt] = v[4]*sum1 + v[11]*sum2 + v[18]*sum3 + v[25]*sum4 25215091d37SBarry Smith + v[32]*sum5 + v[39]*sum6 + v[46]*sum7; 25315091d37SBarry Smith x[5+idt] = v[5]*sum1 + v[12]*sum2 + v[19]*sum3 + v[26]*sum4 25415091d37SBarry Smith + v[33]*sum5 + v[40]*sum6 + v[47]*sum7; 25515091d37SBarry Smith x[6+idt] = v[6]*sum1 + v[13]*sum2 + v[20]*sum3 + v[27]*sum4 25615091d37SBarry Smith + v[34]*sum5 + v[41]*sum6 + v[48]*sum7; 25715091d37SBarry Smith } 25815091d37SBarry Smith 25915091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 26015091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 26115091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 26215091d37SBarry Smith PetscFunctionReturn(0); 26315091d37SBarry Smith } 26415091d37SBarry Smith 26515091d37SBarry Smith #undef __FUNC__ 26615091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6" 26715091d37SBarry Smith int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx) 26815091d37SBarry Smith { 26915091d37SBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 27015091d37SBarry Smith IS iscol=a->col,isrow=a->row; 27115091d37SBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 27215091d37SBarry Smith int *diag = a->diag; 27315091d37SBarry Smith MatScalar *aa=a->a,*v; 27415091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6,*tmp; 27515091d37SBarry Smith 27615091d37SBarry Smith PetscFunctionBegin; 27715091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27815091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 27915091d37SBarry Smith tmp = a->solve_work; 28015091d37SBarry Smith 28115091d37SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 28215091d37SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 28315091d37SBarry Smith 28415091d37SBarry Smith /* forward solve the lower triangular */ 28515091d37SBarry Smith idx = 6*(*r++); 28615091d37SBarry Smith tmp[0] = b[idx]; tmp[1] = b[1+idx]; 28715091d37SBarry Smith tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 28815091d37SBarry Smith tmp[4] = b[4+idx]; tmp[5] = b[5+idx]; 28915091d37SBarry Smith for ( i=1; i<n; i++ ) { 29015091d37SBarry Smith v = aa + 36*ai[i]; 29115091d37SBarry Smith vi = aj + ai[i]; 29215091d37SBarry Smith nz = diag[i] - ai[i]; 29315091d37SBarry Smith idx = 6*(*r++); 29415091d37SBarry Smith sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 29515091d37SBarry Smith sum5 = b[4+idx]; sum6 = b[5+idx]; 29615091d37SBarry Smith while (nz--) { 29715091d37SBarry Smith idx = 6*(*vi++); 29815091d37SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 29915091d37SBarry Smith x4 = tmp[3+idx]; x5 = tmp[4+idx]; x6 = tmp[5+idx]; 30015091d37SBarry Smith sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 30115091d37SBarry Smith sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 30215091d37SBarry Smith sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 30315091d37SBarry Smith sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 30415091d37SBarry Smith sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 30515091d37SBarry Smith sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 30615091d37SBarry Smith v += 36; 30715091d37SBarry Smith } 30815091d37SBarry Smith idx = 6*i; 30915091d37SBarry Smith tmp[idx] = sum1;tmp[1+idx] = sum2; 31015091d37SBarry Smith tmp[2+idx] = sum3;tmp[3+idx] = sum4; 31115091d37SBarry Smith tmp[4+idx] = sum5;tmp[5+idx] = sum6; 31215091d37SBarry Smith } 31315091d37SBarry Smith /* backward solve the upper triangular */ 31415091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 31515091d37SBarry Smith v = aa + 36*diag[i] + 36; 31615091d37SBarry Smith vi = aj + diag[i] + 1; 31715091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 31815091d37SBarry Smith idt = 6*i; 31915091d37SBarry Smith sum1 = tmp[idt]; sum2 = tmp[1+idt]; 32015091d37SBarry Smith sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 32115091d37SBarry Smith sum5 = tmp[4+idt];sum6 = tmp[5+idt]; 32215091d37SBarry Smith while (nz--) { 32315091d37SBarry Smith idx = 6*(*vi++); 32415091d37SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; 32515091d37SBarry Smith x3 = tmp[2+idx]; x4 = tmp[3+idx]; 32615091d37SBarry Smith x5 = tmp[4+idx]; x6 = tmp[5+idx]; 32715091d37SBarry Smith sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 32815091d37SBarry Smith sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 32915091d37SBarry Smith sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 33015091d37SBarry Smith sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 33115091d37SBarry Smith sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 33215091d37SBarry Smith sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 33315091d37SBarry Smith v += 36; 33415091d37SBarry Smith } 33515091d37SBarry Smith idc = 6*(*c--); 33615091d37SBarry Smith v = aa + 36*diag[i]; 33715091d37SBarry Smith x[idc] = tmp[idt] = v[0]*sum1+v[6]*sum2+v[12]*sum3+ 33815091d37SBarry Smith v[18]*sum4+v[24]*sum5+v[30]*sum6; 33915091d37SBarry Smith x[1+idc] = tmp[1+idt] = v[1]*sum1+v[7]*sum2+v[13]*sum3+ 34015091d37SBarry Smith v[19]*sum4+v[25]*sum5+v[31]*sum6; 34115091d37SBarry Smith x[2+idc] = tmp[2+idt] = v[2]*sum1+v[8]*sum2+v[14]*sum3+ 34215091d37SBarry Smith v[20]*sum4+v[26]*sum5+v[32]*sum6; 34315091d37SBarry Smith x[3+idc] = tmp[3+idt] = v[3]*sum1+v[9]*sum2+v[15]*sum3+ 34415091d37SBarry Smith v[21]*sum4+v[27]*sum5+v[33]*sum6; 34515091d37SBarry Smith x[4+idc] = tmp[4+idt] = v[4]*sum1+v[10]*sum2+v[16]*sum3+ 34615091d37SBarry Smith v[22]*sum4+v[28]*sum5+v[34]*sum6; 34715091d37SBarry Smith x[5+idc] = tmp[5+idt] = v[5]*sum1+v[11]*sum2+v[17]*sum3+ 34815091d37SBarry Smith v[23]*sum4+v[29]*sum5+v[35]*sum6; 34915091d37SBarry Smith } 35015091d37SBarry Smith 35115091d37SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 35215091d37SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 35315091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 35415091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 35515091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 35615091d37SBarry Smith PetscFunctionReturn(0); 35715091d37SBarry Smith } 35815091d37SBarry Smith 35915091d37SBarry Smith #undef __FUNC__ 36015091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering" 36115091d37SBarry Smith int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 36215091d37SBarry Smith { 36315091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 36415091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 36515091d37SBarry Smith int ierr,*diag = a->diag,jdx; 36615091d37SBarry Smith MatScalar *aa=a->a,*v; 36715091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6; 36815091d37SBarry Smith 36915091d37SBarry Smith PetscFunctionBegin; 37015091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 37115091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 37215091d37SBarry Smith /* forward solve the lower triangular */ 37315091d37SBarry Smith idx = 0; 37415091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 37515091d37SBarry Smith x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 37615091d37SBarry Smith for ( i=1; i<n; i++ ) { 37715091d37SBarry Smith v = aa + 36*ai[i]; 37815091d37SBarry Smith vi = aj + ai[i]; 37915091d37SBarry Smith nz = diag[i] - ai[i]; 38015091d37SBarry Smith idx = 6*i; 38115091d37SBarry Smith sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 38215091d37SBarry Smith sum4 = b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx]; 38315091d37SBarry Smith while (nz--) { 38415091d37SBarry Smith jdx = 6*(*vi++); 38515091d37SBarry Smith x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 38615091d37SBarry Smith x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 38715091d37SBarry Smith sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 38815091d37SBarry Smith sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 38915091d37SBarry Smith sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 39015091d37SBarry Smith sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 39115091d37SBarry Smith sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 39215091d37SBarry Smith sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 39315091d37SBarry Smith v += 36; 39415091d37SBarry Smith } 39515091d37SBarry Smith x[idx] = sum1; 39615091d37SBarry Smith x[1+idx] = sum2; 39715091d37SBarry Smith x[2+idx] = sum3; 39815091d37SBarry Smith x[3+idx] = sum4; 39915091d37SBarry Smith x[4+idx] = sum5; 40015091d37SBarry Smith x[5+idx] = sum6; 40115091d37SBarry Smith } 40215091d37SBarry Smith /* backward solve the upper triangular */ 40315091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 40415091d37SBarry Smith v = aa + 36*diag[i] + 36; 40515091d37SBarry Smith vi = aj + diag[i] + 1; 40615091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 40715091d37SBarry Smith idt = 6*i; 40815091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 40915091d37SBarry Smith sum3 = x[2+idt]; sum4 = x[3+idt]; 41015091d37SBarry Smith sum5 = x[4+idt]; sum6 = x[5+idt]; 41115091d37SBarry Smith while (nz--) { 41215091d37SBarry Smith idx = 6*(*vi++); 41315091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 41415091d37SBarry Smith x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 41515091d37SBarry Smith sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 41615091d37SBarry Smith sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 41715091d37SBarry Smith sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 41815091d37SBarry Smith sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 41915091d37SBarry Smith sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 42015091d37SBarry Smith sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 42115091d37SBarry Smith v += 36; 42215091d37SBarry Smith } 42315091d37SBarry Smith v = aa + 36*diag[i]; 42415091d37SBarry Smith x[idt] = v[0]*sum1 + v[6]*sum2 + v[12]*sum3 + v[18]*sum4 + v[24]*sum5 + v[30]*sum6; 42515091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[7]*sum2 + v[13]*sum3 + v[19]*sum4 + v[25]*sum5 + v[31]*sum6; 42615091d37SBarry Smith x[2+idt] = v[2]*sum1 + v[8]*sum2 + v[14]*sum3 + v[20]*sum4 + v[26]*sum5 + v[32]*sum6; 42715091d37SBarry Smith x[3+idt] = v[3]*sum1 + v[9]*sum2 + v[15]*sum3 + v[21]*sum4 + v[27]*sum5 + v[33]*sum6; 42815091d37SBarry Smith x[4+idt] = v[4]*sum1 + v[10]*sum2 + v[16]*sum3 + v[22]*sum4 + v[28]*sum5 + v[34]*sum6; 42915091d37SBarry Smith x[5+idt] = v[5]*sum1 + v[11]*sum2 + v[17]*sum3 + v[23]*sum4 + v[29]*sum5 + v[35]*sum6; 43015091d37SBarry Smith } 43115091d37SBarry Smith 43215091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 43315091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 43415091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 43515091d37SBarry Smith PetscFunctionReturn(0); 43615091d37SBarry Smith } 43715091d37SBarry Smith 43815091d37SBarry Smith #undef __FUNC__ 4394e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_5" 4404e2b4712SSatish Balay int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx) 4414e2b4712SSatish Balay { 4424e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 4434e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 4444e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 4454e2b4712SSatish Balay int *diag = a->diag; 4463f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 4473f1db9ecSBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp; 4484e2b4712SSatish Balay 4494e2b4712SSatish Balay PetscFunctionBegin; 450e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 451e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4524e2b4712SSatish Balay tmp = a->solve_work; 4534e2b4712SSatish Balay 4544e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 4554e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 4564e2b4712SSatish Balay 4574e2b4712SSatish Balay /* forward solve the lower triangular */ 4584e2b4712SSatish Balay idx = 5*(*r++); 4594e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; 4604e2b4712SSatish Balay tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 4614e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 4624e2b4712SSatish Balay v = aa + 25*ai[i]; 4634e2b4712SSatish Balay vi = aj + ai[i]; 4644e2b4712SSatish Balay nz = diag[i] - ai[i]; 4654e2b4712SSatish Balay idx = 5*(*r++); 4664e2b4712SSatish Balay sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 4674e2b4712SSatish Balay sum5 = b[4+idx]; 4684e2b4712SSatish Balay while (nz--) { 4694e2b4712SSatish Balay idx = 5*(*vi++); 4704e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 4714e2b4712SSatish Balay x4 = tmp[3+idx];x5 = tmp[4+idx]; 4724e2b4712SSatish Balay sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 4734e2b4712SSatish Balay sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 4744e2b4712SSatish Balay sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 4754e2b4712SSatish Balay sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 4764e2b4712SSatish Balay sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 4774e2b4712SSatish Balay v += 25; 4784e2b4712SSatish Balay } 4794e2b4712SSatish Balay idx = 5*i; 4804e2b4712SSatish Balay tmp[idx] = sum1;tmp[1+idx] = sum2; 4814e2b4712SSatish Balay tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 4824e2b4712SSatish Balay } 4834e2b4712SSatish Balay /* backward solve the upper triangular */ 4844e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 4854e2b4712SSatish Balay v = aa + 25*diag[i] + 25; 4864e2b4712SSatish Balay vi = aj + diag[i] + 1; 4874e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 4884e2b4712SSatish Balay idt = 5*i; 4894e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; 4904e2b4712SSatish Balay sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 4914e2b4712SSatish Balay while (nz--) { 4924e2b4712SSatish Balay idx = 5*(*vi++); 4934e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 4944e2b4712SSatish Balay x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 4954e2b4712SSatish Balay sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 4964e2b4712SSatish Balay sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 4974e2b4712SSatish Balay sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 4984e2b4712SSatish Balay sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 4994e2b4712SSatish Balay sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 5004e2b4712SSatish Balay v += 25; 5014e2b4712SSatish Balay } 5024e2b4712SSatish Balay idc = 5*(*c--); 5034e2b4712SSatish Balay v = aa + 25*diag[i]; 5044e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1+v[5]*sum2+v[10]*sum3+ 5054e2b4712SSatish Balay v[15]*sum4+v[20]*sum5; 5064e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+ 5074e2b4712SSatish Balay v[16]*sum4+v[21]*sum5; 5084e2b4712SSatish Balay x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+ 5094e2b4712SSatish Balay v[17]*sum4+v[22]*sum5; 5104e2b4712SSatish Balay x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+ 5114e2b4712SSatish Balay v[18]*sum4+v[23]*sum5; 5124e2b4712SSatish Balay x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+ 5134e2b4712SSatish Balay v[19]*sum4+v[24]*sum5; 5144e2b4712SSatish Balay } 5154e2b4712SSatish Balay 5164e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 5174e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 518e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 519e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 52042015d63SBarry Smith PLogFlops(2*25*(a->nz) - 5*a->n); 5214e2b4712SSatish Balay PetscFunctionReturn(0); 5224e2b4712SSatish Balay } 5234e2b4712SSatish Balay 5244e2b4712SSatish Balay #undef __FUNC__ 52515091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering" 52615091d37SBarry Smith int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 52715091d37SBarry Smith { 52815091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 52915091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 53015091d37SBarry Smith int ierr,*diag = a->diag,jdx; 53115091d37SBarry Smith MatScalar *aa=a->a,*v; 53215091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;; 53315091d37SBarry Smith 53415091d37SBarry Smith PetscFunctionBegin; 53515091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 53615091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 53715091d37SBarry Smith /* forward solve the lower triangular */ 53815091d37SBarry Smith idx = 0; 53915091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx]; 54015091d37SBarry Smith for ( i=1; i<n; i++ ) { 54115091d37SBarry Smith v = aa + 25*ai[i]; 54215091d37SBarry Smith vi = aj + ai[i]; 54315091d37SBarry Smith nz = diag[i] - ai[i]; 54415091d37SBarry Smith idx = 5*i; 54515091d37SBarry Smith sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx]; 54615091d37SBarry Smith while (nz--) { 54715091d37SBarry Smith jdx = 5*(*vi++); 54815091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx]; 54915091d37SBarry Smith sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 55015091d37SBarry Smith sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 55115091d37SBarry Smith sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 55215091d37SBarry Smith sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 55315091d37SBarry Smith sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 55415091d37SBarry Smith v += 25; 55515091d37SBarry Smith } 55615091d37SBarry Smith x[idx] = sum1; 55715091d37SBarry Smith x[1+idx] = sum2; 55815091d37SBarry Smith x[2+idx] = sum3; 55915091d37SBarry Smith x[3+idx] = sum4; 56015091d37SBarry Smith x[4+idx] = sum5; 56115091d37SBarry Smith } 56215091d37SBarry Smith /* backward solve the upper triangular */ 56315091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 56415091d37SBarry Smith v = aa + 25*diag[i] + 25; 56515091d37SBarry Smith vi = aj + diag[i] + 1; 56615091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 56715091d37SBarry Smith idt = 5*i; 56815091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 56915091d37SBarry Smith sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt]; 57015091d37SBarry Smith while (nz--) { 57115091d37SBarry Smith idx = 5*(*vi++); 57215091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 57315091d37SBarry Smith sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 57415091d37SBarry Smith sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 57515091d37SBarry Smith sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 57615091d37SBarry Smith sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 57715091d37SBarry Smith sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 57815091d37SBarry Smith v += 25; 57915091d37SBarry Smith } 58015091d37SBarry Smith v = aa + 25*diag[i]; 58115091d37SBarry Smith x[idt] = v[0]*sum1 + v[5]*sum2 + v[10]*sum3 + v[15]*sum4 + v[20]*sum5; 58215091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3 + v[16]*sum4 + v[21]*sum5; 58315091d37SBarry Smith x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3 + v[17]*sum4 + v[22]*sum5; 58415091d37SBarry Smith x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3 + v[18]*sum4 + v[23]*sum5; 58515091d37SBarry Smith x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3 + v[19]*sum4 + v[24]*sum5; 58615091d37SBarry Smith } 58715091d37SBarry Smith 58815091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 58915091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 59015091d37SBarry Smith PLogFlops(2*25*(a->nz) - 5*a->n); 59115091d37SBarry Smith PetscFunctionReturn(0); 59215091d37SBarry Smith } 59315091d37SBarry Smith 59415091d37SBarry Smith #undef __FUNC__ 5954e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4" 5964e2b4712SSatish Balay int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx) 5974e2b4712SSatish Balay { 5984e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 5994e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 6004e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 6014e2b4712SSatish Balay int *diag = a->diag; 6023f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 6033f1db9ecSBarry Smith Scalar *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp; 6044e2b4712SSatish Balay 6054e2b4712SSatish Balay PetscFunctionBegin; 606e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 607e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6084e2b4712SSatish Balay tmp = a->solve_work; 6094e2b4712SSatish Balay 6104e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 6114e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 6124e2b4712SSatish Balay 6134e2b4712SSatish Balay /* forward solve the lower triangular */ 6144e2b4712SSatish Balay idx = 4*(*r++); 6154e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; 6164e2b4712SSatish Balay tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 6174e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 6184e2b4712SSatish Balay v = aa + 16*ai[i]; 6194e2b4712SSatish Balay vi = aj + ai[i]; 6204e2b4712SSatish Balay nz = diag[i] - ai[i]; 6214e2b4712SSatish Balay idx = 4*(*r++); 6224e2b4712SSatish Balay sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 6234e2b4712SSatish Balay while (nz--) { 6244e2b4712SSatish Balay idx = 4*(*vi++); 6254e2b4712SSatish Balay x1 = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx]; 6264e2b4712SSatish Balay sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 6274e2b4712SSatish Balay sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 6284e2b4712SSatish Balay sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 6294e2b4712SSatish Balay sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 6304e2b4712SSatish Balay v += 16; 6314e2b4712SSatish Balay } 6324e2b4712SSatish Balay idx = 4*i; 6334e2b4712SSatish Balay tmp[idx] = sum1;tmp[1+idx] = sum2; 6344e2b4712SSatish Balay tmp[2+idx] = sum3;tmp[3+idx] = sum4; 6354e2b4712SSatish Balay } 6364e2b4712SSatish Balay /* backward solve the upper triangular */ 6374e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 6384e2b4712SSatish Balay v = aa + 16*diag[i] + 16; 6394e2b4712SSatish Balay vi = aj + diag[i] + 1; 6404e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 6414e2b4712SSatish Balay idt = 4*i; 6424e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; 6434e2b4712SSatish Balay sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 6444e2b4712SSatish Balay while (nz--) { 6454e2b4712SSatish Balay idx = 4*(*vi++); 6464e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 6474e2b4712SSatish Balay x3 = tmp[2+idx]; x4 = tmp[3+idx]; 6484e2b4712SSatish Balay sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 6494e2b4712SSatish Balay sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 6504e2b4712SSatish Balay sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 6514e2b4712SSatish Balay sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 6524e2b4712SSatish Balay v += 16; 6534e2b4712SSatish Balay } 6544e2b4712SSatish Balay idc = 4*(*c--); 6554e2b4712SSatish Balay v = aa + 16*diag[i]; 6564e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4; 6574e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4; 6584e2b4712SSatish Balay x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4; 6594e2b4712SSatish Balay x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4; 6604e2b4712SSatish Balay } 6614e2b4712SSatish Balay 6624e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 6634e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 664e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 665e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 66642015d63SBarry Smith PLogFlops(2*16*(a->nz) - 4*a->n); 6674e2b4712SSatish Balay PetscFunctionReturn(0); 6684e2b4712SSatish Balay } 6690ef38995SBarry Smith 6700ef38995SBarry Smith 6714e2b4712SSatish Balay /* 6724e2b4712SSatish Balay Special case where the matrix was ILU(0) factored in the natural 6734e2b4712SSatish Balay ordering. This eliminates the need for the column and row permutation. 6744e2b4712SSatish Balay */ 6754e2b4712SSatish Balay #undef __FUNC__ 6764e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering" 6774e2b4712SSatish Balay int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 6784e2b4712SSatish Balay { 6794e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 68030d4dcafSBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 68130d4dcafSBarry Smith int ierr,*diag = a->diag; 6823f1db9ecSBarry Smith MatScalar *aa=a->a; 68330d4dcafSBarry Smith Scalar *x,*b; 6844e2b4712SSatish Balay 6854e2b4712SSatish Balay PetscFunctionBegin; 686e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 687e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 6884e2b4712SSatish Balay 689aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS) 6902853dc0eSBarry Smith { 6912853dc0eSBarry Smith static Scalar w[2000]; /* very BAD need to fix */ 6922853dc0eSBarry Smith fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w); 6932853dc0eSBarry Smith } 694aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ) 6952853dc0eSBarry Smith { 6962853dc0eSBarry Smith static Scalar w[2000]; /* very BAD need to fix */ 6972853dc0eSBarry Smith fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w); 6982853dc0eSBarry Smith } 699aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL) 7002853dc0eSBarry Smith fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b); 701e1293385SBarry Smith #else 70230d4dcafSBarry Smith { 7033f1db9ecSBarry Smith Scalar sum1,sum2,sum3,sum4,x1,x2,x3,x4; 7043f1db9ecSBarry Smith MatScalar *v; 70530d4dcafSBarry Smith int jdx,idt,idx,nz,*vi,i; 706e1293385SBarry Smith 7074e2b4712SSatish Balay /* forward solve the lower triangular */ 7084e2b4712SSatish Balay idx = 0; 709e1293385SBarry Smith x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3]; 7104e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 7114e2b4712SSatish Balay v = aa + 16*ai[i]; 7124e2b4712SSatish Balay vi = aj + ai[i]; 7134e2b4712SSatish Balay nz = diag[i] - ai[i]; 714e1293385SBarry Smith idx += 4; 7154e2b4712SSatish Balay sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 7164e2b4712SSatish Balay while (nz--) { 7174e2b4712SSatish Balay jdx = 4*(*vi++); 7184e2b4712SSatish Balay x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; 7194e2b4712SSatish Balay sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 7204e2b4712SSatish Balay sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 7214e2b4712SSatish Balay sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 7224e2b4712SSatish Balay sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 7234e2b4712SSatish Balay v += 16; 7244e2b4712SSatish Balay } 7254e2b4712SSatish Balay x[idx] = sum1; 7264e2b4712SSatish Balay x[1+idx] = sum2; 7274e2b4712SSatish Balay x[2+idx] = sum3; 7284e2b4712SSatish Balay x[3+idx] = sum4; 7294e2b4712SSatish Balay } 7304e2b4712SSatish Balay /* backward solve the upper triangular */ 7314e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 7324e2b4712SSatish Balay v = aa + 16*diag[i] + 16; 7334e2b4712SSatish Balay vi = aj + diag[i] + 1; 7344e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 7354e2b4712SSatish Balay idt = 4*i; 7364e2b4712SSatish Balay sum1 = x[idt]; sum2 = x[1+idt]; 7374e2b4712SSatish Balay sum3 = x[2+idt];sum4 = x[3+idt]; 7384e2b4712SSatish Balay while (nz--) { 7394e2b4712SSatish Balay idx = 4*(*vi++); 7404e2b4712SSatish Balay x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; 7414e2b4712SSatish Balay sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 7424e2b4712SSatish Balay sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 7434e2b4712SSatish Balay sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 7444e2b4712SSatish Balay sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 7454e2b4712SSatish Balay v += 16; 7464e2b4712SSatish Balay } 7474e2b4712SSatish Balay v = aa + 16*diag[i]; 7484e2b4712SSatish Balay x[idt] = v[0]*sum1 + v[4]*sum2 + v[8]*sum3 + v[12]*sum4; 7494e2b4712SSatish Balay x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3 + v[13]*sum4; 7504e2b4712SSatish Balay x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4; 7514e2b4712SSatish Balay x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4; 7524e2b4712SSatish Balay } 75330d4dcafSBarry Smith } 754e1293385SBarry Smith #endif 7554e2b4712SSatish Balay 756e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 757e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 75842015d63SBarry Smith PLogFlops(2*16*(a->nz) - 4*a->n); 7594e2b4712SSatish Balay PetscFunctionReturn(0); 7604e2b4712SSatish Balay } 7614e2b4712SSatish Balay 762667159a5SBarry Smith #undef __FUNC__ 7634e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_3" 7644e2b4712SSatish Balay int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx) 7654e2b4712SSatish Balay { 7664e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 7674e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 7684e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 7694e2b4712SSatish Balay int *diag = a->diag; 7703f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 7713f1db9ecSBarry Smith Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp; 7724e2b4712SSatish Balay 7734e2b4712SSatish Balay PetscFunctionBegin; 774e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 775e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7764e2b4712SSatish Balay tmp = a->solve_work; 7774e2b4712SSatish Balay 7784e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 7794e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 7804e2b4712SSatish Balay 7814e2b4712SSatish Balay /* forward solve the lower triangular */ 7824e2b4712SSatish Balay idx = 3*(*r++); 7834e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx]; 7844e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 7854e2b4712SSatish Balay v = aa + 9*ai[i]; 7864e2b4712SSatish Balay vi = aj + ai[i]; 7874e2b4712SSatish Balay nz = diag[i] - ai[i]; 7884e2b4712SSatish Balay idx = 3*(*r++); 7894e2b4712SSatish Balay sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 7904e2b4712SSatish Balay while (nz--) { 7914e2b4712SSatish Balay idx = 3*(*vi++); 7924e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 7934e2b4712SSatish Balay sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 7944e2b4712SSatish Balay sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 7954e2b4712SSatish Balay sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 7964e2b4712SSatish Balay v += 9; 7974e2b4712SSatish Balay } 7984e2b4712SSatish Balay idx = 3*i; 7994e2b4712SSatish Balay tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3; 8004e2b4712SSatish Balay } 8014e2b4712SSatish Balay /* backward solve the upper triangular */ 8024e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 8034e2b4712SSatish Balay v = aa + 9*diag[i] + 9; 8044e2b4712SSatish Balay vi = aj + diag[i] + 1; 8054e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 8064e2b4712SSatish Balay idt = 3*i; 8074e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt]; 8084e2b4712SSatish Balay while (nz--) { 8094e2b4712SSatish Balay idx = 3*(*vi++); 8104e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 8114e2b4712SSatish Balay sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 8124e2b4712SSatish Balay sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 8134e2b4712SSatish Balay sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 8144e2b4712SSatish Balay v += 9; 8154e2b4712SSatish Balay } 8164e2b4712SSatish Balay idc = 3*(*c--); 8174e2b4712SSatish Balay v = aa + 9*diag[i]; 8184e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 8194e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 8204e2b4712SSatish Balay x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 8214e2b4712SSatish Balay } 8224e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 8234e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 824e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 825e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 82642015d63SBarry Smith PLogFlops(2*9*(a->nz) - 3*a->n); 8274e2b4712SSatish Balay PetscFunctionReturn(0); 8284e2b4712SSatish Balay } 8294e2b4712SSatish Balay 83015091d37SBarry Smith /* 83115091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 83215091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 83315091d37SBarry Smith */ 83415091d37SBarry Smith #undef __FUNC__ 83515091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering" 83615091d37SBarry Smith int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 83715091d37SBarry Smith { 83815091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 83915091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 84015091d37SBarry Smith int ierr,*diag = a->diag; 84115091d37SBarry Smith MatScalar *aa=a->a, *v; 84215091d37SBarry Smith Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3; 84315091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 84415091d37SBarry Smith 84515091d37SBarry Smith PetscFunctionBegin; 84615091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 84715091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 84815091d37SBarry Smith 84915091d37SBarry Smith 85015091d37SBarry Smith /* forward solve the lower triangular */ 85115091d37SBarry Smith idx = 0; 85215091d37SBarry Smith x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; 85315091d37SBarry Smith for ( i=1; i<n; i++ ) { 85415091d37SBarry Smith v = aa + 9*ai[i]; 85515091d37SBarry Smith vi = aj + ai[i]; 85615091d37SBarry Smith nz = diag[i] - ai[i]; 85715091d37SBarry Smith idx += 3; 85815091d37SBarry Smith sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx]; 85915091d37SBarry Smith while (nz--) { 86015091d37SBarry Smith jdx = 3*(*vi++); 86115091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx]; 86215091d37SBarry Smith sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 86315091d37SBarry Smith sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 86415091d37SBarry Smith sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 86515091d37SBarry Smith v += 9; 86615091d37SBarry Smith } 86715091d37SBarry Smith x[idx] = sum1; 86815091d37SBarry Smith x[1+idx] = sum2; 86915091d37SBarry Smith x[2+idx] = sum3; 87015091d37SBarry Smith } 87115091d37SBarry Smith /* backward solve the upper triangular */ 87215091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 87315091d37SBarry Smith v = aa + 9*diag[i] + 9; 87415091d37SBarry Smith vi = aj + diag[i] + 1; 87515091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 87615091d37SBarry Smith idt = 3*i; 87715091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 87815091d37SBarry Smith sum3 = x[2+idt]; 87915091d37SBarry Smith while (nz--) { 88015091d37SBarry Smith idx = 3*(*vi++); 88115091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 88215091d37SBarry Smith sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 88315091d37SBarry Smith sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 88415091d37SBarry Smith sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 88515091d37SBarry Smith v += 9; 88615091d37SBarry Smith } 88715091d37SBarry Smith v = aa + 9*diag[i]; 88815091d37SBarry Smith x[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 88915091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 89015091d37SBarry Smith x[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 89115091d37SBarry Smith } 89215091d37SBarry Smith 89315091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 89415091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 89515091d37SBarry Smith PLogFlops(2*9*(a->nz) - 3*a->n); 89615091d37SBarry Smith PetscFunctionReturn(0); 89715091d37SBarry Smith } 89815091d37SBarry Smith 8994e2b4712SSatish Balay #undef __FUNC__ 9004e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_2" 9014e2b4712SSatish Balay int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx) 9024e2b4712SSatish Balay { 9034e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 9044e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 9054e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 9064e2b4712SSatish Balay int *diag = a->diag; 9073f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 9083f1db9ecSBarry Smith Scalar *x,*b,sum1,sum2,x1,x2,*tmp; 9094e2b4712SSatish Balay 9104e2b4712SSatish Balay PetscFunctionBegin; 911e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 912e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 9134e2b4712SSatish Balay tmp = a->solve_work; 9144e2b4712SSatish Balay 9154e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 9164e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 9174e2b4712SSatish Balay 9184e2b4712SSatish Balay /* forward solve the lower triangular */ 9194e2b4712SSatish Balay idx = 2*(*r++); 9204e2b4712SSatish Balay tmp[0] = b[idx]; tmp[1] = b[1+idx]; 9214e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 9224e2b4712SSatish Balay v = aa + 4*ai[i]; 9234e2b4712SSatish Balay vi = aj + ai[i]; 9244e2b4712SSatish Balay nz = diag[i] - ai[i]; 9254e2b4712SSatish Balay idx = 2*(*r++); 9264e2b4712SSatish Balay sum1 = b[idx]; sum2 = b[1+idx]; 9274e2b4712SSatish Balay while (nz--) { 9284e2b4712SSatish Balay idx = 2*(*vi++); 9294e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 9304e2b4712SSatish Balay sum1 -= v[0]*x1 + v[2]*x2; 9314e2b4712SSatish Balay sum2 -= v[1]*x1 + v[3]*x2; 9324e2b4712SSatish Balay v += 4; 9334e2b4712SSatish Balay } 9344e2b4712SSatish Balay idx = 2*i; 9354e2b4712SSatish Balay tmp[idx] = sum1; tmp[1+idx] = sum2; 9364e2b4712SSatish Balay } 9374e2b4712SSatish Balay /* backward solve the upper triangular */ 9384e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 9394e2b4712SSatish Balay v = aa + 4*diag[i] + 4; 9404e2b4712SSatish Balay vi = aj + diag[i] + 1; 9414e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 9424e2b4712SSatish Balay idt = 2*i; 9434e2b4712SSatish Balay sum1 = tmp[idt]; sum2 = tmp[1+idt]; 9444e2b4712SSatish Balay while (nz--) { 9454e2b4712SSatish Balay idx = 2*(*vi++); 9464e2b4712SSatish Balay x1 = tmp[idx]; x2 = tmp[1+idx]; 9474e2b4712SSatish Balay sum1 -= v[0]*x1 + v[2]*x2; 9484e2b4712SSatish Balay sum2 -= v[1]*x1 + v[3]*x2; 9494e2b4712SSatish Balay v += 4; 9504e2b4712SSatish Balay } 9514e2b4712SSatish Balay idc = 2*(*c--); 9524e2b4712SSatish Balay v = aa + 4*diag[i]; 9534e2b4712SSatish Balay x[idc] = tmp[idt] = v[0]*sum1 + v[2]*sum2; 9544e2b4712SSatish Balay x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2; 9554e2b4712SSatish Balay } 9564e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 9574e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 958e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 959e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 96042015d63SBarry Smith PLogFlops(2*4*(a->nz) - 2*a->n); 9614e2b4712SSatish Balay PetscFunctionReturn(0); 9624e2b4712SSatish Balay } 9634e2b4712SSatish Balay 96415091d37SBarry Smith /* 96515091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 96615091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 96715091d37SBarry Smith */ 96815091d37SBarry Smith #undef __FUNC__ 96915091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering" 97015091d37SBarry Smith int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 97115091d37SBarry Smith { 97215091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 97315091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 97415091d37SBarry Smith int ierr,*diag = a->diag; 97515091d37SBarry Smith MatScalar *aa=a->a,*v; 97615091d37SBarry Smith Scalar *x,*b,sum1,sum2,x1,x2; 97715091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 97815091d37SBarry Smith 97915091d37SBarry Smith PetscFunctionBegin; 98015091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 98115091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 98215091d37SBarry Smith 98315091d37SBarry Smith /* forward solve the lower triangular */ 98415091d37SBarry Smith idx = 0; 98515091d37SBarry Smith x[0] = b[0]; x[1] = b[1]; 98615091d37SBarry Smith for ( i=1; i<n; i++ ) { 98715091d37SBarry Smith v = aa + 4*ai[i]; 98815091d37SBarry Smith vi = aj + ai[i]; 98915091d37SBarry Smith nz = diag[i] - ai[i]; 99015091d37SBarry Smith idx += 2; 99115091d37SBarry Smith sum1 = b[idx];sum2 = b[1+idx]; 99215091d37SBarry Smith while (nz--) { 99315091d37SBarry Smith jdx = 2*(*vi++); 99415091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx]; 99515091d37SBarry Smith sum1 -= v[0]*x1 + v[2]*x2; 99615091d37SBarry Smith sum2 -= v[1]*x1 + v[3]*x2; 99715091d37SBarry Smith v += 4; 99815091d37SBarry Smith } 99915091d37SBarry Smith x[idx] = sum1; 100015091d37SBarry Smith x[1+idx] = sum2; 100115091d37SBarry Smith } 100215091d37SBarry Smith /* backward solve the upper triangular */ 100315091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 100415091d37SBarry Smith v = aa + 4*diag[i] + 4; 100515091d37SBarry Smith vi = aj + diag[i] + 1; 100615091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 100715091d37SBarry Smith idt = 2*i; 100815091d37SBarry Smith sum1 = x[idt]; sum2 = x[1+idt]; 100915091d37SBarry Smith while (nz--) { 101015091d37SBarry Smith idx = 2*(*vi++); 101115091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 101215091d37SBarry Smith sum1 -= v[0]*x1 + v[2]*x2; 101315091d37SBarry Smith sum2 -= v[1]*x1 + v[3]*x2; 101415091d37SBarry Smith v += 4; 101515091d37SBarry Smith } 101615091d37SBarry Smith v = aa + 4*diag[i]; 101715091d37SBarry Smith x[idt] = v[0]*sum1 + v[2]*sum2; 101815091d37SBarry Smith x[1+idt] = v[1]*sum1 + v[3]*sum2; 101915091d37SBarry Smith } 102015091d37SBarry Smith 102115091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 102215091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 102315091d37SBarry Smith PLogFlops(2*4*(a->nz) - 2*a->n); 102415091d37SBarry Smith PetscFunctionReturn(0); 102515091d37SBarry Smith } 102615091d37SBarry Smith 10274e2b4712SSatish Balay #undef __FUNC__ 10284e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_1" 10294e2b4712SSatish Balay int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx) 10304e2b4712SSatish Balay { 10314e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 10324e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 10334e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout; 10344e2b4712SSatish Balay int *diag = a->diag; 10353f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 10363f1db9ecSBarry Smith Scalar *x,*b,sum1,*tmp; 10374e2b4712SSatish Balay 10384e2b4712SSatish Balay PetscFunctionBegin; 10394e2b4712SSatish Balay if (!n) PetscFunctionReturn(0); 10404e2b4712SSatish Balay 1041e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1042e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 10434e2b4712SSatish Balay tmp = a->solve_work; 10444e2b4712SSatish Balay 10454e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 10464e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 10474e2b4712SSatish Balay 10484e2b4712SSatish Balay /* forward solve the lower triangular */ 10494e2b4712SSatish Balay tmp[0] = b[*r++]; 10504e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 10514e2b4712SSatish Balay v = aa + ai[i]; 10524e2b4712SSatish Balay vi = aj + ai[i]; 10534e2b4712SSatish Balay nz = diag[i] - ai[i]; 10544e2b4712SSatish Balay sum1 = b[*r++]; 10554e2b4712SSatish Balay while (nz--) { 10564e2b4712SSatish Balay sum1 -= (*v++)*tmp[*vi++]; 10574e2b4712SSatish Balay } 10584e2b4712SSatish Balay tmp[i] = sum1; 10594e2b4712SSatish Balay } 10604e2b4712SSatish Balay /* backward solve the upper triangular */ 10614e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 10624e2b4712SSatish Balay v = aa + diag[i] + 1; 10634e2b4712SSatish Balay vi = aj + diag[i] + 1; 10644e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 10654e2b4712SSatish Balay sum1 = tmp[i]; 10664e2b4712SSatish Balay while (nz--) { 10674e2b4712SSatish Balay sum1 -= (*v++)*tmp[*vi++]; 10684e2b4712SSatish Balay } 10694e2b4712SSatish Balay x[*c--] = tmp[i] = aa[diag[i]]*sum1; 10704e2b4712SSatish Balay } 10714e2b4712SSatish Balay 10724e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 10734e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1074e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1075e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 10764e2b4712SSatish Balay PLogFlops(2*1*(a->nz) - a->n); 10774e2b4712SSatish Balay PetscFunctionReturn(0); 10784e2b4712SSatish Balay } 107915091d37SBarry Smith /* 108015091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 108115091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 108215091d37SBarry Smith */ 108315091d37SBarry Smith #undef __FUNC__ 108415091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering" 108515091d37SBarry Smith int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 108615091d37SBarry Smith { 108715091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 108815091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 108915091d37SBarry Smith int ierr,*diag = a->diag; 109015091d37SBarry Smith MatScalar *aa=a->a; 109115091d37SBarry Smith Scalar *x,*b; 109215091d37SBarry Smith Scalar sum1,x1; 109315091d37SBarry Smith MatScalar *v; 109415091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 109515091d37SBarry Smith 109615091d37SBarry Smith PetscFunctionBegin; 109715091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 109815091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 109915091d37SBarry Smith 110015091d37SBarry Smith /* forward solve the lower triangular */ 110115091d37SBarry Smith idx = 0; 110215091d37SBarry Smith x[0] = b[0]; 110315091d37SBarry Smith for ( i=1; i<n; i++ ) { 110415091d37SBarry Smith v = aa + ai[i]; 110515091d37SBarry Smith vi = aj + ai[i]; 110615091d37SBarry Smith nz = diag[i] - ai[i]; 110715091d37SBarry Smith idx += 1; 110815091d37SBarry Smith sum1 = b[idx]; 110915091d37SBarry Smith while (nz--) { 111015091d37SBarry Smith jdx = *vi++; 111115091d37SBarry Smith x1 = x[jdx]; 111215091d37SBarry Smith sum1 -= v[0]*x1; 111315091d37SBarry Smith v += 1; 111415091d37SBarry Smith } 111515091d37SBarry Smith x[idx] = sum1; 111615091d37SBarry Smith } 111715091d37SBarry Smith /* backward solve the upper triangular */ 111815091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 111915091d37SBarry Smith v = aa + diag[i] + 1; 112015091d37SBarry Smith vi = aj + diag[i] + 1; 112115091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 112215091d37SBarry Smith idt = i; 112315091d37SBarry Smith sum1 = x[idt]; 112415091d37SBarry Smith while (nz--) { 112515091d37SBarry Smith idx = *vi++; 112615091d37SBarry Smith x1 = x[idx]; 112715091d37SBarry Smith sum1 -= v[0]*x1; 112815091d37SBarry Smith v += 1; 112915091d37SBarry Smith } 113015091d37SBarry Smith v = aa + diag[i]; 113115091d37SBarry Smith x[idt] = v[0]*sum1; 113215091d37SBarry Smith } 113315091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 113415091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 113515091d37SBarry Smith PLogFlops(2*(a->nz) - a->n); 113615091d37SBarry Smith PetscFunctionReturn(0); 113715091d37SBarry Smith } 11384e2b4712SSatish Balay 11394e2b4712SSatish Balay /* ----------------------------------------------------------------*/ 11404e2b4712SSatish Balay /* 11414e2b4712SSatish Balay This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 11424e2b4712SSatish Balay except that the data structure of Mat_SeqAIJ is slightly different. 11434e2b4712SSatish Balay Not a good example of code reuse. 11444e2b4712SSatish Balay */ 1145435faa5fSBarry Smith extern int MatMissingDiag_SeqBAIJ(Mat); 1146435faa5fSBarry Smith 11474e2b4712SSatish Balay #undef __FUNC__ 11484e2b4712SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ" 1149435faa5fSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 11504e2b4712SSatish Balay { 11514e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 11524e2b4712SSatish Balay IS isicol; 11534e2b4712SSatish Balay int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 11544e2b4712SSatish Balay int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 1155335d9088SBarry Smith int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 1156435faa5fSBarry Smith int incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill; 11574e2b4712SSatish Balay PetscTruth col_identity, row_identity; 1158435faa5fSBarry Smith double f; 11594e2b4712SSatish Balay 11604e2b4712SSatish Balay PetscFunctionBegin; 1161435faa5fSBarry Smith if (info) { 1162435faa5fSBarry Smith f = info->fill; 1163335d9088SBarry Smith levels = (int) info->levels; 1164335d9088SBarry Smith diagonal_fill = (int) info->diagonal_fill; 1165435faa5fSBarry Smith } else { 1166435faa5fSBarry Smith f = 1.0; 1167435faa5fSBarry Smith levels = 0; 1168435faa5fSBarry Smith diagonal_fill = 0; 1169435faa5fSBarry Smith } 1170e51c0b9cSSatish Balay ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 1171e51c0b9cSSatish Balay 11724e2b4712SSatish Balay /* special case that simply copies fill pattern */ 11734e2b4712SSatish Balay PetscValidHeaderSpecific(isrow,IS_COOKIE); 11744e2b4712SSatish Balay PetscValidHeaderSpecific(iscol,IS_COOKIE); 1175667159a5SBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1176667159a5SBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 11774e2b4712SSatish Balay if (levels == 0 && row_identity && col_identity) { 1178e1293385SBarry Smith ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 11794e2b4712SSatish Balay (*fact)->factor = FACTOR_LU; 11804e2b4712SSatish Balay b = (Mat_SeqBAIJ *) (*fact)->data; 11814e2b4712SSatish Balay if (!b->diag) { 11824e2b4712SSatish Balay ierr = MatMarkDiag_SeqBAIJ(*fact);CHKERRQ(ierr); 11834e2b4712SSatish Balay } 1184435faa5fSBarry Smith ierr = MatMissingDiag_SeqBAIJ(*fact);CHKERRQ(ierr); 11854e2b4712SSatish Balay b->row = isrow; 11864e2b4712SSatish Balay b->col = iscol; 1187e51c0b9cSSatish Balay b->icol = isicol; 11884e2b4712SSatish Balay b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 11894e2b4712SSatish Balay /* 119015091d37SBarry Smith Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 119115091d37SBarry Smith for ILU(0) factorization with natural ordering 11924e2b4712SSatish Balay */ 119315091d37SBarry Smith switch (b->bs) { 119415091d37SBarry Smith case 2: 119515091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 119615091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 119715091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 119815091d37SBarry Smith break; 119915091d37SBarry Smith case 3: 120015091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 120115091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 120215091d37SBarry Smith PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 120315091d37SBarry Smith break; 120415091d37SBarry Smith case 4: 1205f830108cSBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1206f830108cSBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 120715091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 120815091d37SBarry Smith break; 120915091d37SBarry Smith case 5: 1210667159a5SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1211667159a5SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 121215091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 121315091d37SBarry Smith break; 121415091d37SBarry Smith case 6: 121515091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 121615091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 121715091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 121815091d37SBarry Smith break; 121915091d37SBarry Smith case 7: 122015091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 122115091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 122215091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 122315091d37SBarry Smith break; 12244e2b4712SSatish Balay } 12254e2b4712SSatish Balay PetscFunctionReturn(0); 12264e2b4712SSatish Balay } 12274e2b4712SSatish Balay 12284e2b4712SSatish Balay ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 12294e2b4712SSatish Balay ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 12304e2b4712SSatish Balay 12314e2b4712SSatish Balay /* get new row pointers */ 12324e2b4712SSatish Balay ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 12334e2b4712SSatish Balay ainew[0] = 0; 12344e2b4712SSatish Balay /* don't know how many column pointers are needed so estimate */ 12354e2b4712SSatish Balay jmax = (int) (f*ai[n] + 1); 12364e2b4712SSatish Balay ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 12374e2b4712SSatish Balay /* ajfill is level of fill for each fill entry */ 12384e2b4712SSatish Balay ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 12394e2b4712SSatish Balay /* fill is a linked list of nonzeros in active row */ 12404e2b4712SSatish Balay fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 12414e2b4712SSatish Balay /* im is level for each filled value */ 12424e2b4712SSatish Balay im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 12434e2b4712SSatish Balay /* dloc is location of diagonal in factor */ 12444e2b4712SSatish Balay dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 12454e2b4712SSatish Balay dloc[0] = 0; 12464e2b4712SSatish Balay for ( prow=0; prow<n; prow++ ) { 1247435faa5fSBarry Smith 1248435faa5fSBarry Smith /* copy prow into linked list */ 12494e2b4712SSatish Balay nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 12504e2b4712SSatish Balay if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 12514e2b4712SSatish Balay xi = aj + ai[r[prow]]; 12524e2b4712SSatish Balay fill[n] = n; 1253435faa5fSBarry Smith fill[prow] = -1; /* marker for diagonal entry */ 12544e2b4712SSatish Balay while (nz--) { 12554e2b4712SSatish Balay fm = n; 12564e2b4712SSatish Balay idx = ic[*xi++]; 12574e2b4712SSatish Balay do { 12584e2b4712SSatish Balay m = fm; 12594e2b4712SSatish Balay fm = fill[m]; 12604e2b4712SSatish Balay } while (fm < idx); 12614e2b4712SSatish Balay fill[m] = idx; 12624e2b4712SSatish Balay fill[idx] = fm; 12634e2b4712SSatish Balay im[idx] = 0; 12644e2b4712SSatish Balay } 1265435faa5fSBarry Smith 1266435faa5fSBarry Smith /* make sure diagonal entry is included */ 1267435faa5fSBarry Smith if (diagonal_fill && fill[prow] == -1) { 1268435faa5fSBarry Smith fm = n; 1269435faa5fSBarry Smith while (fill[fm] < prow) fm = fill[fm]; 1270435faa5fSBarry Smith fill[prow] = fill[fm]; /* insert diagonal into linked list */ 1271435faa5fSBarry Smith fill[fm] = prow; 1272435faa5fSBarry Smith im[prow] = 0; 1273435faa5fSBarry Smith nzf++; 1274335d9088SBarry Smith dcount++; 1275435faa5fSBarry Smith } 1276435faa5fSBarry Smith 12774e2b4712SSatish Balay nzi = 0; 12784e2b4712SSatish Balay row = fill[n]; 12794e2b4712SSatish Balay while ( row < prow ) { 12804e2b4712SSatish Balay incrlev = im[row] + 1; 12814e2b4712SSatish Balay nz = dloc[row]; 1282435faa5fSBarry Smith xi = ajnew + ainew[row] + nz + 1; 12834e2b4712SSatish Balay flev = ajfill + ainew[row] + nz + 1; 12844e2b4712SSatish Balay nnz = ainew[row+1] - ainew[row] - nz - 1; 12854e2b4712SSatish Balay fm = row; 12864e2b4712SSatish Balay while (nnz-- > 0) { 12874e2b4712SSatish Balay idx = *xi++; 12884e2b4712SSatish Balay if (*flev + incrlev > levels) { 12894e2b4712SSatish Balay flev++; 12904e2b4712SSatish Balay continue; 12914e2b4712SSatish Balay } 12924e2b4712SSatish Balay do { 12934e2b4712SSatish Balay m = fm; 12944e2b4712SSatish Balay fm = fill[m]; 12954e2b4712SSatish Balay } while (fm < idx); 12964e2b4712SSatish Balay if (fm != idx) { 12974e2b4712SSatish Balay im[idx] = *flev + incrlev; 12984e2b4712SSatish Balay fill[m] = idx; 12994e2b4712SSatish Balay fill[idx] = fm; 13004e2b4712SSatish Balay fm = idx; 13014e2b4712SSatish Balay nzf++; 1302ecf371e4SBarry Smith } else { 13034e2b4712SSatish Balay if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 13044e2b4712SSatish Balay } 13054e2b4712SSatish Balay flev++; 13064e2b4712SSatish Balay } 13074e2b4712SSatish Balay row = fill[row]; 13084e2b4712SSatish Balay nzi++; 13094e2b4712SSatish Balay } 13104e2b4712SSatish Balay /* copy new filled row into permanent storage */ 13114e2b4712SSatish Balay ainew[prow+1] = ainew[prow] + nzf; 13124e2b4712SSatish Balay if (ainew[prow+1] > jmax) { 1313ecf371e4SBarry Smith 1314ecf371e4SBarry Smith /* estimate how much additional space we will need */ 1315ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1316ecf371e4SBarry Smith /* just double the memory each time */ 1317ecf371e4SBarry Smith int maxadd = jmax; 1318ecf371e4SBarry Smith /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */ 13194e2b4712SSatish Balay if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 13204e2b4712SSatish Balay jmax += maxadd; 1321ecf371e4SBarry Smith 1322ecf371e4SBarry Smith /* allocate a longer ajnew and ajfill */ 13234e2b4712SSatish Balay xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1324549d3d68SSatish Balay ierr = PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));CHKERRQ(ierr); 1325*606d414cSSatish Balay ierr = PetscFree(ajnew);CHKERRQ(ierr); 13264e2b4712SSatish Balay ajnew = xi; 13274e2b4712SSatish Balay xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1328549d3d68SSatish Balay ierr = PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));CHKERRQ(ierr); 1329*606d414cSSatish Balay ierr = PetscFree(ajfill);CHKERRQ(ierr); 13304e2b4712SSatish Balay ajfill = xi; 1331ecf371e4SBarry Smith realloc++; /* count how many reallocations are needed */ 13324e2b4712SSatish Balay } 13334e2b4712SSatish Balay xi = ajnew + ainew[prow]; 13344e2b4712SSatish Balay flev = ajfill + ainew[prow]; 13354e2b4712SSatish Balay dloc[prow] = nzi; 13364e2b4712SSatish Balay fm = fill[n]; 13374e2b4712SSatish Balay while (nzf--) { 13384e2b4712SSatish Balay *xi++ = fm; 13394e2b4712SSatish Balay *flev++ = im[fm]; 13404e2b4712SSatish Balay fm = fill[fm]; 13414e2b4712SSatish Balay } 1342435faa5fSBarry Smith /* make sure row has diagonal entry */ 1343435faa5fSBarry Smith if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1344435faa5fSBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1345435faa5fSBarry Smith try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1346435faa5fSBarry Smith } 13474e2b4712SSatish Balay } 1348*606d414cSSatish Balay ierr = PetscFree(ajfill);CHKERRQ(ierr); 13494e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 13504e2b4712SSatish Balay ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1351*606d414cSSatish Balay ierr = PetscFree(fill);CHKERRQ(ierr); 1352*606d414cSSatish Balay ierr = PetscFree(im);CHKERRQ(ierr); 13534e2b4712SSatish Balay 13544e2b4712SSatish Balay { 13554e2b4712SSatish Balay double af = ((double)ainew[n])/((double)ai[n]); 1356981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 13574e2b4712SSatish Balay realloc,f,af); 1358981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af); 1359981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af); 1360981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n"); 1361335d9088SBarry Smith if (diagonal_fill) { 1362335d9088SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount); 1363335d9088SBarry Smith } 13644e2b4712SSatish Balay } 13654e2b4712SSatish Balay 13664e2b4712SSatish Balay /* put together the new matrix */ 13674e2b4712SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1368fa6007c9SSatish Balay PLogObjectParent(*fact,isicol); 13694e2b4712SSatish Balay b = (Mat_SeqBAIJ *) (*fact)->data; 1370*606d414cSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 13714e2b4712SSatish Balay b->singlemalloc = 0; 13723f1db9ecSBarry Smith len = bs2*ainew[n]*sizeof(MatScalar); 13734e2b4712SSatish Balay /* the next line frees the default space generated by the Create() */ 1374*606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 1375*606d414cSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 13763f1db9ecSBarry Smith b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 13774e2b4712SSatish Balay b->j = ajnew; 13784e2b4712SSatish Balay b->i = ainew; 13794e2b4712SSatish Balay for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 13804e2b4712SSatish Balay b->diag = dloc; 13814e2b4712SSatish Balay b->ilen = 0; 13824e2b4712SSatish Balay b->imax = 0; 13834e2b4712SSatish Balay b->row = isrow; 13844e2b4712SSatish Balay b->col = iscol; 1385e51c0b9cSSatish Balay b->icol = isicol; 13864e2b4712SSatish Balay b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 13874e2b4712SSatish Balay /* In b structure: Free imax, ilen, old a, old j. 13884e2b4712SSatish Balay Allocate dloc, solve_work, new a, new j */ 13894e2b4712SSatish Balay PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar)); 13904e2b4712SSatish Balay b->maxnz = b->nz = ainew[n]; 13914e2b4712SSatish Balay (*fact)->factor = FACTOR_LU; 13924e2b4712SSatish Balay 13934e2b4712SSatish Balay (*fact)->info.factor_mallocs = realloc; 13944e2b4712SSatish Balay (*fact)->info.fill_ratio_given = f; 13954e2b4712SSatish Balay (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 13964e2b4712SSatish Balay 13974e2b4712SSatish Balay PetscFunctionReturn(0); 13984e2b4712SSatish Balay } 13994e2b4712SSatish Balay 14004e2b4712SSatish Balay 14014e2b4712SSatish Balay 14024e2b4712SSatish Balay 1403