xref: /petsc/src/mat/impls/baij/seq/baijfact2.c (revision 606d414c6e034e02e67059b83ebaefc3ebe99698)
14e2b4712SSatish Balay #ifdef PETSC_RCS_HEADER
2*606d414cSSatish Balay static char vcid[] = "$Id: baijfact2.c,v 1.28 1999/05/12 03:29:44 bsmith Exp balay $";
34e2b4712SSatish Balay #endif
44e2b4712SSatish Balay /*
54e2b4712SSatish Balay     Factorization code for BAIJ format.
64e2b4712SSatish Balay */
74e2b4712SSatish Balay 
84e2b4712SSatish Balay #include "src/mat/impls/baij/seq/baij.h"
94e2b4712SSatish Balay #include "src/vec/vecimpl.h"
104e2b4712SSatish Balay #include "src/inline/ilu.h"
1174c49faeSBarry Smith #include "src/inline/dot.h"
124e2b4712SSatish Balay 
134e2b4712SSatish Balay /* ----------------------------------------------------------- */
144e2b4712SSatish Balay #undef __FUNC__
154e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_N"
164e2b4712SSatish Balay int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
174e2b4712SSatish Balay {
184e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
194e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
204e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
214e2b4712SSatish Balay   int             nz,bs=a->bs,bs2=a->bs2,*rout,*cout;
223f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
233f1db9ecSBarry Smith   Scalar          *x,*b,*sum,*tmp,*lsum;
244e2b4712SSatish Balay 
254e2b4712SSatish Balay   PetscFunctionBegin;
26e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
284e2b4712SSatish Balay   tmp  = a->solve_work;
294e2b4712SSatish Balay 
304e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
314e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
324e2b4712SSatish Balay 
334e2b4712SSatish Balay   /* forward solve the lower triangular */
34549d3d68SSatish Balay   ierr = PetscMemcpy(tmp,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr);
354e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
364e2b4712SSatish Balay     v   = aa + bs2*ai[i];
374e2b4712SSatish Balay     vi  = aj + ai[i];
384e2b4712SSatish Balay     nz  = a->diag[i] - ai[i];
394e2b4712SSatish Balay     sum = tmp + bs*i;
40962fb4a0SBarry Smith     ierr = PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr);
414e2b4712SSatish Balay     while (nz--) {
424e2b4712SSatish Balay       Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++));
434e2b4712SSatish Balay       v += bs2;
444e2b4712SSatish Balay     }
454e2b4712SSatish Balay   }
464e2b4712SSatish Balay   /* backward solve the upper triangular */
474e2b4712SSatish Balay   lsum = a->solve_work + a->n;
484e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
494e2b4712SSatish Balay     v   = aa + bs2*(a->diag[i] + 1);
504e2b4712SSatish Balay     vi  = aj + a->diag[i] + 1;
514e2b4712SSatish Balay     nz  = ai[i+1] - a->diag[i] - 1;
52549d3d68SSatish Balay     ierr = PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr);
534e2b4712SSatish Balay     while (nz--) {
544e2b4712SSatish Balay       Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++));
554e2b4712SSatish Balay       v += bs2;
564e2b4712SSatish Balay     }
574e2b4712SSatish Balay     Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs);
58549d3d68SSatish Balay     ierr = PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr);
594e2b4712SSatish Balay   }
604e2b4712SSatish Balay 
614e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
624e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
63e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
64e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6542015d63SBarry Smith   PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n);
664e2b4712SSatish Balay   PetscFunctionReturn(0);
674e2b4712SSatish Balay }
684e2b4712SSatish Balay 
694e2b4712SSatish Balay #undef __FUNC__
704e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_7"
714e2b4712SSatish Balay int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
724e2b4712SSatish Balay {
734e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
744e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
754e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
764e2b4712SSatish Balay   int             *diag = a->diag;
773f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
783f1db9ecSBarry Smith   Scalar          sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7;
793f1db9ecSBarry Smith   Scalar          *x,*b,*tmp;
804e2b4712SSatish Balay 
814e2b4712SSatish Balay   PetscFunctionBegin;
82e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
83e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
844e2b4712SSatish Balay   tmp  = a->solve_work;
854e2b4712SSatish Balay 
864e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
874e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
884e2b4712SSatish Balay 
894e2b4712SSatish Balay   /* forward solve the lower triangular */
904e2b4712SSatish Balay   idx    = 7*(*r++);
914e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
924e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
934e2b4712SSatish Balay   tmp[5] = b[5+idx]; tmp[6] = b[6+idx];
944e2b4712SSatish Balay 
954e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
964e2b4712SSatish Balay     v     = aa + 49*ai[i];
974e2b4712SSatish Balay     vi    = aj + ai[i];
984e2b4712SSatish Balay     nz    = diag[i] - ai[i];
994e2b4712SSatish Balay     idx   = 7*(*r++);
1004e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
1014e2b4712SSatish Balay     sum5  = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx];
1024e2b4712SSatish Balay     while (nz--) {
1034e2b4712SSatish Balay       idx   = 7*(*vi++);
1044e2b4712SSatish Balay       x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
1054e2b4712SSatish Balay       x4    = tmp[3+idx];x5 = tmp[4+idx];
1064e2b4712SSatish Balay       x6    = tmp[5+idx];x7 = tmp[6+idx];
1074e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1084e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1094e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1104e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1114e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1124e2b4712SSatish Balay       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1134e2b4712SSatish Balay       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1144e2b4712SSatish Balay       v += 49;
1154e2b4712SSatish Balay     }
1164e2b4712SSatish Balay     idx = 7*i;
1174e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
1184e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
1194e2b4712SSatish Balay     tmp[5+idx] = sum6;tmp[6+idx] = sum7;
1204e2b4712SSatish Balay   }
1214e2b4712SSatish Balay   /* backward solve the upper triangular */
1224e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
1234e2b4712SSatish Balay     v    = aa + 49*diag[i] + 49;
1244e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
1254e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
1264e2b4712SSatish Balay     idt  = 7*i;
1274e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
1284e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
1294e2b4712SSatish Balay     sum6 = tmp[5+idt];sum7 = tmp[6+idt];
1304e2b4712SSatish Balay     while (nz--) {
1314e2b4712SSatish Balay       idx   = 7*(*vi++);
1324e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
1334e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
1344e2b4712SSatish Balay       x6    = tmp[5+idx]; x7 = tmp[6+idx];
1354e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1364e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1374e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1384e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1394e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1404e2b4712SSatish Balay       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1414e2b4712SSatish Balay       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1424e2b4712SSatish Balay       v += 49;
1434e2b4712SSatish Balay     }
1444e2b4712SSatish Balay     idc = 7*(*c--);
1454e2b4712SSatish Balay     v   = aa + 49*diag[i];
1464e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[7]*sum2+v[14]*sum3+
1474e2b4712SSatish Balay                                  v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7;
1484e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+
1494e2b4712SSatish Balay                                  v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7;
1504e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+
1514e2b4712SSatish Balay                                  v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7;
1524e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+
1534e2b4712SSatish Balay                                  v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7;
1544e2b4712SSatish Balay     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+
1554e2b4712SSatish Balay                                  v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7;
1564e2b4712SSatish Balay     x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+
1574e2b4712SSatish Balay                                  v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7;
1584e2b4712SSatish Balay     x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+
1594e2b4712SSatish Balay                                  v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7;
1604e2b4712SSatish Balay   }
1614e2b4712SSatish Balay 
1624e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1634e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
164e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
165e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
16642015d63SBarry Smith   PLogFlops(2*49*(a->nz) - 7*a->n);
1674e2b4712SSatish Balay   PetscFunctionReturn(0);
1684e2b4712SSatish Balay }
1694e2b4712SSatish Balay 
1704e2b4712SSatish Balay #undef __FUNC__
17115091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering"
17215091d37SBarry Smith int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
17315091d37SBarry Smith {
17415091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
17515091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
17615091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
17715091d37SBarry Smith   MatScalar       *aa=a->a,*v;
17815091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7;
17915091d37SBarry Smith 
18015091d37SBarry Smith   PetscFunctionBegin;
18115091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
18215091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
18315091d37SBarry Smith   /* forward solve the lower triangular */
18415091d37SBarry Smith   idx    = 0;
18515091d37SBarry Smith   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
18615091d37SBarry Smith   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
18715091d37SBarry Smith   x[6] = b[6+idx];
18815091d37SBarry Smith   for ( i=1; i<n; i++ ) {
18915091d37SBarry Smith     v     =  aa + 49*ai[i];
19015091d37SBarry Smith     vi    =  aj + ai[i];
19115091d37SBarry Smith     nz    =  diag[i] - ai[i];
19215091d37SBarry Smith     idx   =  7*i;
19315091d37SBarry Smith     sum1  =  b[idx];   sum2 = b[1+idx]; sum3 = b[2+idx];
19415091d37SBarry Smith     sum4  =  b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx];
19515091d37SBarry Smith     sum7  =  b[6+idx];
19615091d37SBarry Smith     while (nz--) {
19715091d37SBarry Smith       jdx   = 7*(*vi++);
19815091d37SBarry Smith       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
19915091d37SBarry Smith       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
20015091d37SBarry Smith       x7    = x[6+jdx];
20115091d37SBarry Smith       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
20215091d37SBarry Smith       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
20315091d37SBarry Smith       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
20415091d37SBarry Smith       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
20515091d37SBarry Smith       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
20615091d37SBarry Smith       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
20715091d37SBarry Smith       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
20815091d37SBarry Smith       v += 49;
20915091d37SBarry Smith      }
21015091d37SBarry Smith     x[idx]   = sum1;
21115091d37SBarry Smith     x[1+idx] = sum2;
21215091d37SBarry Smith     x[2+idx] = sum3;
21315091d37SBarry Smith     x[3+idx] = sum4;
21415091d37SBarry Smith     x[4+idx] = sum5;
21515091d37SBarry Smith     x[5+idx] = sum6;
21615091d37SBarry Smith     x[6+idx] = sum7;
21715091d37SBarry Smith   }
21815091d37SBarry Smith   /* backward solve the upper triangular */
21915091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
22015091d37SBarry Smith     v    = aa + 49*diag[i] + 49;
22115091d37SBarry Smith     vi   = aj + diag[i] + 1;
22215091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
22315091d37SBarry Smith     idt  = 7*i;
22415091d37SBarry Smith     sum1 = x[idt];   sum2 = x[1+idt];
22515091d37SBarry Smith     sum3 = x[2+idt]; sum4 = x[3+idt];
22615091d37SBarry Smith     sum5 = x[4+idt]; sum6 = x[5+idt];
22715091d37SBarry Smith     sum7 = x[6+idt];
22815091d37SBarry Smith     while (nz--) {
22915091d37SBarry Smith       idx   = 7*(*vi++);
23015091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
23115091d37SBarry Smith       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
23215091d37SBarry Smith       x7    = x[6+idx];
23315091d37SBarry Smith       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
23415091d37SBarry Smith       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
23515091d37SBarry Smith       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
23615091d37SBarry Smith       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
23715091d37SBarry Smith       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
23815091d37SBarry Smith       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
23915091d37SBarry Smith       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
24015091d37SBarry Smith       v += 49;
24115091d37SBarry Smith     }
24215091d37SBarry Smith     v        = aa + 49*diag[i];
24315091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[7]*sum2  + v[14]*sum3 + v[21]*sum4
24415091d37SBarry Smith                          + v[28]*sum5 + v[35]*sum6 + v[42]*sum7;
24515091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[8]*sum2  + v[15]*sum3 + v[22]*sum4
24615091d37SBarry Smith                          + v[29]*sum5 + v[36]*sum6 + v[43]*sum7;
24715091d37SBarry Smith     x[2+idt] = v[2]*sum1 + v[9]*sum2  + v[16]*sum3 + v[23]*sum4
24815091d37SBarry Smith                          + v[30]*sum5 + v[37]*sum6 + v[44]*sum7;
24915091d37SBarry Smith     x[3+idt] = v[3]*sum1 + v[10]*sum2  + v[17]*sum3 + v[24]*sum4
25015091d37SBarry Smith                          + v[31]*sum5 + v[38]*sum6 + v[45]*sum7;
25115091d37SBarry Smith     x[4+idt] = v[4]*sum1 + v[11]*sum2  + v[18]*sum3 + v[25]*sum4
25215091d37SBarry Smith                          + v[32]*sum5 + v[39]*sum6 + v[46]*sum7;
25315091d37SBarry Smith     x[5+idt] = v[5]*sum1 + v[12]*sum2  + v[19]*sum3 + v[26]*sum4
25415091d37SBarry Smith                          + v[33]*sum5 + v[40]*sum6 + v[47]*sum7;
25515091d37SBarry Smith     x[6+idt] = v[6]*sum1 + v[13]*sum2  + v[20]*sum3 + v[27]*sum4
25615091d37SBarry Smith                          + v[34]*sum5 + v[41]*sum6 + v[48]*sum7;
25715091d37SBarry Smith   }
25815091d37SBarry Smith 
25915091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
26015091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
26115091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
26215091d37SBarry Smith   PetscFunctionReturn(0);
26315091d37SBarry Smith }
26415091d37SBarry Smith 
26515091d37SBarry Smith #undef __FUNC__
26615091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6"
26715091d37SBarry Smith int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
26815091d37SBarry Smith {
26915091d37SBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
27015091d37SBarry Smith   IS              iscol=a->col,isrow=a->row;
27115091d37SBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
27215091d37SBarry Smith   int             *diag = a->diag;
27315091d37SBarry Smith   MatScalar       *aa=a->a,*v;
27415091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6,*tmp;
27515091d37SBarry Smith 
27615091d37SBarry Smith   PetscFunctionBegin;
27715091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27815091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
27915091d37SBarry Smith   tmp  = a->solve_work;
28015091d37SBarry Smith 
28115091d37SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
28215091d37SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
28315091d37SBarry Smith 
28415091d37SBarry Smith   /* forward solve the lower triangular */
28515091d37SBarry Smith   idx    = 6*(*r++);
28615091d37SBarry Smith   tmp[0] = b[idx];   tmp[1] = b[1+idx];
28715091d37SBarry Smith   tmp[2] = b[2+idx]; tmp[3] = b[3+idx];
28815091d37SBarry Smith   tmp[4] = b[4+idx]; tmp[5] = b[5+idx];
28915091d37SBarry Smith   for ( i=1; i<n; i++ ) {
29015091d37SBarry Smith     v     = aa + 36*ai[i];
29115091d37SBarry Smith     vi    = aj + ai[i];
29215091d37SBarry Smith     nz    = diag[i] - ai[i];
29315091d37SBarry Smith     idx   = 6*(*r++);
29415091d37SBarry Smith     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
29515091d37SBarry Smith     sum5  = b[4+idx]; sum6 = b[5+idx];
29615091d37SBarry Smith     while (nz--) {
29715091d37SBarry Smith       idx   = 6*(*vi++);
29815091d37SBarry Smith       x1    = tmp[idx];   x2 = tmp[1+idx]; x3 = tmp[2+idx];
29915091d37SBarry Smith       x4    = tmp[3+idx]; x5 = tmp[4+idx]; x6 = tmp[5+idx];
30015091d37SBarry Smith       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
30115091d37SBarry Smith       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
30215091d37SBarry Smith       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
30315091d37SBarry Smith       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
30415091d37SBarry Smith       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
30515091d37SBarry Smith       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
30615091d37SBarry Smith       v += 36;
30715091d37SBarry Smith     }
30815091d37SBarry Smith     idx = 6*i;
30915091d37SBarry Smith     tmp[idx]   = sum1;tmp[1+idx] = sum2;
31015091d37SBarry Smith     tmp[2+idx] = sum3;tmp[3+idx] = sum4;
31115091d37SBarry Smith     tmp[4+idx] = sum5;tmp[5+idx] = sum6;
31215091d37SBarry Smith   }
31315091d37SBarry Smith   /* backward solve the upper triangular */
31415091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
31515091d37SBarry Smith     v    = aa + 36*diag[i] + 36;
31615091d37SBarry Smith     vi   = aj + diag[i] + 1;
31715091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
31815091d37SBarry Smith     idt  = 6*i;
31915091d37SBarry Smith     sum1 = tmp[idt];  sum2 = tmp[1+idt];
32015091d37SBarry Smith     sum3 = tmp[2+idt];sum4 = tmp[3+idt];
32115091d37SBarry Smith     sum5 = tmp[4+idt];sum6 = tmp[5+idt];
32215091d37SBarry Smith     while (nz--) {
32315091d37SBarry Smith       idx   = 6*(*vi++);
32415091d37SBarry Smith       x1    = tmp[idx];   x2 = tmp[1+idx];
32515091d37SBarry Smith       x3    = tmp[2+idx]; x4 = tmp[3+idx];
32615091d37SBarry Smith       x5    = tmp[4+idx]; x6 = tmp[5+idx];
32715091d37SBarry Smith       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
32815091d37SBarry Smith       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
32915091d37SBarry Smith       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
33015091d37SBarry Smith       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
33115091d37SBarry Smith       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
33215091d37SBarry Smith       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
33315091d37SBarry Smith       v += 36;
33415091d37SBarry Smith     }
33515091d37SBarry Smith     idc = 6*(*c--);
33615091d37SBarry Smith     v   = aa + 36*diag[i];
33715091d37SBarry Smith     x[idc]   = tmp[idt]   = v[0]*sum1+v[6]*sum2+v[12]*sum3+
33815091d37SBarry Smith                                  v[18]*sum4+v[24]*sum5+v[30]*sum6;
33915091d37SBarry Smith     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[7]*sum2+v[13]*sum3+
34015091d37SBarry Smith                                  v[19]*sum4+v[25]*sum5+v[31]*sum6;
34115091d37SBarry Smith     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[8]*sum2+v[14]*sum3+
34215091d37SBarry Smith                                  v[20]*sum4+v[26]*sum5+v[32]*sum6;
34315091d37SBarry Smith     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[9]*sum2+v[15]*sum3+
34415091d37SBarry Smith                                  v[21]*sum4+v[27]*sum5+v[33]*sum6;
34515091d37SBarry Smith     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[10]*sum2+v[16]*sum3+
34615091d37SBarry Smith                                  v[22]*sum4+v[28]*sum5+v[34]*sum6;
34715091d37SBarry Smith     x[5+idc] = tmp[5+idt] = v[5]*sum1+v[11]*sum2+v[17]*sum3+
34815091d37SBarry Smith                                  v[23]*sum4+v[29]*sum5+v[35]*sum6;
34915091d37SBarry Smith   }
35015091d37SBarry Smith 
35115091d37SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
35215091d37SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
35315091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
35415091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
35515091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
35615091d37SBarry Smith   PetscFunctionReturn(0);
35715091d37SBarry Smith }
35815091d37SBarry Smith 
35915091d37SBarry Smith #undef __FUNC__
36015091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering"
36115091d37SBarry Smith int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
36215091d37SBarry Smith {
36315091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
36415091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
36515091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
36615091d37SBarry Smith   MatScalar       *aa=a->a,*v;
36715091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6;
36815091d37SBarry Smith 
36915091d37SBarry Smith   PetscFunctionBegin;
37015091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
37115091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
37215091d37SBarry Smith   /* forward solve the lower triangular */
37315091d37SBarry Smith   idx    = 0;
37415091d37SBarry Smith   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
37515091d37SBarry Smith   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
37615091d37SBarry Smith   for ( i=1; i<n; i++ ) {
37715091d37SBarry Smith     v     =  aa + 36*ai[i];
37815091d37SBarry Smith     vi    =  aj + ai[i];
37915091d37SBarry Smith     nz    =  diag[i] - ai[i];
38015091d37SBarry Smith     idx   =  6*i;
38115091d37SBarry Smith     sum1  =  b[idx];   sum2 = b[1+idx]; sum3 = b[2+idx];
38215091d37SBarry Smith     sum4  =  b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx];
38315091d37SBarry Smith     while (nz--) {
38415091d37SBarry Smith       jdx   = 6*(*vi++);
38515091d37SBarry Smith       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
38615091d37SBarry Smith       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
38715091d37SBarry Smith       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
38815091d37SBarry Smith       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
38915091d37SBarry Smith       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
39015091d37SBarry Smith       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
39115091d37SBarry Smith       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
39215091d37SBarry Smith       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
39315091d37SBarry Smith       v += 36;
39415091d37SBarry Smith      }
39515091d37SBarry Smith     x[idx]   = sum1;
39615091d37SBarry Smith     x[1+idx] = sum2;
39715091d37SBarry Smith     x[2+idx] = sum3;
39815091d37SBarry Smith     x[3+idx] = sum4;
39915091d37SBarry Smith     x[4+idx] = sum5;
40015091d37SBarry Smith     x[5+idx] = sum6;
40115091d37SBarry Smith   }
40215091d37SBarry Smith   /* backward solve the upper triangular */
40315091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
40415091d37SBarry Smith     v    = aa + 36*diag[i] + 36;
40515091d37SBarry Smith     vi   = aj + diag[i] + 1;
40615091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
40715091d37SBarry Smith     idt  = 6*i;
40815091d37SBarry Smith     sum1 = x[idt];   sum2 = x[1+idt];
40915091d37SBarry Smith     sum3 = x[2+idt]; sum4 = x[3+idt];
41015091d37SBarry Smith     sum5 = x[4+idt]; sum6 = x[5+idt];
41115091d37SBarry Smith     while (nz--) {
41215091d37SBarry Smith       idx   = 6*(*vi++);
41315091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
41415091d37SBarry Smith       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
41515091d37SBarry Smith       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
41615091d37SBarry Smith       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
41715091d37SBarry Smith       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
41815091d37SBarry Smith       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
41915091d37SBarry Smith       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
42015091d37SBarry Smith       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
42115091d37SBarry Smith       v += 36;
42215091d37SBarry Smith     }
42315091d37SBarry Smith     v        = aa + 36*diag[i];
42415091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[6]*sum2  + v[12]*sum3 + v[18]*sum4 + v[24]*sum5 + v[30]*sum6;
42515091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[7]*sum2  + v[13]*sum3 + v[19]*sum4 + v[25]*sum5 + v[31]*sum6;
42615091d37SBarry Smith     x[2+idt] = v[2]*sum1 + v[8]*sum2  + v[14]*sum3 + v[20]*sum4 + v[26]*sum5 + v[32]*sum6;
42715091d37SBarry Smith     x[3+idt] = v[3]*sum1 + v[9]*sum2  + v[15]*sum3 + v[21]*sum4 + v[27]*sum5 + v[33]*sum6;
42815091d37SBarry Smith     x[4+idt] = v[4]*sum1 + v[10]*sum2 + v[16]*sum3 + v[22]*sum4 + v[28]*sum5 + v[34]*sum6;
42915091d37SBarry Smith     x[5+idt] = v[5]*sum1 + v[11]*sum2 + v[17]*sum3 + v[23]*sum4 + v[29]*sum5 + v[35]*sum6;
43015091d37SBarry Smith   }
43115091d37SBarry Smith 
43215091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
43315091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
43415091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
43515091d37SBarry Smith   PetscFunctionReturn(0);
43615091d37SBarry Smith }
43715091d37SBarry Smith 
43815091d37SBarry Smith #undef __FUNC__
4394e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_5"
4404e2b4712SSatish Balay int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
4414e2b4712SSatish Balay {
4424e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
4434e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
4444e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
4454e2b4712SSatish Balay   int             *diag = a->diag;
4463f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
4473f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp;
4484e2b4712SSatish Balay 
4494e2b4712SSatish Balay   PetscFunctionBegin;
450e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
451e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4524e2b4712SSatish Balay   tmp  = a->solve_work;
4534e2b4712SSatish Balay 
4544e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
4554e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
4564e2b4712SSatish Balay 
4574e2b4712SSatish Balay   /* forward solve the lower triangular */
4584e2b4712SSatish Balay   idx    = 5*(*r++);
4594e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
4604e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
4614e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
4624e2b4712SSatish Balay     v     = aa + 25*ai[i];
4634e2b4712SSatish Balay     vi    = aj + ai[i];
4644e2b4712SSatish Balay     nz    = diag[i] - ai[i];
4654e2b4712SSatish Balay     idx   = 5*(*r++);
4664e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
4674e2b4712SSatish Balay     sum5  = b[4+idx];
4684e2b4712SSatish Balay     while (nz--) {
4694e2b4712SSatish Balay       idx   = 5*(*vi++);
4704e2b4712SSatish Balay       x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
4714e2b4712SSatish Balay       x4    = tmp[3+idx];x5 = tmp[4+idx];
4724e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
4734e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
4744e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
4754e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
4764e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
4774e2b4712SSatish Balay       v += 25;
4784e2b4712SSatish Balay     }
4794e2b4712SSatish Balay     idx = 5*i;
4804e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
4814e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
4824e2b4712SSatish Balay   }
4834e2b4712SSatish Balay   /* backward solve the upper triangular */
4844e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
4854e2b4712SSatish Balay     v    = aa + 25*diag[i] + 25;
4864e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
4874e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
4884e2b4712SSatish Balay     idt  = 5*i;
4894e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
4904e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
4914e2b4712SSatish Balay     while (nz--) {
4924e2b4712SSatish Balay       idx   = 5*(*vi++);
4934e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
4944e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
4954e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
4964e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
4974e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
4984e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
4994e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
5004e2b4712SSatish Balay       v += 25;
5014e2b4712SSatish Balay     }
5024e2b4712SSatish Balay     idc = 5*(*c--);
5034e2b4712SSatish Balay     v   = aa + 25*diag[i];
5044e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[5]*sum2+v[10]*sum3+
5054e2b4712SSatish Balay                                  v[15]*sum4+v[20]*sum5;
5064e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+
5074e2b4712SSatish Balay                                  v[16]*sum4+v[21]*sum5;
5084e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+
5094e2b4712SSatish Balay                                  v[17]*sum4+v[22]*sum5;
5104e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+
5114e2b4712SSatish Balay                                  v[18]*sum4+v[23]*sum5;
5124e2b4712SSatish Balay     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+
5134e2b4712SSatish Balay                                  v[19]*sum4+v[24]*sum5;
5144e2b4712SSatish Balay   }
5154e2b4712SSatish Balay 
5164e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
5174e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
518e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
519e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
52042015d63SBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
5214e2b4712SSatish Balay   PetscFunctionReturn(0);
5224e2b4712SSatish Balay }
5234e2b4712SSatish Balay 
5244e2b4712SSatish Balay #undef __FUNC__
52515091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering"
52615091d37SBarry Smith int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
52715091d37SBarry Smith {
52815091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
52915091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
53015091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
53115091d37SBarry Smith   MatScalar       *aa=a->a,*v;
53215091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;;
53315091d37SBarry Smith 
53415091d37SBarry Smith   PetscFunctionBegin;
53515091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
53615091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
53715091d37SBarry Smith   /* forward solve the lower triangular */
53815091d37SBarry Smith   idx    = 0;
53915091d37SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
54015091d37SBarry Smith   for ( i=1; i<n; i++ ) {
54115091d37SBarry Smith     v     =  aa + 25*ai[i];
54215091d37SBarry Smith     vi    =  aj + ai[i];
54315091d37SBarry Smith     nz    =  diag[i] - ai[i];
54415091d37SBarry Smith     idx   =  5*i;
54515091d37SBarry Smith     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx];
54615091d37SBarry Smith     while (nz--) {
54715091d37SBarry Smith       jdx   = 5*(*vi++);
54815091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
54915091d37SBarry Smith       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
55015091d37SBarry Smith       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
55115091d37SBarry Smith       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
55215091d37SBarry Smith       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
55315091d37SBarry Smith       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
55415091d37SBarry Smith       v    += 25;
55515091d37SBarry Smith     }
55615091d37SBarry Smith     x[idx]   = sum1;
55715091d37SBarry Smith     x[1+idx] = sum2;
55815091d37SBarry Smith     x[2+idx] = sum3;
55915091d37SBarry Smith     x[3+idx] = sum4;
56015091d37SBarry Smith     x[4+idx] = sum5;
56115091d37SBarry Smith   }
56215091d37SBarry Smith   /* backward solve the upper triangular */
56315091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
56415091d37SBarry Smith     v    = aa + 25*diag[i] + 25;
56515091d37SBarry Smith     vi   = aj + diag[i] + 1;
56615091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
56715091d37SBarry Smith     idt  = 5*i;
56815091d37SBarry Smith     sum1 = x[idt];  sum2 = x[1+idt];
56915091d37SBarry Smith     sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt];
57015091d37SBarry Smith     while (nz--) {
57115091d37SBarry Smith       idx   = 5*(*vi++);
57215091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
57315091d37SBarry Smith       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
57415091d37SBarry Smith       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
57515091d37SBarry Smith       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
57615091d37SBarry Smith       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
57715091d37SBarry Smith       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
57815091d37SBarry Smith       v    += 25;
57915091d37SBarry Smith     }
58015091d37SBarry Smith     v        = aa + 25*diag[i];
58115091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[5]*sum2 + v[10]*sum3  + v[15]*sum4 + v[20]*sum5;
58215091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3  + v[16]*sum4 + v[21]*sum5;
58315091d37SBarry Smith     x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3  + v[17]*sum4 + v[22]*sum5;
58415091d37SBarry Smith     x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3  + v[18]*sum4 + v[23]*sum5;
58515091d37SBarry Smith     x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3  + v[19]*sum4 + v[24]*sum5;
58615091d37SBarry Smith   }
58715091d37SBarry Smith 
58815091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
58915091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
59015091d37SBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
59115091d37SBarry Smith   PetscFunctionReturn(0);
59215091d37SBarry Smith }
59315091d37SBarry Smith 
59415091d37SBarry Smith #undef __FUNC__
5954e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4"
5964e2b4712SSatish Balay int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
5974e2b4712SSatish Balay {
5984e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
5994e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
6004e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
6014e2b4712SSatish Balay   int             *diag = a->diag;
6023f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
6033f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp;
6044e2b4712SSatish Balay 
6054e2b4712SSatish Balay   PetscFunctionBegin;
606e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
607e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6084e2b4712SSatish Balay   tmp  = a->solve_work;
6094e2b4712SSatish Balay 
6104e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
6114e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
6124e2b4712SSatish Balay 
6134e2b4712SSatish Balay   /* forward solve the lower triangular */
6144e2b4712SSatish Balay   idx    = 4*(*r++);
6154e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
6164e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx];
6174e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
6184e2b4712SSatish Balay     v     = aa + 16*ai[i];
6194e2b4712SSatish Balay     vi    = aj + ai[i];
6204e2b4712SSatish Balay     nz    = diag[i] - ai[i];
6214e2b4712SSatish Balay     idx   = 4*(*r++);
6224e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
6234e2b4712SSatish Balay     while (nz--) {
6244e2b4712SSatish Balay       idx   = 4*(*vi++);
6254e2b4712SSatish Balay       x1    = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx];
6264e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
6274e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
6284e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
6294e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
6304e2b4712SSatish Balay       v    += 16;
6314e2b4712SSatish Balay     }
6324e2b4712SSatish Balay     idx        = 4*i;
6334e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
6344e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4;
6354e2b4712SSatish Balay   }
6364e2b4712SSatish Balay   /* backward solve the upper triangular */
6374e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
6384e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
6394e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
6404e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
6414e2b4712SSatish Balay     idt  = 4*i;
6424e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
6434e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt];
6444e2b4712SSatish Balay     while (nz--) {
6454e2b4712SSatish Balay       idx   = 4*(*vi++);
6464e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
6474e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx];
6484e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
6494e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
6504e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
6514e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
6524e2b4712SSatish Balay       v += 16;
6534e2b4712SSatish Balay     }
6544e2b4712SSatish Balay     idc      = 4*(*c--);
6554e2b4712SSatish Balay     v        = aa + 16*diag[i];
6564e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4;
6574e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4;
6584e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4;
6594e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4;
6604e2b4712SSatish Balay   }
6614e2b4712SSatish Balay 
6624e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
6634e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
664e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
665e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
66642015d63SBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
6674e2b4712SSatish Balay   PetscFunctionReturn(0);
6684e2b4712SSatish Balay }
6690ef38995SBarry Smith 
6700ef38995SBarry Smith 
6714e2b4712SSatish Balay /*
6724e2b4712SSatish Balay       Special case where the matrix was ILU(0) factored in the natural
6734e2b4712SSatish Balay    ordering. This eliminates the need for the column and row permutation.
6744e2b4712SSatish Balay */
6754e2b4712SSatish Balay #undef __FUNC__
6764e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering"
6774e2b4712SSatish Balay int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
6784e2b4712SSatish Balay {
6794e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
68030d4dcafSBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
68130d4dcafSBarry Smith   int             ierr,*diag = a->diag;
6823f1db9ecSBarry Smith   MatScalar       *aa=a->a;
68330d4dcafSBarry Smith   Scalar          *x,*b;
6844e2b4712SSatish Balay 
6854e2b4712SSatish Balay   PetscFunctionBegin;
686e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
687e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6884e2b4712SSatish Balay 
689aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
6902853dc0eSBarry Smith   {
6912853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
6922853dc0eSBarry Smith     fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w);
6932853dc0eSBarry Smith   }
694aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
6952853dc0eSBarry Smith   {
6962853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
6972853dc0eSBarry Smith     fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w);
6982853dc0eSBarry Smith   }
699aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
7002853dc0eSBarry Smith   fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b);
701e1293385SBarry Smith #else
70230d4dcafSBarry Smith   {
7033f1db9ecSBarry Smith     Scalar    sum1,sum2,sum3,sum4,x1,x2,x3,x4;
7043f1db9ecSBarry Smith     MatScalar *v;
70530d4dcafSBarry Smith     int       jdx,idt,idx,nz,*vi,i;
706e1293385SBarry Smith 
7074e2b4712SSatish Balay   /* forward solve the lower triangular */
7084e2b4712SSatish Balay   idx    = 0;
709e1293385SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3];
7104e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
7114e2b4712SSatish Balay     v     =  aa      + 16*ai[i];
7124e2b4712SSatish Balay     vi    =  aj      + ai[i];
7134e2b4712SSatish Balay     nz    =  diag[i] - ai[i];
714e1293385SBarry Smith     idx   +=  4;
7154e2b4712SSatish Balay     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
7164e2b4712SSatish Balay     while (nz--) {
7174e2b4712SSatish Balay       jdx   = 4*(*vi++);
7184e2b4712SSatish Balay       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
7194e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
7204e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
7214e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
7224e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
7234e2b4712SSatish Balay       v    += 16;
7244e2b4712SSatish Balay     }
7254e2b4712SSatish Balay     x[idx]   = sum1;
7264e2b4712SSatish Balay     x[1+idx] = sum2;
7274e2b4712SSatish Balay     x[2+idx] = sum3;
7284e2b4712SSatish Balay     x[3+idx] = sum4;
7294e2b4712SSatish Balay   }
7304e2b4712SSatish Balay   /* backward solve the upper triangular */
7314e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
7324e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
7334e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
7344e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
7354e2b4712SSatish Balay     idt  = 4*i;
7364e2b4712SSatish Balay     sum1 = x[idt];  sum2 = x[1+idt];
7374e2b4712SSatish Balay     sum3 = x[2+idt];sum4 = x[3+idt];
7384e2b4712SSatish Balay     while (nz--) {
7394e2b4712SSatish Balay       idx   = 4*(*vi++);
7404e2b4712SSatish Balay       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx];
7414e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
7424e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
7434e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
7444e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
7454e2b4712SSatish Balay       v    += 16;
7464e2b4712SSatish Balay     }
7474e2b4712SSatish Balay     v        = aa + 16*diag[i];
7484e2b4712SSatish Balay     x[idt]   = v[0]*sum1 + v[4]*sum2 + v[8]*sum3  + v[12]*sum4;
7494e2b4712SSatish Balay     x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3  + v[13]*sum4;
7504e2b4712SSatish Balay     x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4;
7514e2b4712SSatish Balay     x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4;
7524e2b4712SSatish Balay   }
75330d4dcafSBarry Smith   }
754e1293385SBarry Smith #endif
7554e2b4712SSatish Balay 
756e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
757e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
75842015d63SBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
7594e2b4712SSatish Balay   PetscFunctionReturn(0);
7604e2b4712SSatish Balay }
7614e2b4712SSatish Balay 
762667159a5SBarry Smith #undef __FUNC__
7634e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_3"
7644e2b4712SSatish Balay int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
7654e2b4712SSatish Balay {
7664e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
7674e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
7684e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
7694e2b4712SSatish Balay   int             *diag = a->diag;
7703f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
7713f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp;
7724e2b4712SSatish Balay 
7734e2b4712SSatish Balay   PetscFunctionBegin;
774e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
775e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7764e2b4712SSatish Balay   tmp  = a->solve_work;
7774e2b4712SSatish Balay 
7784e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
7794e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
7804e2b4712SSatish Balay 
7814e2b4712SSatish Balay   /* forward solve the lower triangular */
7824e2b4712SSatish Balay   idx    = 3*(*r++);
7834e2b4712SSatish Balay   tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx];
7844e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
7854e2b4712SSatish Balay     v     = aa + 9*ai[i];
7864e2b4712SSatish Balay     vi    = aj + ai[i];
7874e2b4712SSatish Balay     nz    = diag[i] - ai[i];
7884e2b4712SSatish Balay     idx   = 3*(*r++);
7894e2b4712SSatish Balay     sum1  = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx];
7904e2b4712SSatish Balay     while (nz--) {
7914e2b4712SSatish Balay       idx   = 3*(*vi++);
7924e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
7934e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
7944e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
7954e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
7964e2b4712SSatish Balay       v += 9;
7974e2b4712SSatish Balay     }
7984e2b4712SSatish Balay     idx = 3*i;
7994e2b4712SSatish Balay     tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3;
8004e2b4712SSatish Balay   }
8014e2b4712SSatish Balay   /* backward solve the upper triangular */
8024e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
8034e2b4712SSatish Balay     v    = aa + 9*diag[i] + 9;
8044e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
8054e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
8064e2b4712SSatish Balay     idt  = 3*i;
8074e2b4712SSatish Balay     sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt];
8084e2b4712SSatish Balay     while (nz--) {
8094e2b4712SSatish Balay       idx   = 3*(*vi++);
8104e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
8114e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
8124e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
8134e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
8144e2b4712SSatish Balay       v += 9;
8154e2b4712SSatish Balay     }
8164e2b4712SSatish Balay     idc = 3*(*c--);
8174e2b4712SSatish Balay     v   = aa + 9*diag[i];
8184e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1 + v[3]*sum2 + v[6]*sum3;
8194e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3;
8204e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3;
8214e2b4712SSatish Balay   }
8224e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
8234e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
824e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
825e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
82642015d63SBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
8274e2b4712SSatish Balay   PetscFunctionReturn(0);
8284e2b4712SSatish Balay }
8294e2b4712SSatish Balay 
83015091d37SBarry Smith /*
83115091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
83215091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
83315091d37SBarry Smith */
83415091d37SBarry Smith #undef __FUNC__
83515091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering"
83615091d37SBarry Smith int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
83715091d37SBarry Smith {
83815091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
83915091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
84015091d37SBarry Smith   int             ierr,*diag = a->diag;
84115091d37SBarry Smith   MatScalar       *aa=a->a, *v;
84215091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,x1,x2,x3;
84315091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
84415091d37SBarry Smith 
84515091d37SBarry Smith   PetscFunctionBegin;
84615091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
84715091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
84815091d37SBarry Smith 
84915091d37SBarry Smith 
85015091d37SBarry Smith   /* forward solve the lower triangular */
85115091d37SBarry Smith   idx    = 0;
85215091d37SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2];
85315091d37SBarry Smith   for ( i=1; i<n; i++ ) {
85415091d37SBarry Smith     v     =  aa      + 9*ai[i];
85515091d37SBarry Smith     vi    =  aj      + ai[i];
85615091d37SBarry Smith     nz    =  diag[i] - ai[i];
85715091d37SBarry Smith     idx   +=  3;
85815091d37SBarry Smith     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];
85915091d37SBarry Smith     while (nz--) {
86015091d37SBarry Smith       jdx   = 3*(*vi++);
86115091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
86215091d37SBarry Smith       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
86315091d37SBarry Smith       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
86415091d37SBarry Smith       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
86515091d37SBarry Smith       v    += 9;
86615091d37SBarry Smith     }
86715091d37SBarry Smith     x[idx]   = sum1;
86815091d37SBarry Smith     x[1+idx] = sum2;
86915091d37SBarry Smith     x[2+idx] = sum3;
87015091d37SBarry Smith   }
87115091d37SBarry Smith   /* backward solve the upper triangular */
87215091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
87315091d37SBarry Smith     v    = aa + 9*diag[i] + 9;
87415091d37SBarry Smith     vi   = aj + diag[i] + 1;
87515091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
87615091d37SBarry Smith     idt  = 3*i;
87715091d37SBarry Smith     sum1 = x[idt];  sum2 = x[1+idt];
87815091d37SBarry Smith     sum3 = x[2+idt];
87915091d37SBarry Smith     while (nz--) {
88015091d37SBarry Smith       idx   = 3*(*vi++);
88115091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx];
88215091d37SBarry Smith       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
88315091d37SBarry Smith       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
88415091d37SBarry Smith       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
88515091d37SBarry Smith       v    += 9;
88615091d37SBarry Smith     }
88715091d37SBarry Smith     v        = aa +  9*diag[i];
88815091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[3]*sum2 + v[6]*sum3;
88915091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3;
89015091d37SBarry Smith     x[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3;
89115091d37SBarry Smith   }
89215091d37SBarry Smith 
89315091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
89415091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
89515091d37SBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
89615091d37SBarry Smith   PetscFunctionReturn(0);
89715091d37SBarry Smith }
89815091d37SBarry Smith 
8994e2b4712SSatish Balay #undef __FUNC__
9004e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_2"
9014e2b4712SSatish Balay int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
9024e2b4712SSatish Balay {
9034e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
9044e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
9054e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
9064e2b4712SSatish Balay   int             *diag = a->diag;
9073f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
9083f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,x1,x2,*tmp;
9094e2b4712SSatish Balay 
9104e2b4712SSatish Balay   PetscFunctionBegin;
911e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
912e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9134e2b4712SSatish Balay   tmp  = a->solve_work;
9144e2b4712SSatish Balay 
9154e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
9164e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
9174e2b4712SSatish Balay 
9184e2b4712SSatish Balay   /* forward solve the lower triangular */
9194e2b4712SSatish Balay   idx    = 2*(*r++);
9204e2b4712SSatish Balay   tmp[0] = b[idx]; tmp[1] = b[1+idx];
9214e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
9224e2b4712SSatish Balay     v     = aa + 4*ai[i];
9234e2b4712SSatish Balay     vi    = aj + ai[i];
9244e2b4712SSatish Balay     nz    = diag[i] - ai[i];
9254e2b4712SSatish Balay     idx   = 2*(*r++);
9264e2b4712SSatish Balay     sum1  = b[idx]; sum2 = b[1+idx];
9274e2b4712SSatish Balay     while (nz--) {
9284e2b4712SSatish Balay       idx   = 2*(*vi++);
9294e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx];
9304e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[2]*x2;
9314e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[3]*x2;
9324e2b4712SSatish Balay       v += 4;
9334e2b4712SSatish Balay     }
9344e2b4712SSatish Balay     idx = 2*i;
9354e2b4712SSatish Balay     tmp[idx] = sum1; tmp[1+idx] = sum2;
9364e2b4712SSatish Balay   }
9374e2b4712SSatish Balay   /* backward solve the upper triangular */
9384e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
9394e2b4712SSatish Balay     v    = aa + 4*diag[i] + 4;
9404e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
9414e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
9424e2b4712SSatish Balay     idt  = 2*i;
9434e2b4712SSatish Balay     sum1 = tmp[idt]; sum2 = tmp[1+idt];
9444e2b4712SSatish Balay     while (nz--) {
9454e2b4712SSatish Balay       idx   = 2*(*vi++);
9464e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx];
9474e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[2]*x2;
9484e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[3]*x2;
9494e2b4712SSatish Balay       v += 4;
9504e2b4712SSatish Balay     }
9514e2b4712SSatish Balay     idc = 2*(*c--);
9524e2b4712SSatish Balay     v   = aa + 4*diag[i];
9534e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1 + v[2]*sum2;
9544e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2;
9554e2b4712SSatish Balay   }
9564e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
9574e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
958e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
959e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
96042015d63SBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
9614e2b4712SSatish Balay   PetscFunctionReturn(0);
9624e2b4712SSatish Balay }
9634e2b4712SSatish Balay 
96415091d37SBarry Smith /*
96515091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
96615091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
96715091d37SBarry Smith */
96815091d37SBarry Smith #undef __FUNC__
96915091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering"
97015091d37SBarry Smith int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
97115091d37SBarry Smith {
97215091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
97315091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
97415091d37SBarry Smith   int             ierr,*diag = a->diag;
97515091d37SBarry Smith   MatScalar       *aa=a->a,*v;
97615091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,x1,x2;
97715091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
97815091d37SBarry Smith 
97915091d37SBarry Smith   PetscFunctionBegin;
98015091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
98115091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
98215091d37SBarry Smith 
98315091d37SBarry Smith   /* forward solve the lower triangular */
98415091d37SBarry Smith   idx    = 0;
98515091d37SBarry Smith   x[0]   = b[0]; x[1] = b[1];
98615091d37SBarry Smith   for ( i=1; i<n; i++ ) {
98715091d37SBarry Smith     v     =  aa      + 4*ai[i];
98815091d37SBarry Smith     vi    =  aj      + ai[i];
98915091d37SBarry Smith     nz    =  diag[i] - ai[i];
99015091d37SBarry Smith     idx   +=  2;
99115091d37SBarry Smith     sum1  =  b[idx];sum2 = b[1+idx];
99215091d37SBarry Smith     while (nz--) {
99315091d37SBarry Smith       jdx   = 2*(*vi++);
99415091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];
99515091d37SBarry Smith       sum1 -= v[0]*x1 + v[2]*x2;
99615091d37SBarry Smith       sum2 -= v[1]*x1 + v[3]*x2;
99715091d37SBarry Smith       v    += 4;
99815091d37SBarry Smith     }
99915091d37SBarry Smith     x[idx]   = sum1;
100015091d37SBarry Smith     x[1+idx] = sum2;
100115091d37SBarry Smith   }
100215091d37SBarry Smith   /* backward solve the upper triangular */
100315091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
100415091d37SBarry Smith     v    = aa + 4*diag[i] + 4;
100515091d37SBarry Smith     vi   = aj + diag[i] + 1;
100615091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
100715091d37SBarry Smith     idt  = 2*i;
100815091d37SBarry Smith     sum1 = x[idt];  sum2 = x[1+idt];
100915091d37SBarry Smith     while (nz--) {
101015091d37SBarry Smith       idx   = 2*(*vi++);
101115091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];
101215091d37SBarry Smith       sum1 -= v[0]*x1 + v[2]*x2;
101315091d37SBarry Smith       sum2 -= v[1]*x1 + v[3]*x2;
101415091d37SBarry Smith       v    += 4;
101515091d37SBarry Smith     }
101615091d37SBarry Smith     v        = aa +  4*diag[i];
101715091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[2]*sum2;
101815091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[3]*sum2;
101915091d37SBarry Smith   }
102015091d37SBarry Smith 
102115091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
102215091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
102315091d37SBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
102415091d37SBarry Smith   PetscFunctionReturn(0);
102515091d37SBarry Smith }
102615091d37SBarry Smith 
10274e2b4712SSatish Balay #undef __FUNC__
10284e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_1"
10294e2b4712SSatish Balay int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
10304e2b4712SSatish Balay {
10314e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
10324e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
10334e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
10344e2b4712SSatish Balay   int             *diag = a->diag;
10353f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
10363f1db9ecSBarry Smith   Scalar          *x,*b,sum1,*tmp;
10374e2b4712SSatish Balay 
10384e2b4712SSatish Balay   PetscFunctionBegin;
10394e2b4712SSatish Balay   if (!n) PetscFunctionReturn(0);
10404e2b4712SSatish Balay 
1041e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1042e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
10434e2b4712SSatish Balay   tmp  = a->solve_work;
10444e2b4712SSatish Balay 
10454e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
10464e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
10474e2b4712SSatish Balay 
10484e2b4712SSatish Balay   /* forward solve the lower triangular */
10494e2b4712SSatish Balay   tmp[0] = b[*r++];
10504e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
10514e2b4712SSatish Balay     v     = aa + ai[i];
10524e2b4712SSatish Balay     vi    = aj + ai[i];
10534e2b4712SSatish Balay     nz    = diag[i] - ai[i];
10544e2b4712SSatish Balay     sum1  = b[*r++];
10554e2b4712SSatish Balay     while (nz--) {
10564e2b4712SSatish Balay       sum1 -= (*v++)*tmp[*vi++];
10574e2b4712SSatish Balay     }
10584e2b4712SSatish Balay     tmp[i] = sum1;
10594e2b4712SSatish Balay   }
10604e2b4712SSatish Balay   /* backward solve the upper triangular */
10614e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
10624e2b4712SSatish Balay     v    = aa + diag[i] + 1;
10634e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
10644e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
10654e2b4712SSatish Balay     sum1 = tmp[i];
10664e2b4712SSatish Balay     while (nz--) {
10674e2b4712SSatish Balay       sum1 -= (*v++)*tmp[*vi++];
10684e2b4712SSatish Balay     }
10694e2b4712SSatish Balay     x[*c--] = tmp[i] = aa[diag[i]]*sum1;
10704e2b4712SSatish Balay   }
10714e2b4712SSatish Balay 
10724e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
10734e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1074e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1075e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
10764e2b4712SSatish Balay   PLogFlops(2*1*(a->nz) - a->n);
10774e2b4712SSatish Balay   PetscFunctionReturn(0);
10784e2b4712SSatish Balay }
107915091d37SBarry Smith /*
108015091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
108115091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
108215091d37SBarry Smith */
108315091d37SBarry Smith #undef __FUNC__
108415091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering"
108515091d37SBarry Smith int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
108615091d37SBarry Smith {
108715091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
108815091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
108915091d37SBarry Smith   int             ierr,*diag = a->diag;
109015091d37SBarry Smith   MatScalar       *aa=a->a;
109115091d37SBarry Smith   Scalar          *x,*b;
109215091d37SBarry Smith   Scalar          sum1,x1;
109315091d37SBarry Smith   MatScalar       *v;
109415091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
109515091d37SBarry Smith 
109615091d37SBarry Smith   PetscFunctionBegin;
109715091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
109815091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
109915091d37SBarry Smith 
110015091d37SBarry Smith   /* forward solve the lower triangular */
110115091d37SBarry Smith   idx    = 0;
110215091d37SBarry Smith   x[0]   = b[0];
110315091d37SBarry Smith   for ( i=1; i<n; i++ ) {
110415091d37SBarry Smith     v     =  aa      + ai[i];
110515091d37SBarry Smith     vi    =  aj      + ai[i];
110615091d37SBarry Smith     nz    =  diag[i] - ai[i];
110715091d37SBarry Smith     idx   +=  1;
110815091d37SBarry Smith     sum1  =  b[idx];
110915091d37SBarry Smith     while (nz--) {
111015091d37SBarry Smith       jdx   = *vi++;
111115091d37SBarry Smith       x1    = x[jdx];
111215091d37SBarry Smith       sum1 -= v[0]*x1;
111315091d37SBarry Smith       v    += 1;
111415091d37SBarry Smith     }
111515091d37SBarry Smith     x[idx]   = sum1;
111615091d37SBarry Smith   }
111715091d37SBarry Smith   /* backward solve the upper triangular */
111815091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
111915091d37SBarry Smith     v    = aa + diag[i] + 1;
112015091d37SBarry Smith     vi   = aj + diag[i] + 1;
112115091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
112215091d37SBarry Smith     idt  = i;
112315091d37SBarry Smith     sum1 = x[idt];
112415091d37SBarry Smith     while (nz--) {
112515091d37SBarry Smith       idx   = *vi++;
112615091d37SBarry Smith       x1    = x[idx];
112715091d37SBarry Smith       sum1 -= v[0]*x1;
112815091d37SBarry Smith       v    += 1;
112915091d37SBarry Smith     }
113015091d37SBarry Smith     v        = aa +  diag[i];
113115091d37SBarry Smith     x[idt]   = v[0]*sum1;
113215091d37SBarry Smith   }
113315091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
113415091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
113515091d37SBarry Smith   PLogFlops(2*(a->nz) - a->n);
113615091d37SBarry Smith   PetscFunctionReturn(0);
113715091d37SBarry Smith }
11384e2b4712SSatish Balay 
11394e2b4712SSatish Balay /* ----------------------------------------------------------------*/
11404e2b4712SSatish Balay /*
11414e2b4712SSatish Balay      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
11424e2b4712SSatish Balay    except that the data structure of Mat_SeqAIJ is slightly different.
11434e2b4712SSatish Balay    Not a good example of code reuse.
11444e2b4712SSatish Balay */
1145435faa5fSBarry Smith extern int MatMissingDiag_SeqBAIJ(Mat);
1146435faa5fSBarry Smith 
11474e2b4712SSatish Balay #undef __FUNC__
11484e2b4712SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ"
1149435faa5fSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
11504e2b4712SSatish Balay {
11514e2b4712SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
11524e2b4712SSatish Balay   IS          isicol;
11534e2b4712SSatish Balay   int         *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j;
11544e2b4712SSatish Balay   int         *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
1155335d9088SBarry Smith   int         *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0, dcount = 0;
1156435faa5fSBarry Smith   int         incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill;
11574e2b4712SSatish Balay   PetscTruth  col_identity, row_identity;
1158435faa5fSBarry Smith   double      f;
11594e2b4712SSatish Balay 
11604e2b4712SSatish Balay   PetscFunctionBegin;
1161435faa5fSBarry Smith   if (info) {
1162435faa5fSBarry Smith     f             = info->fill;
1163335d9088SBarry Smith     levels        = (int) info->levels;
1164335d9088SBarry Smith     diagonal_fill = (int) info->diagonal_fill;
1165435faa5fSBarry Smith   } else {
1166435faa5fSBarry Smith     f             = 1.0;
1167435faa5fSBarry Smith     levels        = 0;
1168435faa5fSBarry Smith     diagonal_fill = 0;
1169435faa5fSBarry Smith   }
1170e51c0b9cSSatish Balay   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
1171e51c0b9cSSatish Balay 
11724e2b4712SSatish Balay   /* special case that simply copies fill pattern */
11734e2b4712SSatish Balay   PetscValidHeaderSpecific(isrow,IS_COOKIE);
11744e2b4712SSatish Balay   PetscValidHeaderSpecific(iscol,IS_COOKIE);
1175667159a5SBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1176667159a5SBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
11774e2b4712SSatish Balay   if (levels == 0 && row_identity && col_identity) {
1178e1293385SBarry Smith     ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
11794e2b4712SSatish Balay     (*fact)->factor = FACTOR_LU;
11804e2b4712SSatish Balay     b               = (Mat_SeqBAIJ *) (*fact)->data;
11814e2b4712SSatish Balay     if (!b->diag) {
11824e2b4712SSatish Balay       ierr = MatMarkDiag_SeqBAIJ(*fact);CHKERRQ(ierr);
11834e2b4712SSatish Balay     }
1184435faa5fSBarry Smith     ierr = MatMissingDiag_SeqBAIJ(*fact);CHKERRQ(ierr);
11854e2b4712SSatish Balay     b->row        = isrow;
11864e2b4712SSatish Balay     b->col        = iscol;
1187e51c0b9cSSatish Balay     b->icol       = isicol;
11884e2b4712SSatish Balay     b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
11894e2b4712SSatish Balay    /*
119015091d37SBarry Smith         Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
119115091d37SBarry Smith         for ILU(0) factorization with natural ordering
11924e2b4712SSatish Balay    */
119315091d37SBarry Smith     switch (b->bs) {
119415091d37SBarry Smith     case 2:
119515091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
119615091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
119715091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
119815091d37SBarry Smith       break;
119915091d37SBarry Smith     case 3:
120015091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
120115091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
120215091d37SBarry Smith       PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
120315091d37SBarry Smith       break;
120415091d37SBarry Smith     case 4:
1205f830108cSBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1206f830108cSBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
120715091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
120815091d37SBarry Smith       break;
120915091d37SBarry Smith     case 5:
1210667159a5SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1211667159a5SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
121215091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
121315091d37SBarry Smith       break;
121415091d37SBarry Smith     case 6:
121515091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
121615091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
121715091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
121815091d37SBarry Smith       break;
121915091d37SBarry Smith     case 7:
122015091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
122115091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
122215091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
122315091d37SBarry Smith     break;
12244e2b4712SSatish Balay   }
12254e2b4712SSatish Balay     PetscFunctionReturn(0);
12264e2b4712SSatish Balay   }
12274e2b4712SSatish Balay 
12284e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
12294e2b4712SSatish Balay   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
12304e2b4712SSatish Balay 
12314e2b4712SSatish Balay   /* get new row pointers */
12324e2b4712SSatish Balay   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
12334e2b4712SSatish Balay   ainew[0] = 0;
12344e2b4712SSatish Balay   /* don't know how many column pointers are needed so estimate */
12354e2b4712SSatish Balay   jmax = (int) (f*ai[n] + 1);
12364e2b4712SSatish Balay   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
12374e2b4712SSatish Balay   /* ajfill is level of fill for each fill entry */
12384e2b4712SSatish Balay   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill);
12394e2b4712SSatish Balay   /* fill is a linked list of nonzeros in active row */
12404e2b4712SSatish Balay   fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill);
12414e2b4712SSatish Balay   /* im is level for each filled value */
12424e2b4712SSatish Balay   im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im);
12434e2b4712SSatish Balay   /* dloc is location of diagonal in factor */
12444e2b4712SSatish Balay   dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc);
12454e2b4712SSatish Balay   dloc[0]  = 0;
12464e2b4712SSatish Balay   for ( prow=0; prow<n; prow++ ) {
1247435faa5fSBarry Smith 
1248435faa5fSBarry Smith     /* copy prow into linked list */
12494e2b4712SSatish Balay     nzf        = nz  = ai[r[prow]+1] - ai[r[prow]];
12504e2b4712SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
12514e2b4712SSatish Balay     xi         = aj + ai[r[prow]];
12524e2b4712SSatish Balay     fill[n]    = n;
1253435faa5fSBarry Smith     fill[prow] = -1; /* marker for diagonal entry */
12544e2b4712SSatish Balay     while (nz--) {
12554e2b4712SSatish Balay       fm  = n;
12564e2b4712SSatish Balay       idx = ic[*xi++];
12574e2b4712SSatish Balay       do {
12584e2b4712SSatish Balay         m  = fm;
12594e2b4712SSatish Balay         fm = fill[m];
12604e2b4712SSatish Balay       } while (fm < idx);
12614e2b4712SSatish Balay       fill[m]   = idx;
12624e2b4712SSatish Balay       fill[idx] = fm;
12634e2b4712SSatish Balay       im[idx]   = 0;
12644e2b4712SSatish Balay     }
1265435faa5fSBarry Smith 
1266435faa5fSBarry Smith     /* make sure diagonal entry is included */
1267435faa5fSBarry Smith     if (diagonal_fill && fill[prow] == -1) {
1268435faa5fSBarry Smith       fm = n;
1269435faa5fSBarry Smith       while (fill[fm] < prow) fm = fill[fm];
1270435faa5fSBarry Smith       fill[prow] = fill[fm];  /* insert diagonal into linked list */
1271435faa5fSBarry Smith       fill[fm]   = prow;
1272435faa5fSBarry Smith       im[prow]   = 0;
1273435faa5fSBarry Smith       nzf++;
1274335d9088SBarry Smith       dcount++;
1275435faa5fSBarry Smith     }
1276435faa5fSBarry Smith 
12774e2b4712SSatish Balay     nzi = 0;
12784e2b4712SSatish Balay     row = fill[n];
12794e2b4712SSatish Balay     while ( row < prow ) {
12804e2b4712SSatish Balay       incrlev = im[row] + 1;
12814e2b4712SSatish Balay       nz      = dloc[row];
1282435faa5fSBarry Smith       xi      = ajnew  + ainew[row] + nz + 1;
12834e2b4712SSatish Balay       flev    = ajfill + ainew[row] + nz + 1;
12844e2b4712SSatish Balay       nnz     = ainew[row+1] - ainew[row] - nz - 1;
12854e2b4712SSatish Balay       fm      = row;
12864e2b4712SSatish Balay       while (nnz-- > 0) {
12874e2b4712SSatish Balay         idx = *xi++;
12884e2b4712SSatish Balay         if (*flev + incrlev > levels) {
12894e2b4712SSatish Balay           flev++;
12904e2b4712SSatish Balay           continue;
12914e2b4712SSatish Balay         }
12924e2b4712SSatish Balay         do {
12934e2b4712SSatish Balay           m  = fm;
12944e2b4712SSatish Balay           fm = fill[m];
12954e2b4712SSatish Balay         } while (fm < idx);
12964e2b4712SSatish Balay         if (fm != idx) {
12974e2b4712SSatish Balay           im[idx]   = *flev + incrlev;
12984e2b4712SSatish Balay           fill[m]   = idx;
12994e2b4712SSatish Balay           fill[idx] = fm;
13004e2b4712SSatish Balay           fm        = idx;
13014e2b4712SSatish Balay           nzf++;
1302ecf371e4SBarry Smith         } else {
13034e2b4712SSatish Balay           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
13044e2b4712SSatish Balay         }
13054e2b4712SSatish Balay         flev++;
13064e2b4712SSatish Balay       }
13074e2b4712SSatish Balay       row = fill[row];
13084e2b4712SSatish Balay       nzi++;
13094e2b4712SSatish Balay     }
13104e2b4712SSatish Balay     /* copy new filled row into permanent storage */
13114e2b4712SSatish Balay     ainew[prow+1] = ainew[prow] + nzf;
13124e2b4712SSatish Balay     if (ainew[prow+1] > jmax) {
1313ecf371e4SBarry Smith 
1314ecf371e4SBarry Smith       /* estimate how much additional space we will need */
1315ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1316ecf371e4SBarry Smith       /* just double the memory each time */
1317ecf371e4SBarry Smith       int maxadd = jmax;
1318ecf371e4SBarry Smith       /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */
13194e2b4712SSatish Balay       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
13204e2b4712SSatish Balay       jmax += maxadd;
1321ecf371e4SBarry Smith 
1322ecf371e4SBarry Smith       /* allocate a longer ajnew and ajfill */
13234e2b4712SSatish Balay       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1324549d3d68SSatish Balay       ierr = PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));CHKERRQ(ierr);
1325*606d414cSSatish Balay       ierr = PetscFree(ajnew);CHKERRQ(ierr);
13264e2b4712SSatish Balay       ajnew = xi;
13274e2b4712SSatish Balay       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1328549d3d68SSatish Balay       ierr = PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));CHKERRQ(ierr);
1329*606d414cSSatish Balay       ierr = PetscFree(ajfill);CHKERRQ(ierr);
13304e2b4712SSatish Balay       ajfill = xi;
1331ecf371e4SBarry Smith       realloc++; /* count how many reallocations are needed */
13324e2b4712SSatish Balay     }
13334e2b4712SSatish Balay     xi          = ajnew + ainew[prow];
13344e2b4712SSatish Balay     flev        = ajfill + ainew[prow];
13354e2b4712SSatish Balay     dloc[prow]  = nzi;
13364e2b4712SSatish Balay     fm          = fill[n];
13374e2b4712SSatish Balay     while (nzf--) {
13384e2b4712SSatish Balay       *xi++   = fm;
13394e2b4712SSatish Balay       *flev++ = im[fm];
13404e2b4712SSatish Balay       fm      = fill[fm];
13414e2b4712SSatish Balay     }
1342435faa5fSBarry Smith     /* make sure row has diagonal entry */
1343435faa5fSBarry Smith     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1344435faa5fSBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\
1345435faa5fSBarry Smith     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1346435faa5fSBarry Smith     }
13474e2b4712SSatish Balay   }
1348*606d414cSSatish Balay   ierr = PetscFree(ajfill);CHKERRQ(ierr);
13494e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
13504e2b4712SSatish Balay   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1351*606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
1352*606d414cSSatish Balay   ierr = PetscFree(im);CHKERRQ(ierr);
13534e2b4712SSatish Balay 
13544e2b4712SSatish Balay   {
13554e2b4712SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
1356981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
13574e2b4712SSatish Balay              realloc,f,af);
1358981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af);
1359981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af);
1360981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n");
1361335d9088SBarry Smith     if (diagonal_fill) {
1362335d9088SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount);
1363335d9088SBarry Smith     }
13644e2b4712SSatish Balay   }
13654e2b4712SSatish Balay 
13664e2b4712SSatish Balay   /* put together the new matrix */
13674e2b4712SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
1368fa6007c9SSatish Balay   PLogObjectParent(*fact,isicol);
13694e2b4712SSatish Balay   b = (Mat_SeqBAIJ *) (*fact)->data;
1370*606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
13714e2b4712SSatish Balay   b->singlemalloc = 0;
13723f1db9ecSBarry Smith   len = bs2*ainew[n]*sizeof(MatScalar);
13734e2b4712SSatish Balay   /* the next line frees the default space generated by the Create() */
1374*606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
1375*606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
13763f1db9ecSBarry Smith   b->a          = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
13774e2b4712SSatish Balay   b->j          = ajnew;
13784e2b4712SSatish Balay   b->i          = ainew;
13794e2b4712SSatish Balay   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
13804e2b4712SSatish Balay   b->diag       = dloc;
13814e2b4712SSatish Balay   b->ilen       = 0;
13824e2b4712SSatish Balay   b->imax       = 0;
13834e2b4712SSatish Balay   b->row        = isrow;
13844e2b4712SSatish Balay   b->col        = iscol;
1385e51c0b9cSSatish Balay   b->icol       = isicol;
13864e2b4712SSatish Balay   b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
13874e2b4712SSatish Balay   /* In b structure:  Free imax, ilen, old a, old j.
13884e2b4712SSatish Balay      Allocate dloc, solve_work, new a, new j */
13894e2b4712SSatish Balay   PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar));
13904e2b4712SSatish Balay   b->maxnz          = b->nz = ainew[n];
13914e2b4712SSatish Balay   (*fact)->factor   = FACTOR_LU;
13924e2b4712SSatish Balay 
13934e2b4712SSatish Balay   (*fact)->info.factor_mallocs    = realloc;
13944e2b4712SSatish Balay   (*fact)->info.fill_ratio_given  = f;
13954e2b4712SSatish Balay   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
13964e2b4712SSatish Balay 
13974e2b4712SSatish Balay   PetscFunctionReturn(0);
13984e2b4712SSatish Balay }
13994e2b4712SSatish Balay 
14004e2b4712SSatish Balay 
14014e2b4712SSatish Balay 
14024e2b4712SSatish Balay 
1403