xref: /petsc/src/mat/impls/baij/seq/baijfact2.c (revision 3f1db9ec2fd39765c6c3a00831044586630c4cca)
14e2b4712SSatish Balay #ifdef PETSC_RCS_HEADER
2*3f1db9ecSBarry Smith static char vcid[] = "$Id: baijfact2.c,v 1.17 1998/10/28 19:53:03 bsmith Exp bsmith $";
34e2b4712SSatish Balay #endif
44e2b4712SSatish Balay /*
54e2b4712SSatish Balay     Factorization code for BAIJ format.
64e2b4712SSatish Balay */
74e2b4712SSatish Balay 
84e2b4712SSatish Balay #include "src/mat/impls/baij/seq/baij.h"
94e2b4712SSatish Balay #include "src/vec/vecimpl.h"
104e2b4712SSatish Balay #include "src/inline/ilu.h"
1174c49faeSBarry Smith #include "src/inline/dot.h"
124e2b4712SSatish Balay 
134e2b4712SSatish Balay /* ----------------------------------------------------------- */
144e2b4712SSatish Balay #undef __FUNC__
154e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_N"
164e2b4712SSatish Balay int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
174e2b4712SSatish Balay {
184e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
194e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
204e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
214e2b4712SSatish Balay   int             nz,bs=a->bs,bs2=a->bs2,*rout,*cout;
22*3f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
23*3f1db9ecSBarry Smith   Scalar          *x,*b,*sum,*tmp,*lsum;
244e2b4712SSatish Balay 
254e2b4712SSatish Balay   PetscFunctionBegin;
26e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
284e2b4712SSatish Balay   tmp  = a->solve_work;
294e2b4712SSatish Balay 
304e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
314e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
324e2b4712SSatish Balay 
334e2b4712SSatish Balay   /* forward solve the lower triangular */
344e2b4712SSatish Balay   PetscMemcpy(tmp,b + bs*(*r++), bs*sizeof(Scalar));
354e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
364e2b4712SSatish Balay     v   = aa + bs2*ai[i];
374e2b4712SSatish Balay     vi  = aj + ai[i];
384e2b4712SSatish Balay     nz  = a->diag[i] - ai[i];
394e2b4712SSatish Balay     sum = tmp + bs*i;
404e2b4712SSatish Balay     PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar));
414e2b4712SSatish Balay     while (nz--) {
424e2b4712SSatish Balay       Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++));
434e2b4712SSatish Balay       v += bs2;
444e2b4712SSatish Balay     }
454e2b4712SSatish Balay   }
464e2b4712SSatish Balay   /* backward solve the upper triangular */
474e2b4712SSatish Balay   lsum = a->solve_work + a->n;
484e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
494e2b4712SSatish Balay     v   = aa + bs2*(a->diag[i] + 1);
504e2b4712SSatish Balay     vi  = aj + a->diag[i] + 1;
514e2b4712SSatish Balay     nz  = ai[i+1] - a->diag[i] - 1;
524e2b4712SSatish Balay     PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar));
534e2b4712SSatish Balay     while (nz--) {
544e2b4712SSatish Balay       Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++));
554e2b4712SSatish Balay       v += bs2;
564e2b4712SSatish Balay     }
574e2b4712SSatish Balay     Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs);
584e2b4712SSatish Balay     PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar));
594e2b4712SSatish Balay   }
604e2b4712SSatish Balay 
614e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
624e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
63e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
64e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
6542015d63SBarry Smith   PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n);
664e2b4712SSatish Balay   PetscFunctionReturn(0);
674e2b4712SSatish Balay }
684e2b4712SSatish Balay 
694e2b4712SSatish Balay #undef __FUNC__
704e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_7"
714e2b4712SSatish Balay int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
724e2b4712SSatish Balay {
734e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
744e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
754e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
764e2b4712SSatish Balay   int             *diag = a->diag;
77*3f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
78*3f1db9ecSBarry Smith   Scalar          sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7;
79*3f1db9ecSBarry Smith   Scalar          *x,*b,*tmp;
804e2b4712SSatish Balay 
814e2b4712SSatish Balay   PetscFunctionBegin;
82e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
83e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
844e2b4712SSatish Balay   tmp  = a->solve_work;
854e2b4712SSatish Balay 
864e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
874e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
884e2b4712SSatish Balay 
894e2b4712SSatish Balay   /* forward solve the lower triangular */
904e2b4712SSatish Balay   idx    = 7*(*r++);
914e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
924e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
934e2b4712SSatish Balay   tmp[5] = b[5+idx]; tmp[6] = b[6+idx];
944e2b4712SSatish Balay 
954e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
964e2b4712SSatish Balay     v     = aa + 49*ai[i];
974e2b4712SSatish Balay     vi    = aj + ai[i];
984e2b4712SSatish Balay     nz    = diag[i] - ai[i];
994e2b4712SSatish Balay     idx   = 7*(*r++);
1004e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
1014e2b4712SSatish Balay     sum5  = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx];
1024e2b4712SSatish Balay     while (nz--) {
1034e2b4712SSatish Balay       idx   = 7*(*vi++);
1044e2b4712SSatish Balay       x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
1054e2b4712SSatish Balay       x4    = tmp[3+idx];x5 = tmp[4+idx];
1064e2b4712SSatish Balay       x6    = tmp[5+idx];x7 = tmp[6+idx];
1074e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1084e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1094e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1104e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1114e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1124e2b4712SSatish Balay       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1134e2b4712SSatish Balay       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1144e2b4712SSatish Balay       v += 49;
1154e2b4712SSatish Balay     }
1164e2b4712SSatish Balay     idx = 7*i;
1174e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
1184e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
1194e2b4712SSatish Balay     tmp[5+idx] = sum6;tmp[6+idx] = sum7;
1204e2b4712SSatish Balay   }
1214e2b4712SSatish Balay   /* backward solve the upper triangular */
1224e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
1234e2b4712SSatish Balay     v    = aa + 49*diag[i] + 49;
1244e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
1254e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
1264e2b4712SSatish Balay     idt  = 7*i;
1274e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
1284e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
1294e2b4712SSatish Balay     sum6 = tmp[5+idt];sum7 = tmp[6+idt];
1304e2b4712SSatish Balay     while (nz--) {
1314e2b4712SSatish Balay       idx   = 7*(*vi++);
1324e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
1334e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
1344e2b4712SSatish Balay       x6    = tmp[5+idx]; x7 = tmp[6+idx];
1354e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1364e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1374e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1384e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1394e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1404e2b4712SSatish Balay       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1414e2b4712SSatish Balay       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1424e2b4712SSatish Balay       v += 49;
1434e2b4712SSatish Balay     }
1444e2b4712SSatish Balay     idc = 7*(*c--);
1454e2b4712SSatish Balay     v   = aa + 49*diag[i];
1464e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[7]*sum2+v[14]*sum3+
1474e2b4712SSatish Balay                                  v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7;
1484e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+
1494e2b4712SSatish Balay                                  v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7;
1504e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+
1514e2b4712SSatish Balay                                  v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7;
1524e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+
1534e2b4712SSatish Balay                                  v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7;
1544e2b4712SSatish Balay     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+
1554e2b4712SSatish Balay                                  v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7;
1564e2b4712SSatish Balay     x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+
1574e2b4712SSatish Balay                                  v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7;
1584e2b4712SSatish Balay     x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+
1594e2b4712SSatish Balay                                  v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7;
1604e2b4712SSatish Balay   }
1614e2b4712SSatish Balay 
1624e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
1634e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
164e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
165e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
16642015d63SBarry Smith   PLogFlops(2*49*(a->nz) - 7*a->n);
1674e2b4712SSatish Balay   PetscFunctionReturn(0);
1684e2b4712SSatish Balay }
1694e2b4712SSatish Balay 
1704e2b4712SSatish Balay #undef __FUNC__
1714e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_5"
1724e2b4712SSatish Balay int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
1734e2b4712SSatish Balay {
1744e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
1754e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
1764e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
1774e2b4712SSatish Balay   int             *diag = a->diag;
178*3f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
179*3f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp;
1804e2b4712SSatish Balay 
1814e2b4712SSatish Balay   PetscFunctionBegin;
182e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
183e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
1844e2b4712SSatish Balay   tmp  = a->solve_work;
1854e2b4712SSatish Balay 
1864e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1874e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1884e2b4712SSatish Balay 
1894e2b4712SSatish Balay   /* forward solve the lower triangular */
1904e2b4712SSatish Balay   idx    = 5*(*r++);
1914e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
1924e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
1934e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
1944e2b4712SSatish Balay     v     = aa + 25*ai[i];
1954e2b4712SSatish Balay     vi    = aj + ai[i];
1964e2b4712SSatish Balay     nz    = diag[i] - ai[i];
1974e2b4712SSatish Balay     idx   = 5*(*r++);
1984e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
1994e2b4712SSatish Balay     sum5  = b[4+idx];
2004e2b4712SSatish Balay     while (nz--) {
2014e2b4712SSatish Balay       idx   = 5*(*vi++);
2024e2b4712SSatish Balay       x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
2034e2b4712SSatish Balay       x4    = tmp[3+idx];x5 = tmp[4+idx];
2044e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
2054e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
2064e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
2074e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
2084e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
2094e2b4712SSatish Balay       v += 25;
2104e2b4712SSatish Balay     }
2114e2b4712SSatish Balay     idx = 5*i;
2124e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
2134e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
2144e2b4712SSatish Balay   }
2154e2b4712SSatish Balay   /* backward solve the upper triangular */
2164e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
2174e2b4712SSatish Balay     v    = aa + 25*diag[i] + 25;
2184e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
2194e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
2204e2b4712SSatish Balay     idt  = 5*i;
2214e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
2224e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
2234e2b4712SSatish Balay     while (nz--) {
2244e2b4712SSatish Balay       idx   = 5*(*vi++);
2254e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
2264e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
2274e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
2284e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
2294e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
2304e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
2314e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
2324e2b4712SSatish Balay       v += 25;
2334e2b4712SSatish Balay     }
2344e2b4712SSatish Balay     idc = 5*(*c--);
2354e2b4712SSatish Balay     v   = aa + 25*diag[i];
2364e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[5]*sum2+v[10]*sum3+
2374e2b4712SSatish Balay                                  v[15]*sum4+v[20]*sum5;
2384e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+
2394e2b4712SSatish Balay                                  v[16]*sum4+v[21]*sum5;
2404e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+
2414e2b4712SSatish Balay                                  v[17]*sum4+v[22]*sum5;
2424e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+
2434e2b4712SSatish Balay                                  v[18]*sum4+v[23]*sum5;
2444e2b4712SSatish Balay     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+
2454e2b4712SSatish Balay                                  v[19]*sum4+v[24]*sum5;
2464e2b4712SSatish Balay   }
2474e2b4712SSatish Balay 
2484e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
2494e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
250e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
251e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
25242015d63SBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
2534e2b4712SSatish Balay   PetscFunctionReturn(0);
2544e2b4712SSatish Balay }
2554e2b4712SSatish Balay 
2564e2b4712SSatish Balay #undef __FUNC__
2574e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4"
2584e2b4712SSatish Balay int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
2594e2b4712SSatish Balay {
2604e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
2614e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
2624e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
2634e2b4712SSatish Balay   int             *diag = a->diag;
264*3f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
265*3f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp;
2664e2b4712SSatish Balay 
2674e2b4712SSatish Balay   PetscFunctionBegin;
268e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
269e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
2704e2b4712SSatish Balay   tmp  = a->solve_work;
2714e2b4712SSatish Balay 
2724e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
2734e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
2744e2b4712SSatish Balay 
2754e2b4712SSatish Balay   /* forward solve the lower triangular */
2764e2b4712SSatish Balay   idx    = 4*(*r++);
2774e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
2784e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx];
2794e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
2804e2b4712SSatish Balay     v     = aa + 16*ai[i];
2814e2b4712SSatish Balay     vi    = aj + ai[i];
2824e2b4712SSatish Balay     nz    = diag[i] - ai[i];
2834e2b4712SSatish Balay     idx   = 4*(*r++);
2844e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
2854e2b4712SSatish Balay     while (nz--) {
2864e2b4712SSatish Balay       idx   = 4*(*vi++);
2874e2b4712SSatish Balay       x1    = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx];
2884e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
2894e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
2904e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
2914e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
2924e2b4712SSatish Balay       v    += 16;
2934e2b4712SSatish Balay     }
2944e2b4712SSatish Balay     idx        = 4*i;
2954e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
2964e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4;
2974e2b4712SSatish Balay   }
2984e2b4712SSatish Balay   /* backward solve the upper triangular */
2994e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
3004e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
3014e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
3024e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
3034e2b4712SSatish Balay     idt  = 4*i;
3044e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
3054e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt];
3064e2b4712SSatish Balay     while (nz--) {
3074e2b4712SSatish Balay       idx   = 4*(*vi++);
3084e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
3094e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx];
3104e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
3114e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
3124e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
3134e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
3144e2b4712SSatish Balay       v += 16;
3154e2b4712SSatish Balay     }
3164e2b4712SSatish Balay     idc      = 4*(*c--);
3174e2b4712SSatish Balay     v        = aa + 16*diag[i];
3184e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4;
3194e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4;
3204e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4;
3214e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4;
3224e2b4712SSatish Balay   }
3234e2b4712SSatish Balay 
3244e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
3254e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
326e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
327e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
32842015d63SBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
3294e2b4712SSatish Balay   PetscFunctionReturn(0);
3304e2b4712SSatish Balay }
3314e2b4712SSatish Balay 
332*3f1db9ecSBarry Smith #define USE_FORTRAN_KERNEL_SOLVEBAIJ
333*3f1db9ecSBarry Smith 
3344e2b4712SSatish Balay /*
3354e2b4712SSatish Balay       Special case where the matrix was ILU(0) factored in the natural
3364e2b4712SSatish Balay    ordering. This eliminates the need for the column and row permutation.
3374e2b4712SSatish Balay */
3384e2b4712SSatish Balay #undef __FUNC__
3394e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering"
3404e2b4712SSatish Balay int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
3414e2b4712SSatish Balay {
3424e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
34330d4dcafSBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
34430d4dcafSBarry Smith   int             ierr,*diag = a->diag;
345*3f1db9ecSBarry Smith   MatScalar       *aa=a->a;
34630d4dcafSBarry Smith   Scalar          *x,*b;
3474e2b4712SSatish Balay 
3484e2b4712SSatish Balay   PetscFunctionBegin;
349e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
350e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3514e2b4712SSatish Balay 
3522853dc0eSBarry Smith #if defined(USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
3532853dc0eSBarry Smith   {
3542853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
3552853dc0eSBarry Smith     fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w);
3562853dc0eSBarry Smith   }
3572853dc0eSBarry Smith #elif defined(USE_FORTRAN_KERNEL_SOLVEBAIJ)
3582853dc0eSBarry Smith   {
3592853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
3602853dc0eSBarry Smith     fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w);
3612853dc0eSBarry Smith   }
3622853dc0eSBarry Smith #elif defined(USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
3632853dc0eSBarry Smith   fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b);
364e1293385SBarry Smith #else
36530d4dcafSBarry Smith   {
366*3f1db9ecSBarry Smith     Scalar    sum1,sum2,sum3,sum4,x1,x2,x3,x4;
367*3f1db9ecSBarry Smith     MatScalar *v;
36830d4dcafSBarry Smith     int       jdx,idt,idx,nz,*vi,i;
369e1293385SBarry Smith 
3704e2b4712SSatish Balay   /* forward solve the lower triangular */
3714e2b4712SSatish Balay   idx    = 0;
372e1293385SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3];
3734e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
3744e2b4712SSatish Balay     v     =  aa      + 16*ai[i];
3754e2b4712SSatish Balay     vi    =  aj      + ai[i];
3764e2b4712SSatish Balay     nz    =  diag[i] - ai[i];
377e1293385SBarry Smith     idx   +=  4;
3784e2b4712SSatish Balay     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
3794e2b4712SSatish Balay     while (nz--) {
3804e2b4712SSatish Balay       jdx   = 4*(*vi++);
3814e2b4712SSatish Balay       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
3824e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
3834e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
3844e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
3854e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
3864e2b4712SSatish Balay       v    += 16;
3874e2b4712SSatish Balay     }
3884e2b4712SSatish Balay     x[idx]   = sum1;
3894e2b4712SSatish Balay     x[1+idx] = sum2;
3904e2b4712SSatish Balay     x[2+idx] = sum3;
3914e2b4712SSatish Balay     x[3+idx] = sum4;
3924e2b4712SSatish Balay   }
3934e2b4712SSatish Balay   /* backward solve the upper triangular */
3944e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
3954e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
3964e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
3974e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
3984e2b4712SSatish Balay     idt  = 4*i;
3994e2b4712SSatish Balay     sum1 = x[idt];  sum2 = x[1+idt];
4004e2b4712SSatish Balay     sum3 = x[2+idt];sum4 = x[3+idt];
4014e2b4712SSatish Balay     while (nz--) {
4024e2b4712SSatish Balay       idx   = 4*(*vi++);
4034e2b4712SSatish Balay       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx];
4044e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
4054e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
4064e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
4074e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
4084e2b4712SSatish Balay       v    += 16;
4094e2b4712SSatish Balay     }
4104e2b4712SSatish Balay     v        = aa + 16*diag[i];
4114e2b4712SSatish Balay     x[idt]   = v[0]*sum1 + v[4]*sum2 + v[8]*sum3  + v[12]*sum4;
4124e2b4712SSatish Balay     x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3  + v[13]*sum4;
4134e2b4712SSatish Balay     x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4;
4144e2b4712SSatish Balay     x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4;
4154e2b4712SSatish Balay   }
41630d4dcafSBarry Smith   }
417e1293385SBarry Smith #endif
4184e2b4712SSatish Balay 
419e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
420e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
42142015d63SBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
4224e2b4712SSatish Balay   PetscFunctionReturn(0);
4234e2b4712SSatish Balay }
4244e2b4712SSatish Balay 
425667159a5SBarry Smith #undef __FUNC__
426667159a5SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering"
427667159a5SBarry Smith int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
428667159a5SBarry Smith {
429667159a5SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
430667159a5SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
431667159a5SBarry Smith   int             ierr,*diag = a->diag,jdx;
432*3f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
433*3f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;;
434667159a5SBarry Smith 
435667159a5SBarry Smith   PetscFunctionBegin;
436667159a5SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
437667159a5SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
438667159a5SBarry Smith   /* forward solve the lower triangular */
439667159a5SBarry Smith   idx    = 0;
440667159a5SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
441667159a5SBarry Smith   for ( i=1; i<n; i++ ) {
442667159a5SBarry Smith     v     =  aa + 25*ai[i];
443667159a5SBarry Smith     vi    =  aj + ai[i];
444667159a5SBarry Smith     nz    =  diag[i] - ai[i];
445667159a5SBarry Smith     idx   =  5*i;
446667159a5SBarry Smith     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx];
447667159a5SBarry Smith     while (nz--) {
448667159a5SBarry Smith       jdx   = 5*(*vi++);
449667159a5SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
450667159a5SBarry Smith       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
451667159a5SBarry Smith       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
452667159a5SBarry Smith       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
453667159a5SBarry Smith       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
454667159a5SBarry Smith       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
455667159a5SBarry Smith       v    += 25;
456667159a5SBarry Smith     }
457667159a5SBarry Smith     x[idx]   = sum1;
458667159a5SBarry Smith     x[1+idx] = sum2;
459667159a5SBarry Smith     x[2+idx] = sum3;
460667159a5SBarry Smith     x[3+idx] = sum4;
461667159a5SBarry Smith     x[4+idx] = sum5;
462667159a5SBarry Smith   }
463667159a5SBarry Smith   /* backward solve the upper triangular */
464667159a5SBarry Smith   for ( i=n-1; i>=0; i-- ){
465667159a5SBarry Smith     v    = aa + 25*diag[i] + 25;
466667159a5SBarry Smith     vi   = aj + diag[i] + 1;
467667159a5SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
468667159a5SBarry Smith     idt  = 5*i;
469667159a5SBarry Smith     sum1 = x[idt];  sum2 = x[1+idt];
470667159a5SBarry Smith     sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt];
471667159a5SBarry Smith     while (nz--) {
472667159a5SBarry Smith       idx   = 5*(*vi++);
473667159a5SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
474667159a5SBarry Smith       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
475667159a5SBarry Smith       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
476667159a5SBarry Smith       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
477667159a5SBarry Smith       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
478667159a5SBarry Smith       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
479667159a5SBarry Smith       v    += 25;
480667159a5SBarry Smith     }
481667159a5SBarry Smith     v        = aa + 25*diag[i];
482667159a5SBarry Smith     x[idt]   = v[0]*sum1 + v[5]*sum2 + v[10]*sum3  + v[15]*sum4 + v[20]*sum5;
483667159a5SBarry Smith     x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3  + v[16]*sum4 + v[21]*sum5;
484667159a5SBarry Smith     x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3  + v[17]*sum4 + v[22]*sum5;
485667159a5SBarry Smith     x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3  + v[18]*sum4 + v[23]*sum5;
486667159a5SBarry Smith     x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3  + v[19]*sum4 + v[24]*sum5;
487667159a5SBarry Smith   }
488667159a5SBarry Smith 
489667159a5SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
490667159a5SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
49142015d63SBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
492667159a5SBarry Smith   PetscFunctionReturn(0);
493667159a5SBarry Smith }
494667159a5SBarry Smith 
4954e2b4712SSatish Balay 
4964e2b4712SSatish Balay #undef __FUNC__
4974e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_3"
4984e2b4712SSatish Balay int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
4994e2b4712SSatish Balay {
5004e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
5014e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
5024e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
5034e2b4712SSatish Balay   int             *diag = a->diag;
504*3f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
505*3f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp;
5064e2b4712SSatish Balay 
5074e2b4712SSatish Balay   PetscFunctionBegin;
508e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
509e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
5104e2b4712SSatish Balay   tmp  = a->solve_work;
5114e2b4712SSatish Balay 
5124e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
5134e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
5144e2b4712SSatish Balay 
5154e2b4712SSatish Balay   /* forward solve the lower triangular */
5164e2b4712SSatish Balay   idx    = 3*(*r++);
5174e2b4712SSatish Balay   tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx];
5184e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
5194e2b4712SSatish Balay     v     = aa + 9*ai[i];
5204e2b4712SSatish Balay     vi    = aj + ai[i];
5214e2b4712SSatish Balay     nz    = diag[i] - ai[i];
5224e2b4712SSatish Balay     idx   = 3*(*r++);
5234e2b4712SSatish Balay     sum1  = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx];
5244e2b4712SSatish Balay     while (nz--) {
5254e2b4712SSatish Balay       idx   = 3*(*vi++);
5264e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
5274e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
5284e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
5294e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
5304e2b4712SSatish Balay       v += 9;
5314e2b4712SSatish Balay     }
5324e2b4712SSatish Balay     idx = 3*i;
5334e2b4712SSatish Balay     tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3;
5344e2b4712SSatish Balay   }
5354e2b4712SSatish Balay   /* backward solve the upper triangular */
5364e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
5374e2b4712SSatish Balay     v    = aa + 9*diag[i] + 9;
5384e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
5394e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
5404e2b4712SSatish Balay     idt  = 3*i;
5414e2b4712SSatish Balay     sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt];
5424e2b4712SSatish Balay     while (nz--) {
5434e2b4712SSatish Balay       idx   = 3*(*vi++);
5444e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
5454e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
5464e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
5474e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
5484e2b4712SSatish Balay       v += 9;
5494e2b4712SSatish Balay     }
5504e2b4712SSatish Balay     idc = 3*(*c--);
5514e2b4712SSatish Balay     v   = aa + 9*diag[i];
5524e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1 + v[3]*sum2 + v[6]*sum3;
5534e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3;
5544e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3;
5554e2b4712SSatish Balay   }
5564e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
5574e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
558e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
559e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
56042015d63SBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
5614e2b4712SSatish Balay   PetscFunctionReturn(0);
5624e2b4712SSatish Balay }
5634e2b4712SSatish Balay 
5644e2b4712SSatish Balay #undef __FUNC__
5654e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_2"
5664e2b4712SSatish Balay int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
5674e2b4712SSatish Balay {
5684e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
5694e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
5704e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
5714e2b4712SSatish Balay   int             *diag = a->diag;
572*3f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
573*3f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,x1,x2,*tmp;
5744e2b4712SSatish Balay 
5754e2b4712SSatish Balay   PetscFunctionBegin;
576e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
577e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
5784e2b4712SSatish Balay   tmp  = a->solve_work;
5794e2b4712SSatish Balay 
5804e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
5814e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
5824e2b4712SSatish Balay 
5834e2b4712SSatish Balay   /* forward solve the lower triangular */
5844e2b4712SSatish Balay   idx    = 2*(*r++);
5854e2b4712SSatish Balay   tmp[0] = b[idx]; tmp[1] = b[1+idx];
5864e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
5874e2b4712SSatish Balay     v     = aa + 4*ai[i];
5884e2b4712SSatish Balay     vi    = aj + ai[i];
5894e2b4712SSatish Balay     nz    = diag[i] - ai[i];
5904e2b4712SSatish Balay     idx   = 2*(*r++);
5914e2b4712SSatish Balay     sum1  = b[idx]; sum2 = b[1+idx];
5924e2b4712SSatish Balay     while (nz--) {
5934e2b4712SSatish Balay       idx   = 2*(*vi++);
5944e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx];
5954e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[2]*x2;
5964e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[3]*x2;
5974e2b4712SSatish Balay       v += 4;
5984e2b4712SSatish Balay     }
5994e2b4712SSatish Balay     idx = 2*i;
6004e2b4712SSatish Balay     tmp[idx] = sum1; tmp[1+idx] = sum2;
6014e2b4712SSatish Balay   }
6024e2b4712SSatish Balay   /* backward solve the upper triangular */
6034e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
6044e2b4712SSatish Balay     v    = aa + 4*diag[i] + 4;
6054e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
6064e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
6074e2b4712SSatish Balay     idt  = 2*i;
6084e2b4712SSatish Balay     sum1 = tmp[idt]; sum2 = tmp[1+idt];
6094e2b4712SSatish Balay     while (nz--) {
6104e2b4712SSatish Balay       idx   = 2*(*vi++);
6114e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx];
6124e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[2]*x2;
6134e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[3]*x2;
6144e2b4712SSatish Balay       v += 4;
6154e2b4712SSatish Balay     }
6164e2b4712SSatish Balay     idc = 2*(*c--);
6174e2b4712SSatish Balay     v   = aa + 4*diag[i];
6184e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1 + v[2]*sum2;
6194e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2;
6204e2b4712SSatish Balay   }
6214e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
6224e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
623e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
624e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
62542015d63SBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
6264e2b4712SSatish Balay   PetscFunctionReturn(0);
6274e2b4712SSatish Balay }
6284e2b4712SSatish Balay 
6294e2b4712SSatish Balay #undef __FUNC__
6304e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_1"
6314e2b4712SSatish Balay int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
6324e2b4712SSatish Balay {
6334e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
6344e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
6354e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
6364e2b4712SSatish Balay   int             *diag = a->diag;
637*3f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
638*3f1db9ecSBarry Smith   Scalar          *x,*b,sum1,*tmp;
6394e2b4712SSatish Balay 
6404e2b4712SSatish Balay   PetscFunctionBegin;
6414e2b4712SSatish Balay   if (!n) PetscFunctionReturn(0);
6424e2b4712SSatish Balay 
643e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
644e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
6454e2b4712SSatish Balay   tmp  = a->solve_work;
6464e2b4712SSatish Balay 
6474e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
6484e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
6494e2b4712SSatish Balay 
6504e2b4712SSatish Balay   /* forward solve the lower triangular */
6514e2b4712SSatish Balay   tmp[0] = b[*r++];
6524e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
6534e2b4712SSatish Balay     v     = aa + ai[i];
6544e2b4712SSatish Balay     vi    = aj + ai[i];
6554e2b4712SSatish Balay     nz    = diag[i] - ai[i];
6564e2b4712SSatish Balay     sum1  = b[*r++];
6574e2b4712SSatish Balay     while (nz--) {
6584e2b4712SSatish Balay       sum1 -= (*v++)*tmp[*vi++];
6594e2b4712SSatish Balay     }
6604e2b4712SSatish Balay     tmp[i] = sum1;
6614e2b4712SSatish Balay   }
6624e2b4712SSatish Balay   /* backward solve the upper triangular */
6634e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
6644e2b4712SSatish Balay     v    = aa + diag[i] + 1;
6654e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
6664e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
6674e2b4712SSatish Balay     sum1 = tmp[i];
6684e2b4712SSatish Balay     while (nz--) {
6694e2b4712SSatish Balay       sum1 -= (*v++)*tmp[*vi++];
6704e2b4712SSatish Balay     }
6714e2b4712SSatish Balay     x[*c--] = tmp[i] = aa[diag[i]]*sum1;
6724e2b4712SSatish Balay   }
6734e2b4712SSatish Balay 
6744e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
6754e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
676e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
677e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
6784e2b4712SSatish Balay   PLogFlops(2*1*(a->nz) - a->n);
6794e2b4712SSatish Balay   PetscFunctionReturn(0);
6804e2b4712SSatish Balay }
6814e2b4712SSatish Balay 
6824e2b4712SSatish Balay /* ----------------------------------------------------------------*/
6834e2b4712SSatish Balay /*
6844e2b4712SSatish Balay      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
6854e2b4712SSatish Balay    except that the data structure of Mat_SeqAIJ is slightly different.
6864e2b4712SSatish Balay    Not a good example of code reuse.
6874e2b4712SSatish Balay */
6884e2b4712SSatish Balay #undef __FUNC__
6894e2b4712SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ"
690667159a5SBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,int levels,Mat *fact)
6914e2b4712SSatish Balay {
6924e2b4712SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
6934e2b4712SSatish Balay   IS          isicol;
6944e2b4712SSatish Balay   int         *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j;
6954e2b4712SSatish Balay   int         *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
6964e2b4712SSatish Balay   int         *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0;
6974e2b4712SSatish Balay   int         incrlev,nnz,i,bs = a->bs,bs2 = a->bs2;
6984e2b4712SSatish Balay   PetscTruth  col_identity, row_identity;
6994e2b4712SSatish Balay 
7004e2b4712SSatish Balay   PetscFunctionBegin;
701e51c0b9cSSatish Balay   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
702e51c0b9cSSatish Balay 
7034e2b4712SSatish Balay   /* special case that simply copies fill pattern */
7044e2b4712SSatish Balay   PetscValidHeaderSpecific(isrow,IS_COOKIE);
7054e2b4712SSatish Balay   PetscValidHeaderSpecific(iscol,IS_COOKIE);
706667159a5SBarry Smith   ierr = ISIdentity(isrow,&row_identity); CHKERRQ(ierr);
707667159a5SBarry Smith   ierr = ISIdentity(iscol,&col_identity); CHKERRQ(ierr);
7084e2b4712SSatish Balay   if (levels == 0 && row_identity && col_identity) {
709e1293385SBarry Smith     ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact); CHKERRQ(ierr);
7104e2b4712SSatish Balay     (*fact)->factor = FACTOR_LU;
7114e2b4712SSatish Balay     b               = (Mat_SeqBAIJ *) (*fact)->data;
7124e2b4712SSatish Balay     if (!b->diag) {
7134e2b4712SSatish Balay       ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr);
7144e2b4712SSatish Balay     }
7154e2b4712SSatish Balay     b->row        = isrow;
7164e2b4712SSatish Balay     b->col        = iscol;
717e51c0b9cSSatish Balay     b->icol       = isicol;
7184e2b4712SSatish Balay     b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
7194e2b4712SSatish Balay     /*
720667159a5SBarry Smith         Blocksize 4 and 5 a special faster solver for ILU(0) factorization
7214e2b4712SSatish Balay         with natural ordering
7224e2b4712SSatish Balay     */
7234e2b4712SSatish Balay     if (b->bs == 4) {
724f830108cSBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
725f830108cSBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
726667159a5SBarry Smith       PLogInfo(A,"Using special natural ordering factor and solve BS=4\n");
727667159a5SBarry Smith     } else if (b->bs == 5) {
728667159a5SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
729667159a5SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
730667159a5SBarry Smith       PLogInfo( A,"Using special natural ordering factor and solve BS=5\n");
7314e2b4712SSatish Balay     }
7324e2b4712SSatish Balay     PetscFunctionReturn(0);
7334e2b4712SSatish Balay   }
7344e2b4712SSatish Balay 
7354e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
7364e2b4712SSatish Balay   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
7374e2b4712SSatish Balay 
7384e2b4712SSatish Balay   /* get new row pointers */
7394e2b4712SSatish Balay   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
7404e2b4712SSatish Balay   ainew[0] = 0;
7414e2b4712SSatish Balay   /* don't know how many column pointers are needed so estimate */
7424e2b4712SSatish Balay   jmax = (int) (f*ai[n] + 1);
7434e2b4712SSatish Balay   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
7444e2b4712SSatish Balay   /* ajfill is level of fill for each fill entry */
7454e2b4712SSatish Balay   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
7464e2b4712SSatish Balay   /* fill is a linked list of nonzeros in active row */
7474e2b4712SSatish Balay   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
7484e2b4712SSatish Balay   /* im is level for each filled value */
7494e2b4712SSatish Balay   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
7504e2b4712SSatish Balay   /* dloc is location of diagonal in factor */
7514e2b4712SSatish Balay   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
7524e2b4712SSatish Balay   dloc[0]  = 0;
7534e2b4712SSatish Balay   for ( prow=0; prow<n; prow++ ) {
7544e2b4712SSatish Balay     /* first copy previous fill into linked list */
7554e2b4712SSatish Balay     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
7564e2b4712SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
7574e2b4712SSatish Balay     xi      = aj + ai[r[prow]];
7584e2b4712SSatish Balay     fill[n] = n;
7594e2b4712SSatish Balay     while (nz--) {
7604e2b4712SSatish Balay       fm  = n;
7614e2b4712SSatish Balay       idx = ic[*xi++];
7624e2b4712SSatish Balay       do {
7634e2b4712SSatish Balay         m  = fm;
7644e2b4712SSatish Balay         fm = fill[m];
7654e2b4712SSatish Balay       } while (fm < idx);
7664e2b4712SSatish Balay       fill[m]   = idx;
7674e2b4712SSatish Balay       fill[idx] = fm;
7684e2b4712SSatish Balay       im[idx]   = 0;
7694e2b4712SSatish Balay     }
7704e2b4712SSatish Balay     nzi = 0;
7714e2b4712SSatish Balay     row = fill[n];
7724e2b4712SSatish Balay     while ( row < prow ) {
7734e2b4712SSatish Balay       incrlev = im[row] + 1;
7744e2b4712SSatish Balay       nz      = dloc[row];
7754e2b4712SSatish Balay       xi      = ajnew  + ainew[row] + nz;
7764e2b4712SSatish Balay       flev    = ajfill + ainew[row] + nz + 1;
7774e2b4712SSatish Balay       nnz     = ainew[row+1] - ainew[row] - nz - 1;
7784e2b4712SSatish Balay       if (*xi++ != row) {
7794e2b4712SSatish Balay         SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot: try running with -pc_ilu_nonzeros_along_diagonal");
7804e2b4712SSatish Balay       }
7814e2b4712SSatish Balay       fm      = row;
7824e2b4712SSatish Balay       while (nnz-- > 0) {
7834e2b4712SSatish Balay         idx = *xi++;
7844e2b4712SSatish Balay         if (*flev + incrlev > levels) {
7854e2b4712SSatish Balay           flev++;
7864e2b4712SSatish Balay           continue;
7874e2b4712SSatish Balay         }
7884e2b4712SSatish Balay         do {
7894e2b4712SSatish Balay           m  = fm;
7904e2b4712SSatish Balay           fm = fill[m];
7914e2b4712SSatish Balay         } while (fm < idx);
7924e2b4712SSatish Balay         if (fm != idx) {
7934e2b4712SSatish Balay           im[idx]   = *flev + incrlev;
7944e2b4712SSatish Balay           fill[m]   = idx;
7954e2b4712SSatish Balay           fill[idx] = fm;
7964e2b4712SSatish Balay           fm        = idx;
7974e2b4712SSatish Balay           nzf++;
798ecf371e4SBarry Smith         } else {
7994e2b4712SSatish Balay           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
8004e2b4712SSatish Balay         }
8014e2b4712SSatish Balay         flev++;
8024e2b4712SSatish Balay       }
8034e2b4712SSatish Balay       row = fill[row];
8044e2b4712SSatish Balay       nzi++;
8054e2b4712SSatish Balay     }
8064e2b4712SSatish Balay     /* copy new filled row into permanent storage */
8074e2b4712SSatish Balay     ainew[prow+1] = ainew[prow] + nzf;
8084e2b4712SSatish Balay     if (ainew[prow+1] > jmax) {
809ecf371e4SBarry Smith 
810ecf371e4SBarry Smith       /* estimate how much additional space we will need */
811ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
812ecf371e4SBarry Smith       /* just double the memory each time */
813ecf371e4SBarry Smith       int maxadd = jmax;
814ecf371e4SBarry Smith       /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */
8154e2b4712SSatish Balay       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
8164e2b4712SSatish Balay       jmax += maxadd;
817ecf371e4SBarry Smith 
818ecf371e4SBarry Smith       /* allocate a longer ajnew and ajfill */
8194e2b4712SSatish Balay       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
8204e2b4712SSatish Balay       PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));
8214e2b4712SSatish Balay       PetscFree(ajnew);
8224e2b4712SSatish Balay       ajnew = xi;
8234e2b4712SSatish Balay       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
8244e2b4712SSatish Balay       PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));
8254e2b4712SSatish Balay       PetscFree(ajfill);
8264e2b4712SSatish Balay       ajfill = xi;
827ecf371e4SBarry Smith       realloc++; /* count how many reallocations are needed */
8284e2b4712SSatish Balay     }
8294e2b4712SSatish Balay     xi          = ajnew + ainew[prow];
8304e2b4712SSatish Balay     flev        = ajfill + ainew[prow];
8314e2b4712SSatish Balay     dloc[prow]  = nzi;
8324e2b4712SSatish Balay     fm          = fill[n];
8334e2b4712SSatish Balay     while (nzf--) {
8344e2b4712SSatish Balay       *xi++   = fm;
8354e2b4712SSatish Balay       *flev++ = im[fm];
8364e2b4712SSatish Balay       fm      = fill[fm];
8374e2b4712SSatish Balay     }
8384e2b4712SSatish Balay   }
8394e2b4712SSatish Balay   PetscFree(ajfill);
8404e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
8414e2b4712SSatish Balay   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
8424e2b4712SSatish Balay   PetscFree(fill); PetscFree(im);
8434e2b4712SSatish Balay 
8444e2b4712SSatish Balay   {
8454e2b4712SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
846981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
8474e2b4712SSatish Balay              realloc,f,af);
848981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af);
849981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af);
850981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n");
8514e2b4712SSatish Balay   }
8524e2b4712SSatish Balay 
8534e2b4712SSatish Balay   /* put together the new matrix */
8544e2b4712SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
855fa6007c9SSatish Balay   PLogObjectParent(*fact,isicol);
8564e2b4712SSatish Balay   b = (Mat_SeqBAIJ *) (*fact)->data;
8574e2b4712SSatish Balay   PetscFree(b->imax);
8584e2b4712SSatish Balay   b->singlemalloc = 0;
859*3f1db9ecSBarry Smith   len = bs2*ainew[n]*sizeof(MatScalar);
8604e2b4712SSatish Balay   /* the next line frees the default space generated by the Create() */
8614e2b4712SSatish Balay   PetscFree(b->a); PetscFree(b->ilen);
862*3f1db9ecSBarry Smith   b->a          = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a);
8634e2b4712SSatish Balay   b->j          = ajnew;
8644e2b4712SSatish Balay   b->i          = ainew;
8654e2b4712SSatish Balay   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
8664e2b4712SSatish Balay   b->diag       = dloc;
8674e2b4712SSatish Balay   b->ilen       = 0;
8684e2b4712SSatish Balay   b->imax       = 0;
8694e2b4712SSatish Balay   b->row        = isrow;
8704e2b4712SSatish Balay   b->col        = iscol;
871e51c0b9cSSatish Balay   b->icol       = isicol;
8724e2b4712SSatish Balay   b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
8734e2b4712SSatish Balay   /* In b structure:  Free imax, ilen, old a, old j.
8744e2b4712SSatish Balay      Allocate dloc, solve_work, new a, new j */
8754e2b4712SSatish Balay   PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar));
8764e2b4712SSatish Balay   b->maxnz          = b->nz = ainew[n];
8774e2b4712SSatish Balay   (*fact)->factor   = FACTOR_LU;
8784e2b4712SSatish Balay 
8794e2b4712SSatish Balay   (*fact)->info.factor_mallocs    = realloc;
8804e2b4712SSatish Balay   (*fact)->info.fill_ratio_given  = f;
8814e2b4712SSatish Balay   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
8824e2b4712SSatish Balay 
8834e2b4712SSatish Balay   PetscFunctionReturn(0);
8844e2b4712SSatish Balay }
8854e2b4712SSatish Balay 
8864e2b4712SSatish Balay 
8874e2b4712SSatish Balay 
8884e2b4712SSatish Balay 
889