xref: /petsc/src/mat/impls/baij/seq/baijfact2.c (revision 15091d3721b14bd18f7efa625bb8738e103dca31)
14e2b4712SSatish Balay #ifdef PETSC_RCS_HEADER
2744db789SBarry Smith static char vcid[] = "$Id: baijfact2.c,v 1.23 1999/01/24 19:57:49 bsmith Exp bsmith $";
34e2b4712SSatish Balay #endif
44e2b4712SSatish Balay /*
54e2b4712SSatish Balay     Factorization code for BAIJ format.
64e2b4712SSatish Balay */
74e2b4712SSatish Balay 
84e2b4712SSatish Balay #include "src/mat/impls/baij/seq/baij.h"
94e2b4712SSatish Balay #include "src/vec/vecimpl.h"
104e2b4712SSatish Balay #include "src/inline/ilu.h"
1174c49faeSBarry Smith #include "src/inline/dot.h"
124e2b4712SSatish Balay 
134e2b4712SSatish Balay /* ----------------------------------------------------------- */
144e2b4712SSatish Balay #undef __FUNC__
154e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_N"
164e2b4712SSatish Balay int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
174e2b4712SSatish Balay {
184e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
194e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
204e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
214e2b4712SSatish Balay   int             nz,bs=a->bs,bs2=a->bs2,*rout,*cout;
223f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
233f1db9ecSBarry Smith   Scalar          *x,*b,*sum,*tmp,*lsum;
244e2b4712SSatish Balay 
254e2b4712SSatish Balay   PetscFunctionBegin;
26e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
284e2b4712SSatish Balay   tmp  = a->solve_work;
294e2b4712SSatish Balay 
304e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
314e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
324e2b4712SSatish Balay 
334e2b4712SSatish Balay   /* forward solve the lower triangular */
344e2b4712SSatish Balay   PetscMemcpy(tmp,b + bs*(*r++), bs*sizeof(Scalar));
354e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
364e2b4712SSatish Balay     v   = aa + bs2*ai[i];
374e2b4712SSatish Balay     vi  = aj + ai[i];
384e2b4712SSatish Balay     nz  = a->diag[i] - ai[i];
394e2b4712SSatish Balay     sum = tmp + bs*i;
404e2b4712SSatish Balay     PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar));
414e2b4712SSatish Balay     while (nz--) {
424e2b4712SSatish Balay       Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++));
434e2b4712SSatish Balay       v += bs2;
444e2b4712SSatish Balay     }
454e2b4712SSatish Balay   }
464e2b4712SSatish Balay   /* backward solve the upper triangular */
474e2b4712SSatish Balay   lsum = a->solve_work + a->n;
484e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
494e2b4712SSatish Balay     v   = aa + bs2*(a->diag[i] + 1);
504e2b4712SSatish Balay     vi  = aj + a->diag[i] + 1;
514e2b4712SSatish Balay     nz  = ai[i+1] - a->diag[i] - 1;
524e2b4712SSatish Balay     PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar));
534e2b4712SSatish Balay     while (nz--) {
544e2b4712SSatish Balay       Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++));
554e2b4712SSatish Balay       v += bs2;
564e2b4712SSatish Balay     }
574e2b4712SSatish Balay     Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs);
584e2b4712SSatish Balay     PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar));
594e2b4712SSatish Balay   }
604e2b4712SSatish Balay 
614e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
624e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
63e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
64e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
6542015d63SBarry Smith   PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n);
664e2b4712SSatish Balay   PetscFunctionReturn(0);
674e2b4712SSatish Balay }
684e2b4712SSatish Balay 
694e2b4712SSatish Balay #undef __FUNC__
704e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_7"
714e2b4712SSatish Balay int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
724e2b4712SSatish Balay {
734e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
744e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
754e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
764e2b4712SSatish Balay   int             *diag = a->diag;
773f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
783f1db9ecSBarry Smith   Scalar          sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7;
793f1db9ecSBarry Smith   Scalar          *x,*b,*tmp;
804e2b4712SSatish Balay 
814e2b4712SSatish Balay   PetscFunctionBegin;
82e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
83e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
844e2b4712SSatish Balay   tmp  = a->solve_work;
854e2b4712SSatish Balay 
864e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
874e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
884e2b4712SSatish Balay 
894e2b4712SSatish Balay   /* forward solve the lower triangular */
904e2b4712SSatish Balay   idx    = 7*(*r++);
914e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
924e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
934e2b4712SSatish Balay   tmp[5] = b[5+idx]; tmp[6] = b[6+idx];
944e2b4712SSatish Balay 
954e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
964e2b4712SSatish Balay     v     = aa + 49*ai[i];
974e2b4712SSatish Balay     vi    = aj + ai[i];
984e2b4712SSatish Balay     nz    = diag[i] - ai[i];
994e2b4712SSatish Balay     idx   = 7*(*r++);
1004e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
1014e2b4712SSatish Balay     sum5  = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx];
1024e2b4712SSatish Balay     while (nz--) {
1034e2b4712SSatish Balay       idx   = 7*(*vi++);
1044e2b4712SSatish Balay       x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
1054e2b4712SSatish Balay       x4    = tmp[3+idx];x5 = tmp[4+idx];
1064e2b4712SSatish Balay       x6    = tmp[5+idx];x7 = tmp[6+idx];
1074e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1084e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1094e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1104e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1114e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1124e2b4712SSatish Balay       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1134e2b4712SSatish Balay       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1144e2b4712SSatish Balay       v += 49;
1154e2b4712SSatish Balay     }
1164e2b4712SSatish Balay     idx = 7*i;
1174e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
1184e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
1194e2b4712SSatish Balay     tmp[5+idx] = sum6;tmp[6+idx] = sum7;
1204e2b4712SSatish Balay   }
1214e2b4712SSatish Balay   /* backward solve the upper triangular */
1224e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
1234e2b4712SSatish Balay     v    = aa + 49*diag[i] + 49;
1244e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
1254e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
1264e2b4712SSatish Balay     idt  = 7*i;
1274e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
1284e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
1294e2b4712SSatish Balay     sum6 = tmp[5+idt];sum7 = tmp[6+idt];
1304e2b4712SSatish Balay     while (nz--) {
1314e2b4712SSatish Balay       idx   = 7*(*vi++);
1324e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
1334e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
1344e2b4712SSatish Balay       x6    = tmp[5+idx]; x7 = tmp[6+idx];
1354e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1364e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1374e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1384e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1394e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1404e2b4712SSatish Balay       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1414e2b4712SSatish Balay       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1424e2b4712SSatish Balay       v += 49;
1434e2b4712SSatish Balay     }
1444e2b4712SSatish Balay     idc = 7*(*c--);
1454e2b4712SSatish Balay     v   = aa + 49*diag[i];
1464e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[7]*sum2+v[14]*sum3+
1474e2b4712SSatish Balay                                  v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7;
1484e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+
1494e2b4712SSatish Balay                                  v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7;
1504e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+
1514e2b4712SSatish Balay                                  v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7;
1524e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+
1534e2b4712SSatish Balay                                  v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7;
1544e2b4712SSatish Balay     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+
1554e2b4712SSatish Balay                                  v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7;
1564e2b4712SSatish Balay     x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+
1574e2b4712SSatish Balay                                  v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7;
1584e2b4712SSatish Balay     x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+
1594e2b4712SSatish Balay                                  v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7;
1604e2b4712SSatish Balay   }
1614e2b4712SSatish Balay 
1624e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
1634e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
164e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
165e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
16642015d63SBarry Smith   PLogFlops(2*49*(a->nz) - 7*a->n);
1674e2b4712SSatish Balay   PetscFunctionReturn(0);
1684e2b4712SSatish Balay }
1694e2b4712SSatish Balay 
1704e2b4712SSatish Balay #undef __FUNC__
171*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering"
172*15091d37SBarry Smith int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
173*15091d37SBarry Smith {
174*15091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
175*15091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
176*15091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
177*15091d37SBarry Smith   MatScalar       *aa=a->a,*v;
178*15091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7;
179*15091d37SBarry Smith 
180*15091d37SBarry Smith   PetscFunctionBegin;
181*15091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
182*15091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
183*15091d37SBarry Smith   /* forward solve the lower triangular */
184*15091d37SBarry Smith   idx    = 0;
185*15091d37SBarry Smith   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
186*15091d37SBarry Smith   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
187*15091d37SBarry Smith   x[6] = b[6+idx];
188*15091d37SBarry Smith   for ( i=1; i<n; i++ ) {
189*15091d37SBarry Smith     v     =  aa + 49*ai[i];
190*15091d37SBarry Smith     vi    =  aj + ai[i];
191*15091d37SBarry Smith     nz    =  diag[i] - ai[i];
192*15091d37SBarry Smith     idx   =  7*i;
193*15091d37SBarry Smith     sum1  =  b[idx];   sum2 = b[1+idx]; sum3 = b[2+idx];
194*15091d37SBarry Smith     sum4  =  b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx];
195*15091d37SBarry Smith     sum7  =  b[6+idx];
196*15091d37SBarry Smith     while (nz--) {
197*15091d37SBarry Smith       jdx   = 7*(*vi++);
198*15091d37SBarry Smith       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
199*15091d37SBarry Smith       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
200*15091d37SBarry Smith       x7    = x[6+jdx];
201*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
202*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
203*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
204*15091d37SBarry Smith       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
205*15091d37SBarry Smith       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
206*15091d37SBarry Smith       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
207*15091d37SBarry Smith       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
208*15091d37SBarry Smith       v += 49;
209*15091d37SBarry Smith      }
210*15091d37SBarry Smith     x[idx]   = sum1;
211*15091d37SBarry Smith     x[1+idx] = sum2;
212*15091d37SBarry Smith     x[2+idx] = sum3;
213*15091d37SBarry Smith     x[3+idx] = sum4;
214*15091d37SBarry Smith     x[4+idx] = sum5;
215*15091d37SBarry Smith     x[5+idx] = sum6;
216*15091d37SBarry Smith     x[6+idx] = sum7;
217*15091d37SBarry Smith   }
218*15091d37SBarry Smith   /* backward solve the upper triangular */
219*15091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
220*15091d37SBarry Smith     v    = aa + 49*diag[i] + 49;
221*15091d37SBarry Smith     vi   = aj + diag[i] + 1;
222*15091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
223*15091d37SBarry Smith     idt  = 7*i;
224*15091d37SBarry Smith     sum1 = x[idt];   sum2 = x[1+idt];
225*15091d37SBarry Smith     sum3 = x[2+idt]; sum4 = x[3+idt];
226*15091d37SBarry Smith     sum5 = x[4+idt]; sum6 = x[5+idt];
227*15091d37SBarry Smith     sum7 = x[6+idt];
228*15091d37SBarry Smith     while (nz--) {
229*15091d37SBarry Smith       idx   = 7*(*vi++);
230*15091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
231*15091d37SBarry Smith       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
232*15091d37SBarry Smith       x7    = x[6+idx];
233*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
234*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
235*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
236*15091d37SBarry Smith       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
237*15091d37SBarry Smith       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
238*15091d37SBarry Smith       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
239*15091d37SBarry Smith       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
240*15091d37SBarry Smith       v += 49;
241*15091d37SBarry Smith     }
242*15091d37SBarry Smith     v        = aa + 49*diag[i];
243*15091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[7]*sum2  + v[14]*sum3 + v[21]*sum4
244*15091d37SBarry Smith                          + v[28]*sum5 + v[35]*sum6 + v[42]*sum7;
245*15091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[8]*sum2  + v[15]*sum3 + v[22]*sum4
246*15091d37SBarry Smith                          + v[29]*sum5 + v[36]*sum6 + v[43]*sum7;
247*15091d37SBarry Smith     x[2+idt] = v[2]*sum1 + v[9]*sum2  + v[16]*sum3 + v[23]*sum4
248*15091d37SBarry Smith                          + v[30]*sum5 + v[37]*sum6 + v[44]*sum7;
249*15091d37SBarry Smith     x[3+idt] = v[3]*sum1 + v[10]*sum2  + v[17]*sum3 + v[24]*sum4
250*15091d37SBarry Smith                          + v[31]*sum5 + v[38]*sum6 + v[45]*sum7;
251*15091d37SBarry Smith     x[4+idt] = v[4]*sum1 + v[11]*sum2  + v[18]*sum3 + v[25]*sum4
252*15091d37SBarry Smith                          + v[32]*sum5 + v[39]*sum6 + v[46]*sum7;
253*15091d37SBarry Smith     x[5+idt] = v[5]*sum1 + v[12]*sum2  + v[19]*sum3 + v[26]*sum4
254*15091d37SBarry Smith                          + v[33]*sum5 + v[40]*sum6 + v[47]*sum7;
255*15091d37SBarry Smith     x[6+idt] = v[6]*sum1 + v[13]*sum2  + v[20]*sum3 + v[27]*sum4
256*15091d37SBarry Smith                          + v[34]*sum5 + v[41]*sum6 + v[48]*sum7;
257*15091d37SBarry Smith   }
258*15091d37SBarry Smith 
259*15091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
260*15091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
261*15091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
262*15091d37SBarry Smith   PetscFunctionReturn(0);
263*15091d37SBarry Smith }
264*15091d37SBarry Smith 
265*15091d37SBarry Smith #undef __FUNC__
266*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6"
267*15091d37SBarry Smith int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
268*15091d37SBarry Smith {
269*15091d37SBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
270*15091d37SBarry Smith   IS              iscol=a->col,isrow=a->row;
271*15091d37SBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
272*15091d37SBarry Smith   int             *diag = a->diag;
273*15091d37SBarry Smith   MatScalar       *aa=a->a,*v;
274*15091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6,*tmp;
275*15091d37SBarry Smith 
276*15091d37SBarry Smith   PetscFunctionBegin;
277*15091d37SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
278*15091d37SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
279*15091d37SBarry Smith   tmp  = a->solve_work;
280*15091d37SBarry Smith 
281*15091d37SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
282*15091d37SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
283*15091d37SBarry Smith 
284*15091d37SBarry Smith   /* forward solve the lower triangular */
285*15091d37SBarry Smith   idx    = 6*(*r++);
286*15091d37SBarry Smith   tmp[0] = b[idx];   tmp[1] = b[1+idx];
287*15091d37SBarry Smith   tmp[2] = b[2+idx]; tmp[3] = b[3+idx];
288*15091d37SBarry Smith   tmp[4] = b[4+idx]; tmp[5] = b[5+idx];
289*15091d37SBarry Smith   for ( i=1; i<n; i++ ) {
290*15091d37SBarry Smith     v     = aa + 36*ai[i];
291*15091d37SBarry Smith     vi    = aj + ai[i];
292*15091d37SBarry Smith     nz    = diag[i] - ai[i];
293*15091d37SBarry Smith     idx   = 6*(*r++);
294*15091d37SBarry Smith     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
295*15091d37SBarry Smith     sum5  = b[4+idx]; sum6 = b[5+idx];
296*15091d37SBarry Smith     while (nz--) {
297*15091d37SBarry Smith       idx   = 6*(*vi++);
298*15091d37SBarry Smith       x1    = tmp[idx];   x2 = tmp[1+idx]; x3 = tmp[2+idx];
299*15091d37SBarry Smith       x4    = tmp[3+idx]; x5 = tmp[4+idx]; x6 = tmp[5+idx];
300*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
301*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
302*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
303*15091d37SBarry Smith       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
304*15091d37SBarry Smith       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
305*15091d37SBarry Smith       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
306*15091d37SBarry Smith       v += 36;
307*15091d37SBarry Smith     }
308*15091d37SBarry Smith     idx = 6*i;
309*15091d37SBarry Smith     tmp[idx]   = sum1;tmp[1+idx] = sum2;
310*15091d37SBarry Smith     tmp[2+idx] = sum3;tmp[3+idx] = sum4;
311*15091d37SBarry Smith     tmp[4+idx] = sum5;tmp[5+idx] = sum6;
312*15091d37SBarry Smith   }
313*15091d37SBarry Smith   /* backward solve the upper triangular */
314*15091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
315*15091d37SBarry Smith     v    = aa + 36*diag[i] + 36;
316*15091d37SBarry Smith     vi   = aj + diag[i] + 1;
317*15091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
318*15091d37SBarry Smith     idt  = 6*i;
319*15091d37SBarry Smith     sum1 = tmp[idt];  sum2 = tmp[1+idt];
320*15091d37SBarry Smith     sum3 = tmp[2+idt];sum4 = tmp[3+idt];
321*15091d37SBarry Smith     sum5 = tmp[4+idt];sum6 = tmp[5+idt];
322*15091d37SBarry Smith     while (nz--) {
323*15091d37SBarry Smith       idx   = 6*(*vi++);
324*15091d37SBarry Smith       x1    = tmp[idx];   x2 = tmp[1+idx];
325*15091d37SBarry Smith       x3    = tmp[2+idx]; x4 = tmp[3+idx];
326*15091d37SBarry Smith       x5    = tmp[4+idx]; x6 = tmp[5+idx];
327*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
328*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
329*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
330*15091d37SBarry Smith       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
331*15091d37SBarry Smith       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
332*15091d37SBarry Smith       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
333*15091d37SBarry Smith       v += 36;
334*15091d37SBarry Smith     }
335*15091d37SBarry Smith     idc = 6*(*c--);
336*15091d37SBarry Smith     v   = aa + 36*diag[i];
337*15091d37SBarry Smith     x[idc]   = tmp[idt]   = v[0]*sum1+v[6]*sum2+v[12]*sum3+
338*15091d37SBarry Smith                                  v[18]*sum4+v[24]*sum5+v[30]*sum6;
339*15091d37SBarry Smith     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[7]*sum2+v[13]*sum3+
340*15091d37SBarry Smith                                  v[19]*sum4+v[25]*sum5+v[31]*sum6;
341*15091d37SBarry Smith     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[8]*sum2+v[14]*sum3+
342*15091d37SBarry Smith                                  v[20]*sum4+v[26]*sum5+v[32]*sum6;
343*15091d37SBarry Smith     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[9]*sum2+v[15]*sum3+
344*15091d37SBarry Smith                                  v[21]*sum4+v[27]*sum5+v[33]*sum6;
345*15091d37SBarry Smith     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[10]*sum2+v[16]*sum3+
346*15091d37SBarry Smith                                  v[22]*sum4+v[28]*sum5+v[34]*sum6;
347*15091d37SBarry Smith     x[5+idc] = tmp[5+idt] = v[5]*sum1+v[11]*sum2+v[17]*sum3+
348*15091d37SBarry Smith                                  v[23]*sum4+v[29]*sum5+v[35]*sum6;
349*15091d37SBarry Smith   }
350*15091d37SBarry Smith 
351*15091d37SBarry Smith   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
352*15091d37SBarry Smith   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
353*15091d37SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
354*15091d37SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
355*15091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
356*15091d37SBarry Smith   PetscFunctionReturn(0);
357*15091d37SBarry Smith }
358*15091d37SBarry Smith 
359*15091d37SBarry Smith #undef __FUNC__
360*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering"
361*15091d37SBarry Smith int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
362*15091d37SBarry Smith {
363*15091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
364*15091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
365*15091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
366*15091d37SBarry Smith   MatScalar       *aa=a->a,*v;
367*15091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6;
368*15091d37SBarry Smith 
369*15091d37SBarry Smith   PetscFunctionBegin;
370*15091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
371*15091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
372*15091d37SBarry Smith   /* forward solve the lower triangular */
373*15091d37SBarry Smith   idx    = 0;
374*15091d37SBarry Smith   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
375*15091d37SBarry Smith   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
376*15091d37SBarry Smith   for ( i=1; i<n; i++ ) {
377*15091d37SBarry Smith     v     =  aa + 36*ai[i];
378*15091d37SBarry Smith     vi    =  aj + ai[i];
379*15091d37SBarry Smith     nz    =  diag[i] - ai[i];
380*15091d37SBarry Smith     idx   =  6*i;
381*15091d37SBarry Smith     sum1  =  b[idx];   sum2 = b[1+idx]; sum3 = b[2+idx];
382*15091d37SBarry Smith     sum4  =  b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx];
383*15091d37SBarry Smith     while (nz--) {
384*15091d37SBarry Smith       jdx   = 6*(*vi++);
385*15091d37SBarry Smith       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
386*15091d37SBarry Smith       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
387*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
388*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
389*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
390*15091d37SBarry Smith       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
391*15091d37SBarry Smith       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
392*15091d37SBarry Smith       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
393*15091d37SBarry Smith       v += 36;
394*15091d37SBarry Smith      }
395*15091d37SBarry Smith     x[idx]   = sum1;
396*15091d37SBarry Smith     x[1+idx] = sum2;
397*15091d37SBarry Smith     x[2+idx] = sum3;
398*15091d37SBarry Smith     x[3+idx] = sum4;
399*15091d37SBarry Smith     x[4+idx] = sum5;
400*15091d37SBarry Smith     x[5+idx] = sum6;
401*15091d37SBarry Smith   }
402*15091d37SBarry Smith   /* backward solve the upper triangular */
403*15091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
404*15091d37SBarry Smith     v    = aa + 36*diag[i] + 36;
405*15091d37SBarry Smith     vi   = aj + diag[i] + 1;
406*15091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
407*15091d37SBarry Smith     idt  = 6*i;
408*15091d37SBarry Smith     sum1 = x[idt];   sum2 = x[1+idt];
409*15091d37SBarry Smith     sum3 = x[2+idt]; sum4 = x[3+idt];
410*15091d37SBarry Smith     sum5 = x[4+idt]; sum6 = x[5+idt];
411*15091d37SBarry Smith     while (nz--) {
412*15091d37SBarry Smith       idx   = 6*(*vi++);
413*15091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
414*15091d37SBarry Smith       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
415*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
416*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
417*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
418*15091d37SBarry Smith       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
419*15091d37SBarry Smith       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
420*15091d37SBarry Smith       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
421*15091d37SBarry Smith       v += 36;
422*15091d37SBarry Smith     }
423*15091d37SBarry Smith     v        = aa + 36*diag[i];
424*15091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[6]*sum2  + v[12]*sum3 + v[18]*sum4 + v[24]*sum5 + v[30]*sum6;
425*15091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[7]*sum2  + v[13]*sum3 + v[19]*sum4 + v[25]*sum5 + v[31]*sum6;
426*15091d37SBarry Smith     x[2+idt] = v[2]*sum1 + v[8]*sum2  + v[14]*sum3 + v[20]*sum4 + v[26]*sum5 + v[32]*sum6;
427*15091d37SBarry Smith     x[3+idt] = v[3]*sum1 + v[9]*sum2  + v[15]*sum3 + v[21]*sum4 + v[27]*sum5 + v[33]*sum6;
428*15091d37SBarry Smith     x[4+idt] = v[4]*sum1 + v[10]*sum2 + v[16]*sum3 + v[22]*sum4 + v[28]*sum5 + v[34]*sum6;
429*15091d37SBarry Smith     x[5+idt] = v[5]*sum1 + v[11]*sum2 + v[17]*sum3 + v[23]*sum4 + v[29]*sum5 + v[35]*sum6;
430*15091d37SBarry Smith   }
431*15091d37SBarry Smith 
432*15091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
433*15091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
434*15091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
435*15091d37SBarry Smith   PetscFunctionReturn(0);
436*15091d37SBarry Smith }
437*15091d37SBarry Smith 
438*15091d37SBarry Smith #undef __FUNC__
4394e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_5"
4404e2b4712SSatish Balay int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
4414e2b4712SSatish Balay {
4424e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
4434e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
4444e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
4454e2b4712SSatish Balay   int             *diag = a->diag;
4463f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
4473f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp;
4484e2b4712SSatish Balay 
4494e2b4712SSatish Balay   PetscFunctionBegin;
450e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
451e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
4524e2b4712SSatish Balay   tmp  = a->solve_work;
4534e2b4712SSatish Balay 
4544e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
4554e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
4564e2b4712SSatish Balay 
4574e2b4712SSatish Balay   /* forward solve the lower triangular */
4584e2b4712SSatish Balay   idx    = 5*(*r++);
4594e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
4604e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
4614e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
4624e2b4712SSatish Balay     v     = aa + 25*ai[i];
4634e2b4712SSatish Balay     vi    = aj + ai[i];
4644e2b4712SSatish Balay     nz    = diag[i] - ai[i];
4654e2b4712SSatish Balay     idx   = 5*(*r++);
4664e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
4674e2b4712SSatish Balay     sum5  = b[4+idx];
4684e2b4712SSatish Balay     while (nz--) {
4694e2b4712SSatish Balay       idx   = 5*(*vi++);
4704e2b4712SSatish Balay       x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
4714e2b4712SSatish Balay       x4    = tmp[3+idx];x5 = tmp[4+idx];
4724e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
4734e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
4744e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
4754e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
4764e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
4774e2b4712SSatish Balay       v += 25;
4784e2b4712SSatish Balay     }
4794e2b4712SSatish Balay     idx = 5*i;
4804e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
4814e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
4824e2b4712SSatish Balay   }
4834e2b4712SSatish Balay   /* backward solve the upper triangular */
4844e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
4854e2b4712SSatish Balay     v    = aa + 25*diag[i] + 25;
4864e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
4874e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
4884e2b4712SSatish Balay     idt  = 5*i;
4894e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
4904e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
4914e2b4712SSatish Balay     while (nz--) {
4924e2b4712SSatish Balay       idx   = 5*(*vi++);
4934e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
4944e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
4954e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
4964e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
4974e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
4984e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
4994e2b4712SSatish Balay       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
5004e2b4712SSatish Balay       v += 25;
5014e2b4712SSatish Balay     }
5024e2b4712SSatish Balay     idc = 5*(*c--);
5034e2b4712SSatish Balay     v   = aa + 25*diag[i];
5044e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[5]*sum2+v[10]*sum3+
5054e2b4712SSatish Balay                                  v[15]*sum4+v[20]*sum5;
5064e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+
5074e2b4712SSatish Balay                                  v[16]*sum4+v[21]*sum5;
5084e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+
5094e2b4712SSatish Balay                                  v[17]*sum4+v[22]*sum5;
5104e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+
5114e2b4712SSatish Balay                                  v[18]*sum4+v[23]*sum5;
5124e2b4712SSatish Balay     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+
5134e2b4712SSatish Balay                                  v[19]*sum4+v[24]*sum5;
5144e2b4712SSatish Balay   }
5154e2b4712SSatish Balay 
5164e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
5174e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
518e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
519e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
52042015d63SBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
5214e2b4712SSatish Balay   PetscFunctionReturn(0);
5224e2b4712SSatish Balay }
5234e2b4712SSatish Balay 
5244e2b4712SSatish Balay #undef __FUNC__
525*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering"
526*15091d37SBarry Smith int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
527*15091d37SBarry Smith {
528*15091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
529*15091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
530*15091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
531*15091d37SBarry Smith   MatScalar       *aa=a->a,*v;
532*15091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;;
533*15091d37SBarry Smith 
534*15091d37SBarry Smith   PetscFunctionBegin;
535*15091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
536*15091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
537*15091d37SBarry Smith   /* forward solve the lower triangular */
538*15091d37SBarry Smith   idx    = 0;
539*15091d37SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
540*15091d37SBarry Smith   for ( i=1; i<n; i++ ) {
541*15091d37SBarry Smith     v     =  aa + 25*ai[i];
542*15091d37SBarry Smith     vi    =  aj + ai[i];
543*15091d37SBarry Smith     nz    =  diag[i] - ai[i];
544*15091d37SBarry Smith     idx   =  5*i;
545*15091d37SBarry Smith     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx];
546*15091d37SBarry Smith     while (nz--) {
547*15091d37SBarry Smith       jdx   = 5*(*vi++);
548*15091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
549*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
550*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
551*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
552*15091d37SBarry Smith       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
553*15091d37SBarry Smith       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
554*15091d37SBarry Smith       v    += 25;
555*15091d37SBarry Smith     }
556*15091d37SBarry Smith     x[idx]   = sum1;
557*15091d37SBarry Smith     x[1+idx] = sum2;
558*15091d37SBarry Smith     x[2+idx] = sum3;
559*15091d37SBarry Smith     x[3+idx] = sum4;
560*15091d37SBarry Smith     x[4+idx] = sum5;
561*15091d37SBarry Smith   }
562*15091d37SBarry Smith   /* backward solve the upper triangular */
563*15091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
564*15091d37SBarry Smith     v    = aa + 25*diag[i] + 25;
565*15091d37SBarry Smith     vi   = aj + diag[i] + 1;
566*15091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
567*15091d37SBarry Smith     idt  = 5*i;
568*15091d37SBarry Smith     sum1 = x[idt];  sum2 = x[1+idt];
569*15091d37SBarry Smith     sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt];
570*15091d37SBarry Smith     while (nz--) {
571*15091d37SBarry Smith       idx   = 5*(*vi++);
572*15091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
573*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
574*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
575*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
576*15091d37SBarry Smith       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
577*15091d37SBarry Smith       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
578*15091d37SBarry Smith       v    += 25;
579*15091d37SBarry Smith     }
580*15091d37SBarry Smith     v        = aa + 25*diag[i];
581*15091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[5]*sum2 + v[10]*sum3  + v[15]*sum4 + v[20]*sum5;
582*15091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3  + v[16]*sum4 + v[21]*sum5;
583*15091d37SBarry Smith     x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3  + v[17]*sum4 + v[22]*sum5;
584*15091d37SBarry Smith     x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3  + v[18]*sum4 + v[23]*sum5;
585*15091d37SBarry Smith     x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3  + v[19]*sum4 + v[24]*sum5;
586*15091d37SBarry Smith   }
587*15091d37SBarry Smith 
588*15091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
589*15091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
590*15091d37SBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
591*15091d37SBarry Smith   PetscFunctionReturn(0);
592*15091d37SBarry Smith }
593*15091d37SBarry Smith 
594*15091d37SBarry Smith #undef __FUNC__
5954e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4"
5964e2b4712SSatish Balay int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
5974e2b4712SSatish Balay {
5984e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
5994e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
6004e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
6014e2b4712SSatish Balay   int             *diag = a->diag;
6023f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
6033f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp;
6044e2b4712SSatish Balay 
6054e2b4712SSatish Balay   PetscFunctionBegin;
606e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
607e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
6084e2b4712SSatish Balay   tmp  = a->solve_work;
6094e2b4712SSatish Balay 
6104e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
6114e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
6124e2b4712SSatish Balay 
6134e2b4712SSatish Balay   /* forward solve the lower triangular */
6144e2b4712SSatish Balay   idx    = 4*(*r++);
6154e2b4712SSatish Balay   tmp[0] = b[idx];   tmp[1] = b[1+idx];
6164e2b4712SSatish Balay   tmp[2] = b[2+idx]; tmp[3] = b[3+idx];
6174e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
6184e2b4712SSatish Balay     v     = aa + 16*ai[i];
6194e2b4712SSatish Balay     vi    = aj + ai[i];
6204e2b4712SSatish Balay     nz    = diag[i] - ai[i];
6214e2b4712SSatish Balay     idx   = 4*(*r++);
6224e2b4712SSatish Balay     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
6234e2b4712SSatish Balay     while (nz--) {
6244e2b4712SSatish Balay       idx   = 4*(*vi++);
6254e2b4712SSatish Balay       x1    = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx];
6264e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
6274e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
6284e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
6294e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
6304e2b4712SSatish Balay       v    += 16;
6314e2b4712SSatish Balay     }
6324e2b4712SSatish Balay     idx        = 4*i;
6334e2b4712SSatish Balay     tmp[idx]   = sum1;tmp[1+idx] = sum2;
6344e2b4712SSatish Balay     tmp[2+idx] = sum3;tmp[3+idx] = sum4;
6354e2b4712SSatish Balay   }
6364e2b4712SSatish Balay   /* backward solve the upper triangular */
6374e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
6384e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
6394e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
6404e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
6414e2b4712SSatish Balay     idt  = 4*i;
6424e2b4712SSatish Balay     sum1 = tmp[idt];  sum2 = tmp[1+idt];
6434e2b4712SSatish Balay     sum3 = tmp[2+idt];sum4 = tmp[3+idt];
6444e2b4712SSatish Balay     while (nz--) {
6454e2b4712SSatish Balay       idx   = 4*(*vi++);
6464e2b4712SSatish Balay       x1    = tmp[idx];   x2 = tmp[1+idx];
6474e2b4712SSatish Balay       x3    = tmp[2+idx]; x4 = tmp[3+idx];
6484e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
6494e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
6504e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
6514e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
6524e2b4712SSatish Balay       v += 16;
6534e2b4712SSatish Balay     }
6544e2b4712SSatish Balay     idc      = 4*(*c--);
6554e2b4712SSatish Balay     v        = aa + 16*diag[i];
6564e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4;
6574e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4;
6584e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4;
6594e2b4712SSatish Balay     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4;
6604e2b4712SSatish Balay   }
6614e2b4712SSatish Balay 
6624e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
6634e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
664e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
665e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
66642015d63SBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
6674e2b4712SSatish Balay   PetscFunctionReturn(0);
6684e2b4712SSatish Balay }
6690ef38995SBarry Smith 
6700ef38995SBarry Smith 
6714e2b4712SSatish Balay /*
6724e2b4712SSatish Balay       Special case where the matrix was ILU(0) factored in the natural
6734e2b4712SSatish Balay    ordering. This eliminates the need for the column and row permutation.
6744e2b4712SSatish Balay */
6754e2b4712SSatish Balay #undef __FUNC__
6764e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering"
6774e2b4712SSatish Balay int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
6784e2b4712SSatish Balay {
6794e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
68030d4dcafSBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
68130d4dcafSBarry Smith   int             ierr,*diag = a->diag;
6823f1db9ecSBarry Smith   MatScalar       *aa=a->a;
68330d4dcafSBarry Smith   Scalar          *x,*b;
6844e2b4712SSatish Balay 
6854e2b4712SSatish Balay   PetscFunctionBegin;
686e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
687e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6884e2b4712SSatish Balay 
6892853dc0eSBarry Smith #if defined(USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
6902853dc0eSBarry Smith   {
6912853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
6922853dc0eSBarry Smith     fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w);
6932853dc0eSBarry Smith   }
6942853dc0eSBarry Smith #elif defined(USE_FORTRAN_KERNEL_SOLVEBAIJ)
6952853dc0eSBarry Smith   {
6962853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
6972853dc0eSBarry Smith     fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w);
6982853dc0eSBarry Smith   }
6992853dc0eSBarry Smith #elif defined(USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
7002853dc0eSBarry Smith   fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b);
701e1293385SBarry Smith #else
70230d4dcafSBarry Smith   {
7033f1db9ecSBarry Smith     Scalar    sum1,sum2,sum3,sum4,x1,x2,x3,x4;
7043f1db9ecSBarry Smith     MatScalar *v;
70530d4dcafSBarry Smith     int       jdx,idt,idx,nz,*vi,i;
706e1293385SBarry Smith 
7074e2b4712SSatish Balay   /* forward solve the lower triangular */
7084e2b4712SSatish Balay   idx    = 0;
709e1293385SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3];
7104e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
7114e2b4712SSatish Balay     v     =  aa      + 16*ai[i];
7124e2b4712SSatish Balay     vi    =  aj      + ai[i];
7134e2b4712SSatish Balay     nz    =  diag[i] - ai[i];
714e1293385SBarry Smith     idx   +=  4;
7154e2b4712SSatish Balay     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
7164e2b4712SSatish Balay     while (nz--) {
7174e2b4712SSatish Balay       jdx   = 4*(*vi++);
7184e2b4712SSatish Balay       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
7194e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
7204e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
7214e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
7224e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
7234e2b4712SSatish Balay       v    += 16;
7244e2b4712SSatish Balay     }
7254e2b4712SSatish Balay     x[idx]   = sum1;
7264e2b4712SSatish Balay     x[1+idx] = sum2;
7274e2b4712SSatish Balay     x[2+idx] = sum3;
7284e2b4712SSatish Balay     x[3+idx] = sum4;
7294e2b4712SSatish Balay   }
7304e2b4712SSatish Balay   /* backward solve the upper triangular */
7314e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
7324e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
7334e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
7344e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
7354e2b4712SSatish Balay     idt  = 4*i;
7364e2b4712SSatish Balay     sum1 = x[idt];  sum2 = x[1+idt];
7374e2b4712SSatish Balay     sum3 = x[2+idt];sum4 = x[3+idt];
7384e2b4712SSatish Balay     while (nz--) {
7394e2b4712SSatish Balay       idx   = 4*(*vi++);
7404e2b4712SSatish Balay       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx];
7414e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
7424e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
7434e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
7444e2b4712SSatish Balay       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
7454e2b4712SSatish Balay       v    += 16;
7464e2b4712SSatish Balay     }
7474e2b4712SSatish Balay     v        = aa + 16*diag[i];
7484e2b4712SSatish Balay     x[idt]   = v[0]*sum1 + v[4]*sum2 + v[8]*sum3  + v[12]*sum4;
7494e2b4712SSatish Balay     x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3  + v[13]*sum4;
7504e2b4712SSatish Balay     x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4;
7514e2b4712SSatish Balay     x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4;
7524e2b4712SSatish Balay   }
75330d4dcafSBarry Smith   }
754e1293385SBarry Smith #endif
7554e2b4712SSatish Balay 
756e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
757e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
75842015d63SBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
7594e2b4712SSatish Balay   PetscFunctionReturn(0);
7604e2b4712SSatish Balay }
7614e2b4712SSatish Balay 
762667159a5SBarry Smith #undef __FUNC__
7634e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_3"
7644e2b4712SSatish Balay int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
7654e2b4712SSatish Balay {
7664e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
7674e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
7684e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
7694e2b4712SSatish Balay   int             *diag = a->diag;
7703f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
7713f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp;
7724e2b4712SSatish Balay 
7734e2b4712SSatish Balay   PetscFunctionBegin;
774e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
775e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
7764e2b4712SSatish Balay   tmp  = a->solve_work;
7774e2b4712SSatish Balay 
7784e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
7794e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
7804e2b4712SSatish Balay 
7814e2b4712SSatish Balay   /* forward solve the lower triangular */
7824e2b4712SSatish Balay   idx    = 3*(*r++);
7834e2b4712SSatish Balay   tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx];
7844e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
7854e2b4712SSatish Balay     v     = aa + 9*ai[i];
7864e2b4712SSatish Balay     vi    = aj + ai[i];
7874e2b4712SSatish Balay     nz    = diag[i] - ai[i];
7884e2b4712SSatish Balay     idx   = 3*(*r++);
7894e2b4712SSatish Balay     sum1  = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx];
7904e2b4712SSatish Balay     while (nz--) {
7914e2b4712SSatish Balay       idx   = 3*(*vi++);
7924e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
7934e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
7944e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
7954e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
7964e2b4712SSatish Balay       v += 9;
7974e2b4712SSatish Balay     }
7984e2b4712SSatish Balay     idx = 3*i;
7994e2b4712SSatish Balay     tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3;
8004e2b4712SSatish Balay   }
8014e2b4712SSatish Balay   /* backward solve the upper triangular */
8024e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
8034e2b4712SSatish Balay     v    = aa + 9*diag[i] + 9;
8044e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
8054e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
8064e2b4712SSatish Balay     idt  = 3*i;
8074e2b4712SSatish Balay     sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt];
8084e2b4712SSatish Balay     while (nz--) {
8094e2b4712SSatish Balay       idx   = 3*(*vi++);
8104e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
8114e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
8124e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
8134e2b4712SSatish Balay       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
8144e2b4712SSatish Balay       v += 9;
8154e2b4712SSatish Balay     }
8164e2b4712SSatish Balay     idc = 3*(*c--);
8174e2b4712SSatish Balay     v   = aa + 9*diag[i];
8184e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1 + v[3]*sum2 + v[6]*sum3;
8194e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3;
8204e2b4712SSatish Balay     x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3;
8214e2b4712SSatish Balay   }
8224e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
8234e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
824e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
825e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
82642015d63SBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
8274e2b4712SSatish Balay   PetscFunctionReturn(0);
8284e2b4712SSatish Balay }
8294e2b4712SSatish Balay 
830*15091d37SBarry Smith /*
831*15091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
832*15091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
833*15091d37SBarry Smith */
834*15091d37SBarry Smith #undef __FUNC__
835*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering"
836*15091d37SBarry Smith int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
837*15091d37SBarry Smith {
838*15091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
839*15091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
840*15091d37SBarry Smith   int             ierr,*diag = a->diag;
841*15091d37SBarry Smith   MatScalar       *aa=a->a, *v;
842*15091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,sum3,x1,x2,x3;
843*15091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
844*15091d37SBarry Smith 
845*15091d37SBarry Smith   PetscFunctionBegin;
846*15091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
847*15091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
848*15091d37SBarry Smith 
849*15091d37SBarry Smith 
850*15091d37SBarry Smith   /* forward solve the lower triangular */
851*15091d37SBarry Smith   idx    = 0;
852*15091d37SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2];
853*15091d37SBarry Smith   for ( i=1; i<n; i++ ) {
854*15091d37SBarry Smith     v     =  aa      + 9*ai[i];
855*15091d37SBarry Smith     vi    =  aj      + ai[i];
856*15091d37SBarry Smith     nz    =  diag[i] - ai[i];
857*15091d37SBarry Smith     idx   +=  3;
858*15091d37SBarry Smith     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];
859*15091d37SBarry Smith     while (nz--) {
860*15091d37SBarry Smith       jdx   = 3*(*vi++);
861*15091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
862*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
863*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
864*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
865*15091d37SBarry Smith       v    += 9;
866*15091d37SBarry Smith     }
867*15091d37SBarry Smith     x[idx]   = sum1;
868*15091d37SBarry Smith     x[1+idx] = sum2;
869*15091d37SBarry Smith     x[2+idx] = sum3;
870*15091d37SBarry Smith   }
871*15091d37SBarry Smith   /* backward solve the upper triangular */
872*15091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
873*15091d37SBarry Smith     v    = aa + 9*diag[i] + 9;
874*15091d37SBarry Smith     vi   = aj + diag[i] + 1;
875*15091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
876*15091d37SBarry Smith     idt  = 3*i;
877*15091d37SBarry Smith     sum1 = x[idt];  sum2 = x[1+idt];
878*15091d37SBarry Smith     sum3 = x[2+idt];
879*15091d37SBarry Smith     while (nz--) {
880*15091d37SBarry Smith       idx   = 3*(*vi++);
881*15091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx];
882*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
883*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
884*15091d37SBarry Smith       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
885*15091d37SBarry Smith       v    += 9;
886*15091d37SBarry Smith     }
887*15091d37SBarry Smith     v        = aa +  9*diag[i];
888*15091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[3]*sum2 + v[6]*sum3;
889*15091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3;
890*15091d37SBarry Smith     x[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3;
891*15091d37SBarry Smith   }
892*15091d37SBarry Smith 
893*15091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
894*15091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
895*15091d37SBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
896*15091d37SBarry Smith   PetscFunctionReturn(0);
897*15091d37SBarry Smith }
898*15091d37SBarry Smith 
8994e2b4712SSatish Balay #undef __FUNC__
9004e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_2"
9014e2b4712SSatish Balay int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
9024e2b4712SSatish Balay {
9034e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
9044e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
9054e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
9064e2b4712SSatish Balay   int             *diag = a->diag;
9073f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
9083f1db9ecSBarry Smith   Scalar          *x,*b,sum1,sum2,x1,x2,*tmp;
9094e2b4712SSatish Balay 
9104e2b4712SSatish Balay   PetscFunctionBegin;
911e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
912e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
9134e2b4712SSatish Balay   tmp  = a->solve_work;
9144e2b4712SSatish Balay 
9154e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
9164e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
9174e2b4712SSatish Balay 
9184e2b4712SSatish Balay   /* forward solve the lower triangular */
9194e2b4712SSatish Balay   idx    = 2*(*r++);
9204e2b4712SSatish Balay   tmp[0] = b[idx]; tmp[1] = b[1+idx];
9214e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
9224e2b4712SSatish Balay     v     = aa + 4*ai[i];
9234e2b4712SSatish Balay     vi    = aj + ai[i];
9244e2b4712SSatish Balay     nz    = diag[i] - ai[i];
9254e2b4712SSatish Balay     idx   = 2*(*r++);
9264e2b4712SSatish Balay     sum1  = b[idx]; sum2 = b[1+idx];
9274e2b4712SSatish Balay     while (nz--) {
9284e2b4712SSatish Balay       idx   = 2*(*vi++);
9294e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx];
9304e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[2]*x2;
9314e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[3]*x2;
9324e2b4712SSatish Balay       v += 4;
9334e2b4712SSatish Balay     }
9344e2b4712SSatish Balay     idx = 2*i;
9354e2b4712SSatish Balay     tmp[idx] = sum1; tmp[1+idx] = sum2;
9364e2b4712SSatish Balay   }
9374e2b4712SSatish Balay   /* backward solve the upper triangular */
9384e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
9394e2b4712SSatish Balay     v    = aa + 4*diag[i] + 4;
9404e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
9414e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
9424e2b4712SSatish Balay     idt  = 2*i;
9434e2b4712SSatish Balay     sum1 = tmp[idt]; sum2 = tmp[1+idt];
9444e2b4712SSatish Balay     while (nz--) {
9454e2b4712SSatish Balay       idx   = 2*(*vi++);
9464e2b4712SSatish Balay       x1    = tmp[idx]; x2 = tmp[1+idx];
9474e2b4712SSatish Balay       sum1 -= v[0]*x1 + v[2]*x2;
9484e2b4712SSatish Balay       sum2 -= v[1]*x1 + v[3]*x2;
9494e2b4712SSatish Balay       v += 4;
9504e2b4712SSatish Balay     }
9514e2b4712SSatish Balay     idc = 2*(*c--);
9524e2b4712SSatish Balay     v   = aa + 4*diag[i];
9534e2b4712SSatish Balay     x[idc]   = tmp[idt]   = v[0]*sum1 + v[2]*sum2;
9544e2b4712SSatish Balay     x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2;
9554e2b4712SSatish Balay   }
9564e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
9574e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
958e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
959e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
96042015d63SBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
9614e2b4712SSatish Balay   PetscFunctionReturn(0);
9624e2b4712SSatish Balay }
9634e2b4712SSatish Balay 
964*15091d37SBarry Smith /*
965*15091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
966*15091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
967*15091d37SBarry Smith */
968*15091d37SBarry Smith #undef __FUNC__
969*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering"
970*15091d37SBarry Smith int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
971*15091d37SBarry Smith {
972*15091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
973*15091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
974*15091d37SBarry Smith   int             ierr,*diag = a->diag;
975*15091d37SBarry Smith   MatScalar       *aa=a->a,*v;
976*15091d37SBarry Smith   Scalar          *x,*b,sum1,sum2,x1,x2;
977*15091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
978*15091d37SBarry Smith 
979*15091d37SBarry Smith   PetscFunctionBegin;
980*15091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
981*15091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
982*15091d37SBarry Smith 
983*15091d37SBarry Smith   /* forward solve the lower triangular */
984*15091d37SBarry Smith   idx    = 0;
985*15091d37SBarry Smith   x[0]   = b[0]; x[1] = b[1];
986*15091d37SBarry Smith   for ( i=1; i<n; i++ ) {
987*15091d37SBarry Smith     v     =  aa      + 4*ai[i];
988*15091d37SBarry Smith     vi    =  aj      + ai[i];
989*15091d37SBarry Smith     nz    =  diag[i] - ai[i];
990*15091d37SBarry Smith     idx   +=  2;
991*15091d37SBarry Smith     sum1  =  b[idx];sum2 = b[1+idx];
992*15091d37SBarry Smith     while (nz--) {
993*15091d37SBarry Smith       jdx   = 2*(*vi++);
994*15091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];
995*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[2]*x2;
996*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[3]*x2;
997*15091d37SBarry Smith       v    += 4;
998*15091d37SBarry Smith     }
999*15091d37SBarry Smith     x[idx]   = sum1;
1000*15091d37SBarry Smith     x[1+idx] = sum2;
1001*15091d37SBarry Smith   }
1002*15091d37SBarry Smith   /* backward solve the upper triangular */
1003*15091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
1004*15091d37SBarry Smith     v    = aa + 4*diag[i] + 4;
1005*15091d37SBarry Smith     vi   = aj + diag[i] + 1;
1006*15091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
1007*15091d37SBarry Smith     idt  = 2*i;
1008*15091d37SBarry Smith     sum1 = x[idt];  sum2 = x[1+idt];
1009*15091d37SBarry Smith     while (nz--) {
1010*15091d37SBarry Smith       idx   = 2*(*vi++);
1011*15091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];
1012*15091d37SBarry Smith       sum1 -= v[0]*x1 + v[2]*x2;
1013*15091d37SBarry Smith       sum2 -= v[1]*x1 + v[3]*x2;
1014*15091d37SBarry Smith       v    += 4;
1015*15091d37SBarry Smith     }
1016*15091d37SBarry Smith     v        = aa +  4*diag[i];
1017*15091d37SBarry Smith     x[idt]   = v[0]*sum1 + v[2]*sum2;
1018*15091d37SBarry Smith     x[1+idt] = v[1]*sum1 + v[3]*sum2;
1019*15091d37SBarry Smith   }
1020*15091d37SBarry Smith 
1021*15091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1022*15091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1023*15091d37SBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
1024*15091d37SBarry Smith   PetscFunctionReturn(0);
1025*15091d37SBarry Smith }
1026*15091d37SBarry Smith 
10274e2b4712SSatish Balay #undef __FUNC__
10284e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_1"
10294e2b4712SSatish Balay int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
10304e2b4712SSatish Balay {
10314e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
10324e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
10334e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
10344e2b4712SSatish Balay   int             *diag = a->diag;
10353f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
10363f1db9ecSBarry Smith   Scalar          *x,*b,sum1,*tmp;
10374e2b4712SSatish Balay 
10384e2b4712SSatish Balay   PetscFunctionBegin;
10394e2b4712SSatish Balay   if (!n) PetscFunctionReturn(0);
10404e2b4712SSatish Balay 
1041e1311b90SBarry Smith   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
1042e1311b90SBarry Smith   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
10434e2b4712SSatish Balay   tmp  = a->solve_work;
10444e2b4712SSatish Balay 
10454e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
10464e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
10474e2b4712SSatish Balay 
10484e2b4712SSatish Balay   /* forward solve the lower triangular */
10494e2b4712SSatish Balay   tmp[0] = b[*r++];
10504e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
10514e2b4712SSatish Balay     v     = aa + ai[i];
10524e2b4712SSatish Balay     vi    = aj + ai[i];
10534e2b4712SSatish Balay     nz    = diag[i] - ai[i];
10544e2b4712SSatish Balay     sum1  = b[*r++];
10554e2b4712SSatish Balay     while (nz--) {
10564e2b4712SSatish Balay       sum1 -= (*v++)*tmp[*vi++];
10574e2b4712SSatish Balay     }
10584e2b4712SSatish Balay     tmp[i] = sum1;
10594e2b4712SSatish Balay   }
10604e2b4712SSatish Balay   /* backward solve the upper triangular */
10614e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
10624e2b4712SSatish Balay     v    = aa + diag[i] + 1;
10634e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
10644e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
10654e2b4712SSatish Balay     sum1 = tmp[i];
10664e2b4712SSatish Balay     while (nz--) {
10674e2b4712SSatish Balay       sum1 -= (*v++)*tmp[*vi++];
10684e2b4712SSatish Balay     }
10694e2b4712SSatish Balay     x[*c--] = tmp[i] = aa[diag[i]]*sum1;
10704e2b4712SSatish Balay   }
10714e2b4712SSatish Balay 
10724e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr);
10734e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr);
1074e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
1075e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
10764e2b4712SSatish Balay   PLogFlops(2*1*(a->nz) - a->n);
10774e2b4712SSatish Balay   PetscFunctionReturn(0);
10784e2b4712SSatish Balay }
1079*15091d37SBarry Smith /*
1080*15091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
1081*15091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
1082*15091d37SBarry Smith */
1083*15091d37SBarry Smith #undef __FUNC__
1084*15091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering"
1085*15091d37SBarry Smith int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1086*15091d37SBarry Smith {
1087*15091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
1088*15091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
1089*15091d37SBarry Smith   int             ierr,*diag = a->diag;
1090*15091d37SBarry Smith   MatScalar       *aa=a->a;
1091*15091d37SBarry Smith   Scalar          *x,*b;
1092*15091d37SBarry Smith   Scalar          sum1,x1;
1093*15091d37SBarry Smith   MatScalar       *v;
1094*15091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
1095*15091d37SBarry Smith 
1096*15091d37SBarry Smith   PetscFunctionBegin;
1097*15091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1098*15091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1099*15091d37SBarry Smith 
1100*15091d37SBarry Smith   /* forward solve the lower triangular */
1101*15091d37SBarry Smith   idx    = 0;
1102*15091d37SBarry Smith   x[0]   = b[0];
1103*15091d37SBarry Smith   for ( i=1; i<n; i++ ) {
1104*15091d37SBarry Smith     v     =  aa      + ai[i];
1105*15091d37SBarry Smith     vi    =  aj      + ai[i];
1106*15091d37SBarry Smith     nz    =  diag[i] - ai[i];
1107*15091d37SBarry Smith     idx   +=  1;
1108*15091d37SBarry Smith     sum1  =  b[idx];
1109*15091d37SBarry Smith     while (nz--) {
1110*15091d37SBarry Smith       jdx   = *vi++;
1111*15091d37SBarry Smith       x1    = x[jdx];
1112*15091d37SBarry Smith       sum1 -= v[0]*x1;
1113*15091d37SBarry Smith       v    += 1;
1114*15091d37SBarry Smith     }
1115*15091d37SBarry Smith     x[idx]   = sum1;
1116*15091d37SBarry Smith   }
1117*15091d37SBarry Smith   /* backward solve the upper triangular */
1118*15091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
1119*15091d37SBarry Smith     v    = aa + diag[i] + 1;
1120*15091d37SBarry Smith     vi   = aj + diag[i] + 1;
1121*15091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
1122*15091d37SBarry Smith     idt  = i;
1123*15091d37SBarry Smith     sum1 = x[idt];
1124*15091d37SBarry Smith     while (nz--) {
1125*15091d37SBarry Smith       idx   = *vi++;
1126*15091d37SBarry Smith       x1    = x[idx];
1127*15091d37SBarry Smith       sum1 -= v[0]*x1;
1128*15091d37SBarry Smith       v    += 1;
1129*15091d37SBarry Smith     }
1130*15091d37SBarry Smith     v        = aa +  diag[i];
1131*15091d37SBarry Smith     x[idt]   = v[0]*sum1;
1132*15091d37SBarry Smith   }
1133*15091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1134*15091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1135*15091d37SBarry Smith   PLogFlops(2*(a->nz) - a->n);
1136*15091d37SBarry Smith   PetscFunctionReturn(0);
1137*15091d37SBarry Smith }
11384e2b4712SSatish Balay 
11394e2b4712SSatish Balay /* ----------------------------------------------------------------*/
11404e2b4712SSatish Balay /*
11414e2b4712SSatish Balay      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
11424e2b4712SSatish Balay    except that the data structure of Mat_SeqAIJ is slightly different.
11434e2b4712SSatish Balay    Not a good example of code reuse.
11444e2b4712SSatish Balay */
1145435faa5fSBarry Smith extern int MatMissingDiag_SeqBAIJ(Mat);
1146435faa5fSBarry Smith 
11474e2b4712SSatish Balay #undef __FUNC__
11484e2b4712SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ"
1149435faa5fSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
11504e2b4712SSatish Balay {
11514e2b4712SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
11524e2b4712SSatish Balay   IS          isicol;
11534e2b4712SSatish Balay   int         *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j;
11544e2b4712SSatish Balay   int         *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
1155335d9088SBarry Smith   int         *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0, dcount = 0;
1156435faa5fSBarry Smith   int         incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill;
11574e2b4712SSatish Balay   PetscTruth  col_identity, row_identity;
1158435faa5fSBarry Smith   double      f;
11594e2b4712SSatish Balay 
11604e2b4712SSatish Balay   PetscFunctionBegin;
1161435faa5fSBarry Smith   if (info) {
1162435faa5fSBarry Smith     f             = info->fill;
1163335d9088SBarry Smith     levels        = (int) info->levels;
1164335d9088SBarry Smith     diagonal_fill = (int) info->diagonal_fill;
1165435faa5fSBarry Smith   } else {
1166435faa5fSBarry Smith     f             = 1.0;
1167435faa5fSBarry Smith     levels        = 0;
1168435faa5fSBarry Smith     diagonal_fill = 0;
1169435faa5fSBarry Smith   }
1170e51c0b9cSSatish Balay   ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr);
1171e51c0b9cSSatish Balay 
11724e2b4712SSatish Balay   /* special case that simply copies fill pattern */
11734e2b4712SSatish Balay   PetscValidHeaderSpecific(isrow,IS_COOKIE);
11744e2b4712SSatish Balay   PetscValidHeaderSpecific(iscol,IS_COOKIE);
1175667159a5SBarry Smith   ierr = ISIdentity(isrow,&row_identity); CHKERRQ(ierr);
1176667159a5SBarry Smith   ierr = ISIdentity(iscol,&col_identity); CHKERRQ(ierr);
11774e2b4712SSatish Balay   if (levels == 0 && row_identity && col_identity) {
1178e1293385SBarry Smith     ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact); CHKERRQ(ierr);
11794e2b4712SSatish Balay     (*fact)->factor = FACTOR_LU;
11804e2b4712SSatish Balay     b               = (Mat_SeqBAIJ *) (*fact)->data;
11814e2b4712SSatish Balay     if (!b->diag) {
11824e2b4712SSatish Balay       ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr);
11834e2b4712SSatish Balay     }
1184435faa5fSBarry Smith     ierr = MatMissingDiag_SeqBAIJ(*fact); CHKERRQ(ierr);
11854e2b4712SSatish Balay     b->row        = isrow;
11864e2b4712SSatish Balay     b->col        = iscol;
1187e51c0b9cSSatish Balay     b->icol       = isicol;
11884e2b4712SSatish Balay     b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
11894e2b4712SSatish Balay    /*
1190*15091d37SBarry Smith         Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1191*15091d37SBarry Smith         for ILU(0) factorization with natural ordering
11924e2b4712SSatish Balay    */
1193*15091d37SBarry Smith     switch (b->bs) {
1194*15091d37SBarry Smith     case 2:
1195*15091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1196*15091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1197*15091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1198*15091d37SBarry Smith       break;
1199*15091d37SBarry Smith     case 3:
1200*15091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1201*15091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1202*15091d37SBarry Smith       PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1203*15091d37SBarry Smith       break;
1204*15091d37SBarry Smith     case 4:
1205f830108cSBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1206f830108cSBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1207*15091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1208*15091d37SBarry Smith       break;
1209*15091d37SBarry Smith     case 5:
1210667159a5SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1211667159a5SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1212*15091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1213*15091d37SBarry Smith       break;
1214*15091d37SBarry Smith     case 6:
1215*15091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1216*15091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1217*15091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1218*15091d37SBarry Smith       break;
1219*15091d37SBarry Smith     case 7:
1220*15091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1221*15091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1222*15091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1223*15091d37SBarry Smith     break;
12244e2b4712SSatish Balay   }
12254e2b4712SSatish Balay     PetscFunctionReturn(0);
12264e2b4712SSatish Balay   }
12274e2b4712SSatish Balay 
12284e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr);
12294e2b4712SSatish Balay   ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr);
12304e2b4712SSatish Balay 
12314e2b4712SSatish Balay   /* get new row pointers */
12324e2b4712SSatish Balay   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew);
12334e2b4712SSatish Balay   ainew[0] = 0;
12344e2b4712SSatish Balay   /* don't know how many column pointers are needed so estimate */
12354e2b4712SSatish Balay   jmax = (int) (f*ai[n] + 1);
12364e2b4712SSatish Balay   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew);
12374e2b4712SSatish Balay   /* ajfill is level of fill for each fill entry */
12384e2b4712SSatish Balay   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill);
12394e2b4712SSatish Balay   /* fill is a linked list of nonzeros in active row */
12404e2b4712SSatish Balay   fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill);
12414e2b4712SSatish Balay   /* im is level for each filled value */
12424e2b4712SSatish Balay   im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im);
12434e2b4712SSatish Balay   /* dloc is location of diagonal in factor */
12444e2b4712SSatish Balay   dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc);
12454e2b4712SSatish Balay   dloc[0]  = 0;
12464e2b4712SSatish Balay   for ( prow=0; prow<n; prow++ ) {
1247435faa5fSBarry Smith 
1248435faa5fSBarry Smith     /* copy prow into linked list */
12494e2b4712SSatish Balay     nzf        = nz  = ai[r[prow]+1] - ai[r[prow]];
12504e2b4712SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
12514e2b4712SSatish Balay     xi         = aj + ai[r[prow]];
12524e2b4712SSatish Balay     fill[n]    = n;
1253435faa5fSBarry Smith     fill[prow] = -1; /* marker for diagonal entry */
12544e2b4712SSatish Balay     while (nz--) {
12554e2b4712SSatish Balay       fm  = n;
12564e2b4712SSatish Balay       idx = ic[*xi++];
12574e2b4712SSatish Balay       do {
12584e2b4712SSatish Balay         m  = fm;
12594e2b4712SSatish Balay         fm = fill[m];
12604e2b4712SSatish Balay       } while (fm < idx);
12614e2b4712SSatish Balay       fill[m]   = idx;
12624e2b4712SSatish Balay       fill[idx] = fm;
12634e2b4712SSatish Balay       im[idx]   = 0;
12644e2b4712SSatish Balay     }
1265435faa5fSBarry Smith 
1266435faa5fSBarry Smith     /* make sure diagonal entry is included */
1267435faa5fSBarry Smith     if (diagonal_fill && fill[prow] == -1) {
1268435faa5fSBarry Smith       fm = n;
1269435faa5fSBarry Smith       while (fill[fm] < prow) fm = fill[fm];
1270435faa5fSBarry Smith       fill[prow] = fill[fm];  /* insert diagonal into linked list */
1271435faa5fSBarry Smith       fill[fm]   = prow;
1272435faa5fSBarry Smith       im[prow]   = 0;
1273435faa5fSBarry Smith       nzf++;
1274335d9088SBarry Smith       dcount++;
1275435faa5fSBarry Smith     }
1276435faa5fSBarry Smith 
12774e2b4712SSatish Balay     nzi = 0;
12784e2b4712SSatish Balay     row = fill[n];
12794e2b4712SSatish Balay     while ( row < prow ) {
12804e2b4712SSatish Balay       incrlev = im[row] + 1;
12814e2b4712SSatish Balay       nz      = dloc[row];
1282435faa5fSBarry Smith       xi      = ajnew  + ainew[row] + nz + 1;
12834e2b4712SSatish Balay       flev    = ajfill + ainew[row] + nz + 1;
12844e2b4712SSatish Balay       nnz     = ainew[row+1] - ainew[row] - nz - 1;
12854e2b4712SSatish Balay       fm      = row;
12864e2b4712SSatish Balay       while (nnz-- > 0) {
12874e2b4712SSatish Balay         idx = *xi++;
12884e2b4712SSatish Balay         if (*flev + incrlev > levels) {
12894e2b4712SSatish Balay           flev++;
12904e2b4712SSatish Balay           continue;
12914e2b4712SSatish Balay         }
12924e2b4712SSatish Balay         do {
12934e2b4712SSatish Balay           m  = fm;
12944e2b4712SSatish Balay           fm = fill[m];
12954e2b4712SSatish Balay         } while (fm < idx);
12964e2b4712SSatish Balay         if (fm != idx) {
12974e2b4712SSatish Balay           im[idx]   = *flev + incrlev;
12984e2b4712SSatish Balay           fill[m]   = idx;
12994e2b4712SSatish Balay           fill[idx] = fm;
13004e2b4712SSatish Balay           fm        = idx;
13014e2b4712SSatish Balay           nzf++;
1302ecf371e4SBarry Smith         } else {
13034e2b4712SSatish Balay           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
13044e2b4712SSatish Balay         }
13054e2b4712SSatish Balay         flev++;
13064e2b4712SSatish Balay       }
13074e2b4712SSatish Balay       row = fill[row];
13084e2b4712SSatish Balay       nzi++;
13094e2b4712SSatish Balay     }
13104e2b4712SSatish Balay     /* copy new filled row into permanent storage */
13114e2b4712SSatish Balay     ainew[prow+1] = ainew[prow] + nzf;
13124e2b4712SSatish Balay     if (ainew[prow+1] > jmax) {
1313ecf371e4SBarry Smith 
1314ecf371e4SBarry Smith       /* estimate how much additional space we will need */
1315ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1316ecf371e4SBarry Smith       /* just double the memory each time */
1317ecf371e4SBarry Smith       int maxadd = jmax;
1318ecf371e4SBarry Smith       /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */
13194e2b4712SSatish Balay       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
13204e2b4712SSatish Balay       jmax += maxadd;
1321ecf371e4SBarry Smith 
1322ecf371e4SBarry Smith       /* allocate a longer ajnew and ajfill */
13234e2b4712SSatish Balay       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
13244e2b4712SSatish Balay       PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));
13254e2b4712SSatish Balay       PetscFree(ajnew);
13264e2b4712SSatish Balay       ajnew = xi;
13274e2b4712SSatish Balay       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
13284e2b4712SSatish Balay       PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));
13294e2b4712SSatish Balay       PetscFree(ajfill);
13304e2b4712SSatish Balay       ajfill = xi;
1331ecf371e4SBarry Smith       realloc++; /* count how many reallocations are needed */
13324e2b4712SSatish Balay     }
13334e2b4712SSatish Balay     xi          = ajnew + ainew[prow];
13344e2b4712SSatish Balay     flev        = ajfill + ainew[prow];
13354e2b4712SSatish Balay     dloc[prow]  = nzi;
13364e2b4712SSatish Balay     fm          = fill[n];
13374e2b4712SSatish Balay     while (nzf--) {
13384e2b4712SSatish Balay       *xi++   = fm;
13394e2b4712SSatish Balay       *flev++ = im[fm];
13404e2b4712SSatish Balay       fm      = fill[fm];
13414e2b4712SSatish Balay     }
1342435faa5fSBarry Smith     /* make sure row has diagonal entry */
1343435faa5fSBarry Smith     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1344435faa5fSBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\
1345435faa5fSBarry Smith     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1346435faa5fSBarry Smith     }
13474e2b4712SSatish Balay   }
13484e2b4712SSatish Balay   PetscFree(ajfill);
13494e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr);
13504e2b4712SSatish Balay   ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr);
13514e2b4712SSatish Balay   PetscFree(fill); PetscFree(im);
13524e2b4712SSatish Balay 
13534e2b4712SSatish Balay   {
13544e2b4712SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
1355981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
13564e2b4712SSatish Balay              realloc,f,af);
1357981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af);
1358981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af);
1359981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n");
1360335d9088SBarry Smith     if (diagonal_fill) {
1361335d9088SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount);
1362335d9088SBarry Smith     }
13634e2b4712SSatish Balay   }
13644e2b4712SSatish Balay 
13654e2b4712SSatish Balay   /* put together the new matrix */
13664e2b4712SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
1367fa6007c9SSatish Balay   PLogObjectParent(*fact,isicol);
13684e2b4712SSatish Balay   b = (Mat_SeqBAIJ *) (*fact)->data;
13694e2b4712SSatish Balay   PetscFree(b->imax);
13704e2b4712SSatish Balay   b->singlemalloc = 0;
13713f1db9ecSBarry Smith   len = bs2*ainew[n]*sizeof(MatScalar);
13724e2b4712SSatish Balay   /* the next line frees the default space generated by the Create() */
13734e2b4712SSatish Balay   PetscFree(b->a); PetscFree(b->ilen);
13743f1db9ecSBarry Smith   b->a          = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a);
13754e2b4712SSatish Balay   b->j          = ajnew;
13764e2b4712SSatish Balay   b->i          = ainew;
13774e2b4712SSatish Balay   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
13784e2b4712SSatish Balay   b->diag       = dloc;
13794e2b4712SSatish Balay   b->ilen       = 0;
13804e2b4712SSatish Balay   b->imax       = 0;
13814e2b4712SSatish Balay   b->row        = isrow;
13824e2b4712SSatish Balay   b->col        = iscol;
1383e51c0b9cSSatish Balay   b->icol       = isicol;
13844e2b4712SSatish Balay   b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
13854e2b4712SSatish Balay   /* In b structure:  Free imax, ilen, old a, old j.
13864e2b4712SSatish Balay      Allocate dloc, solve_work, new a, new j */
13874e2b4712SSatish Balay   PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar));
13884e2b4712SSatish Balay   b->maxnz          = b->nz = ainew[n];
13894e2b4712SSatish Balay   (*fact)->factor   = FACTOR_LU;
13904e2b4712SSatish Balay 
13914e2b4712SSatish Balay   (*fact)->info.factor_mallocs    = realloc;
13924e2b4712SSatish Balay   (*fact)->info.fill_ratio_given  = f;
13934e2b4712SSatish Balay   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
13944e2b4712SSatish Balay 
13954e2b4712SSatish Balay   PetscFunctionReturn(0);
13964e2b4712SSatish Balay }
13974e2b4712SSatish Balay 
13984e2b4712SSatish Balay 
13994e2b4712SSatish Balay 
14004e2b4712SSatish Balay 
1401