xref: /petsc/src/mat/impls/baij/seq/baijfact2.c (revision f1af5d2ffeae1f5fc391a89939f4818e47770ae3)
1*f1af5d2fSBarry Smith /*$Id: baijfact2.c,v 1.30 1999/10/24 14:02:28 bsmith Exp bsmith $*/
24e2b4712SSatish Balay /*
34e2b4712SSatish Balay     Factorization code for BAIJ format.
44e2b4712SSatish Balay */
54e2b4712SSatish Balay 
64e2b4712SSatish Balay #include "src/mat/impls/baij/seq/baij.h"
74e2b4712SSatish Balay #include "src/vec/vecimpl.h"
84e2b4712SSatish Balay #include "src/inline/ilu.h"
974c49faeSBarry Smith #include "src/inline/dot.h"
104e2b4712SSatish Balay 
11*f1af5d2fSBarry Smith #undef __FUNC__
12*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_1_NaturalOrdering"
13*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
14*f1af5d2fSBarry Smith {
15*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
16*f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz;
17*f1af5d2fSBarry Smith   int             *diag = a->diag;
18*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
19*f1af5d2fSBarry Smith   Scalar          s1,*x,*b;
20*f1af5d2fSBarry Smith 
21*f1af5d2fSBarry Smith   PetscFunctionBegin;
22*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
23*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
24*f1af5d2fSBarry Smith 
25*f1af5d2fSBarry Smith   /* forward solve the U^T */
26*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
27*f1af5d2fSBarry Smith 
28*f1af5d2fSBarry Smith     v     = aa + diag[i];
29*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
30*f1af5d2fSBarry Smith     s1    = (*v++)*b[i];
31*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
32*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
33*f1af5d2fSBarry Smith     while (nz--) {
34*f1af5d2fSBarry Smith       x[*vi++]  -= (*v++)*s1;
35*f1af5d2fSBarry Smith     }
36*f1af5d2fSBarry Smith     x[i]   = s1;
37*f1af5d2fSBarry Smith   }
38*f1af5d2fSBarry Smith   /* backward solve the L^T */
39*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
40*f1af5d2fSBarry Smith     v    = aa + diag[i] - 1;
41*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
42*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
43*f1af5d2fSBarry Smith     s1   = x[i];
44*f1af5d2fSBarry Smith     while (nz--) {
45*f1af5d2fSBarry Smith       x[*vi--]   -=  (*v--)*s1;
46*f1af5d2fSBarry Smith     }
47*f1af5d2fSBarry Smith   }
48*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
49*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
50*f1af5d2fSBarry Smith   PLogFlops(2*(a->nz) - a->n);
51*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
52*f1af5d2fSBarry Smith }
53*f1af5d2fSBarry Smith 
54*f1af5d2fSBarry Smith #undef __FUNC__
55*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_2_NaturalOrdering"
56*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
57*f1af5d2fSBarry Smith {
58*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
59*f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
60*f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
61*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
62*f1af5d2fSBarry Smith   Scalar          s1,s2,x1,x2;
63*f1af5d2fSBarry Smith   Scalar          *x,*b;
64*f1af5d2fSBarry Smith 
65*f1af5d2fSBarry Smith   PetscFunctionBegin;
66*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
67*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
68*f1af5d2fSBarry Smith 
69*f1af5d2fSBarry Smith   /* forward solve the U^T */
70*f1af5d2fSBarry Smith   idx = 0;
71*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
72*f1af5d2fSBarry Smith 
73*f1af5d2fSBarry Smith     v     = aa + 4*diag[i];
74*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
75*f1af5d2fSBarry Smith     x1 = b[idx];   x2 = b[1+idx];
76*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2;
77*f1af5d2fSBarry Smith     s2 = v[2]*x1  +  v[3]*x2;
78*f1af5d2fSBarry Smith     v += 4;
79*f1af5d2fSBarry Smith 
80*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
81*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
82*f1af5d2fSBarry Smith     while (nz--) {
83*f1af5d2fSBarry Smith       oidx = 2*(*vi++);
84*f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2;
85*f1af5d2fSBarry Smith       x[oidx+1] -= v[2]*s1  +  v[3]*s2;
86*f1af5d2fSBarry Smith       v  += 4;
87*f1af5d2fSBarry Smith     }
88*f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2;
89*f1af5d2fSBarry Smith     idx += 2;
90*f1af5d2fSBarry Smith   }
91*f1af5d2fSBarry Smith   /* backward solve the L^T */
92*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
93*f1af5d2fSBarry Smith     v    = aa + 4*diag[i] - 4;
94*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
95*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
96*f1af5d2fSBarry Smith     idt  = 2*i;
97*f1af5d2fSBarry Smith     s1   = x[idt];  s2 = x[1+idt];
98*f1af5d2fSBarry Smith     while (nz--) {
99*f1af5d2fSBarry Smith       idx   = 2*(*vi--);
100*f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2;
101*f1af5d2fSBarry Smith       x[idx+1] -=  v[2]*s1 +  v[3]*s2;
102*f1af5d2fSBarry Smith       v -= 4;
103*f1af5d2fSBarry Smith     }
104*f1af5d2fSBarry Smith   }
105*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
106*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
107*f1af5d2fSBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
108*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
109*f1af5d2fSBarry Smith }
110*f1af5d2fSBarry Smith 
111*f1af5d2fSBarry Smith #undef __FUNC__
112*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_3_NaturalOrdering"
113*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
114*f1af5d2fSBarry Smith {
115*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
116*f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
117*f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
118*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
119*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,x1,x2,x3;
120*f1af5d2fSBarry Smith   Scalar          *x,*b;
121*f1af5d2fSBarry Smith 
122*f1af5d2fSBarry Smith   PetscFunctionBegin;
123*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
124*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
125*f1af5d2fSBarry Smith 
126*f1af5d2fSBarry Smith   /* forward solve the U^T */
127*f1af5d2fSBarry Smith   idx = 0;
128*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
129*f1af5d2fSBarry Smith 
130*f1af5d2fSBarry Smith     v     = aa + 9*diag[i];
131*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
132*f1af5d2fSBarry Smith     x1 = b[idx];   x2 = b[1+idx]; x3    = b[2+idx];
133*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3;
134*f1af5d2fSBarry Smith     s2 = v[3]*x1  +  v[4]*x2 +  v[5]*x3;
135*f1af5d2fSBarry Smith     s3 = v[6]*x1  +  v[7]*x2 + v[8]*x3;
136*f1af5d2fSBarry Smith     v += 9;
137*f1af5d2fSBarry Smith 
138*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
139*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
140*f1af5d2fSBarry Smith     while (nz--) {
141*f1af5d2fSBarry Smith       oidx = 3*(*vi++);
142*f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3;
143*f1af5d2fSBarry Smith       x[oidx+1] -= v[3]*s1  +  v[4]*s2 +  v[5]*s3;
144*f1af5d2fSBarry Smith       x[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
145*f1af5d2fSBarry Smith       v  += 9;
146*f1af5d2fSBarry Smith     }
147*f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;
148*f1af5d2fSBarry Smith     idx += 3;
149*f1af5d2fSBarry Smith   }
150*f1af5d2fSBarry Smith   /* backward solve the L^T */
151*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
152*f1af5d2fSBarry Smith     v    = aa + 9*diag[i] - 9;
153*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
154*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
155*f1af5d2fSBarry Smith     idt  = 3*i;
156*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];
157*f1af5d2fSBarry Smith     while (nz--) {
158*f1af5d2fSBarry Smith       idx   = 3*(*vi--);
159*f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3;
160*f1af5d2fSBarry Smith       x[idx+1] -=  v[3]*s1 +  v[4]*s2 +  v[5]*s3;
161*f1af5d2fSBarry Smith       x[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
162*f1af5d2fSBarry Smith       v -= 9;
163*f1af5d2fSBarry Smith     }
164*f1af5d2fSBarry Smith   }
165*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
166*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
167*f1af5d2fSBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
168*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
169*f1af5d2fSBarry Smith }
170*f1af5d2fSBarry Smith 
171*f1af5d2fSBarry Smith #undef __FUNC__
172*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_4_NaturalOrdering"
173*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
174*f1af5d2fSBarry Smith {
175*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
176*f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
177*f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
178*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
179*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,x1,x2,x3,x4;
180*f1af5d2fSBarry Smith   Scalar          *x,*b;
181*f1af5d2fSBarry Smith 
182*f1af5d2fSBarry Smith   PetscFunctionBegin;
183*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
184*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
185*f1af5d2fSBarry Smith 
186*f1af5d2fSBarry Smith   /* forward solve the U^T */
187*f1af5d2fSBarry Smith   idx = 0;
188*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
189*f1af5d2fSBarry Smith 
190*f1af5d2fSBarry Smith     v     = aa + 16*diag[i];
191*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
192*f1af5d2fSBarry Smith     x1    = b[idx];   x2 = b[1+idx]; x3    = b[2+idx]; x4 = b[3+idx];
193*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
194*f1af5d2fSBarry Smith     s2 = v[4]*x1  +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
195*f1af5d2fSBarry Smith     s3 = v[8]*x1  +  v[9]*x2 + v[10]*x3 + v[11]*x4;
196*f1af5d2fSBarry Smith     s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
197*f1af5d2fSBarry Smith     v += 16;
198*f1af5d2fSBarry Smith 
199*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
200*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
201*f1af5d2fSBarry Smith     while (nz--) {
202*f1af5d2fSBarry Smith       oidx = 4*(*vi++);
203*f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4;
204*f1af5d2fSBarry Smith       x[oidx+1] -= v[4]*s1  +  v[5]*s2 +  v[6]*s3 +  v[7]*s4;
205*f1af5d2fSBarry Smith       x[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
206*f1af5d2fSBarry Smith       x[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
207*f1af5d2fSBarry Smith       v  += 16;
208*f1af5d2fSBarry Smith     }
209*f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4;
210*f1af5d2fSBarry Smith     idx += 4;
211*f1af5d2fSBarry Smith   }
212*f1af5d2fSBarry Smith   /* backward solve the L^T */
213*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
214*f1af5d2fSBarry Smith     v    = aa + 16*diag[i] - 16;
215*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
216*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
217*f1af5d2fSBarry Smith     idt  = 4*i;
218*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt];
219*f1af5d2fSBarry Smith     while (nz--) {
220*f1af5d2fSBarry Smith       idx   = 4*(*vi--);
221*f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4;
222*f1af5d2fSBarry Smith       x[idx+1] -=  v[4]*s1 +  v[5]*s2 +  v[6]*s3 +  v[7]*s4;
223*f1af5d2fSBarry Smith       x[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
224*f1af5d2fSBarry Smith       x[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
225*f1af5d2fSBarry Smith       v -= 16;
226*f1af5d2fSBarry Smith     }
227*f1af5d2fSBarry Smith   }
228*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
229*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
230*f1af5d2fSBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
231*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
232*f1af5d2fSBarry Smith }
233*f1af5d2fSBarry Smith 
234*f1af5d2fSBarry Smith #undef __FUNC__
235*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_5_NaturalOrdering"
236*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
237*f1af5d2fSBarry Smith {
238*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
239*f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
240*f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
241*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
242*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
243*f1af5d2fSBarry Smith   Scalar          *x,*b;
244*f1af5d2fSBarry Smith 
245*f1af5d2fSBarry Smith   PetscFunctionBegin;
246*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
247*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
248*f1af5d2fSBarry Smith 
249*f1af5d2fSBarry Smith   /* forward solve the U^T */
250*f1af5d2fSBarry Smith   idx = 0;
251*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
252*f1af5d2fSBarry Smith 
253*f1af5d2fSBarry Smith     v     = aa + 25*diag[i];
254*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
255*f1af5d2fSBarry Smith     x1    = b[idx];   x2 = b[1+idx]; x3    = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
256*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
257*f1af5d2fSBarry Smith     s2 = v[5]*x1  +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
258*f1af5d2fSBarry Smith     s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
259*f1af5d2fSBarry Smith     s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
260*f1af5d2fSBarry Smith     s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
261*f1af5d2fSBarry Smith     v += 25;
262*f1af5d2fSBarry Smith 
263*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
264*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
265*f1af5d2fSBarry Smith     while (nz--) {
266*f1af5d2fSBarry Smith       oidx = 5*(*vi++);
267*f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5;
268*f1af5d2fSBarry Smith       x[oidx+1] -= v[5]*s1  +  v[6]*s2 +  v[7]*s3 +  v[8]*s4 +  v[9]*s5;
269*f1af5d2fSBarry Smith       x[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
270*f1af5d2fSBarry Smith       x[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
271*f1af5d2fSBarry Smith       x[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
272*f1af5d2fSBarry Smith       v  += 25;
273*f1af5d2fSBarry Smith     }
274*f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
275*f1af5d2fSBarry Smith     idx += 5;
276*f1af5d2fSBarry Smith   }
277*f1af5d2fSBarry Smith   /* backward solve the L^T */
278*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
279*f1af5d2fSBarry Smith     v    = aa + 25*diag[i] - 25;
280*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
281*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
282*f1af5d2fSBarry Smith     idt  = 5*i;
283*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
284*f1af5d2fSBarry Smith     while (nz--) {
285*f1af5d2fSBarry Smith       idx   = 5*(*vi--);
286*f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5;
287*f1af5d2fSBarry Smith       x[idx+1] -=  v[5]*s1 +  v[6]*s2 +  v[7]*s3 +  v[8]*s4 +  v[9]*s5;
288*f1af5d2fSBarry Smith       x[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
289*f1af5d2fSBarry Smith       x[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
290*f1af5d2fSBarry Smith       x[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
291*f1af5d2fSBarry Smith       v -= 25;
292*f1af5d2fSBarry Smith     }
293*f1af5d2fSBarry Smith   }
294*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
295*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
296*f1af5d2fSBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
297*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
298*f1af5d2fSBarry Smith }
299*f1af5d2fSBarry Smith 
300*f1af5d2fSBarry Smith #undef __FUNC__
301*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_6_NaturalOrdering"
302*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
303*f1af5d2fSBarry Smith {
304*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
305*f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
306*f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
307*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
308*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
309*f1af5d2fSBarry Smith   Scalar          *x,*b;
310*f1af5d2fSBarry Smith 
311*f1af5d2fSBarry Smith   PetscFunctionBegin;
312*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
313*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
314*f1af5d2fSBarry Smith 
315*f1af5d2fSBarry Smith   /* forward solve the U^T */
316*f1af5d2fSBarry Smith   idx = 0;
317*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
318*f1af5d2fSBarry Smith 
319*f1af5d2fSBarry Smith     v     = aa + 36*diag[i];
320*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
321*f1af5d2fSBarry Smith     x1    = b[idx];   x2 = b[1+idx]; x3    = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
322*f1af5d2fSBarry Smith     x6    = b[5+idx];
323*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5 +  v[5]*x6;
324*f1af5d2fSBarry Smith     s2 = v[6]*x1  +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
325*f1af5d2fSBarry Smith     s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
326*f1af5d2fSBarry Smith     s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
327*f1af5d2fSBarry Smith     s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
328*f1af5d2fSBarry Smith     s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
329*f1af5d2fSBarry Smith     v += 36;
330*f1af5d2fSBarry Smith 
331*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
332*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
333*f1af5d2fSBarry Smith     while (nz--) {
334*f1af5d2fSBarry Smith       oidx = 6*(*vi++);
335*f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6;
336*f1af5d2fSBarry Smith       x[oidx+1] -= v[6]*s1  +  v[7]*s2 +  v[8]*s3 +  v[9]*s4 + v[10]*s5 + v[11]*s6;
337*f1af5d2fSBarry Smith       x[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
338*f1af5d2fSBarry Smith       x[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
339*f1af5d2fSBarry Smith       x[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
340*f1af5d2fSBarry Smith       x[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
341*f1af5d2fSBarry Smith       v  += 36;
342*f1af5d2fSBarry Smith     }
343*f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
344*f1af5d2fSBarry Smith     x[5+idx] = s6;
345*f1af5d2fSBarry Smith     idx += 6;
346*f1af5d2fSBarry Smith   }
347*f1af5d2fSBarry Smith   /* backward solve the L^T */
348*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
349*f1af5d2fSBarry Smith     v    = aa + 36*diag[i] - 36;
350*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
351*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
352*f1af5d2fSBarry Smith     idt  = 6*i;
353*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
354*f1af5d2fSBarry Smith     s6 = x[5+idt];
355*f1af5d2fSBarry Smith     while (nz--) {
356*f1af5d2fSBarry Smith       idx   = 6*(*vi--);
357*f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6;
358*f1af5d2fSBarry Smith       x[idx+1] -=  v[6]*s1 +  v[7]*s2 +  v[8]*s3 +  v[9]*s4 + v[10]*s5 + v[11]*s6;
359*f1af5d2fSBarry Smith       x[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
360*f1af5d2fSBarry Smith       x[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
361*f1af5d2fSBarry Smith       x[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
362*f1af5d2fSBarry Smith       x[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
363*f1af5d2fSBarry Smith       v -= 36;
364*f1af5d2fSBarry Smith     }
365*f1af5d2fSBarry Smith   }
366*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
367*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
368*f1af5d2fSBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
369*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
370*f1af5d2fSBarry Smith }
371*f1af5d2fSBarry Smith 
372*f1af5d2fSBarry Smith #undef __FUNC__
373*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_7_NaturalOrdering"
374*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
375*f1af5d2fSBarry Smith {
376*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
377*f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
378*f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
379*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
380*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
381*f1af5d2fSBarry Smith   Scalar          *x,*b;
382*f1af5d2fSBarry Smith 
383*f1af5d2fSBarry Smith   PetscFunctionBegin;
384*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
385*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
386*f1af5d2fSBarry Smith 
387*f1af5d2fSBarry Smith   /* forward solve the U^T */
388*f1af5d2fSBarry Smith   idx = 0;
389*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
390*f1af5d2fSBarry Smith 
391*f1af5d2fSBarry Smith     v     = aa + 49*diag[i];
392*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
393*f1af5d2fSBarry Smith     x1    = b[idx];   x2 = b[1+idx]; x3    = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
394*f1af5d2fSBarry Smith     x6    = b[5+idx]; x7 = b[6+idx];
395*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5 +  v[5]*x6 +  v[6]*x7;
396*f1af5d2fSBarry Smith     s2 = v[7]*x1  +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
397*f1af5d2fSBarry Smith     s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
398*f1af5d2fSBarry Smith     s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
399*f1af5d2fSBarry Smith     s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
400*f1af5d2fSBarry Smith     s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
401*f1af5d2fSBarry Smith     s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
402*f1af5d2fSBarry Smith     v += 49;
403*f1af5d2fSBarry Smith 
404*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
405*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
406*f1af5d2fSBarry Smith     while (nz--) {
407*f1af5d2fSBarry Smith       oidx = 7*(*vi++);
408*f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6 +  v[6]*s7;
409*f1af5d2fSBarry Smith       x[oidx+1] -= v[7]*s1  +  v[8]*s2 +  v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
410*f1af5d2fSBarry Smith       x[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
411*f1af5d2fSBarry Smith       x[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
412*f1af5d2fSBarry Smith       x[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
413*f1af5d2fSBarry Smith       x[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
414*f1af5d2fSBarry Smith       x[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
415*f1af5d2fSBarry Smith       v  += 49;
416*f1af5d2fSBarry Smith     }
417*f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
418*f1af5d2fSBarry Smith     x[5+idx] = s6;x[6+idx] = s7;
419*f1af5d2fSBarry Smith     idx += 7;
420*f1af5d2fSBarry Smith   }
421*f1af5d2fSBarry Smith   /* backward solve the L^T */
422*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
423*f1af5d2fSBarry Smith     v    = aa + 49*diag[i] - 49;
424*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
425*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
426*f1af5d2fSBarry Smith     idt  = 7*i;
427*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
428*f1af5d2fSBarry Smith     s6 = x[5+idt];s7 = x[6+idt];
429*f1af5d2fSBarry Smith     while (nz--) {
430*f1af5d2fSBarry Smith       idx   = 7*(*vi--);
431*f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6 +  v[6]*s7;
432*f1af5d2fSBarry Smith       x[idx+1] -=  v[7]*s1 +  v[8]*s2 +  v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
433*f1af5d2fSBarry Smith       x[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
434*f1af5d2fSBarry Smith       x[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
435*f1af5d2fSBarry Smith       x[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
436*f1af5d2fSBarry Smith       x[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
437*f1af5d2fSBarry Smith       x[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
438*f1af5d2fSBarry Smith       v -= 49;
439*f1af5d2fSBarry Smith     }
440*f1af5d2fSBarry Smith   }
441*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
442*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
443*f1af5d2fSBarry Smith   PLogFlops(2*49*(a->nz) - 7*a->n);
444*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
445*f1af5d2fSBarry Smith }
446*f1af5d2fSBarry Smith 
447*f1af5d2fSBarry Smith /*---------------------------------------------------------------------------------------------*/
448*f1af5d2fSBarry Smith #undef __FUNC__
449*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_1"
450*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
451*f1af5d2fSBarry Smith {
452*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
453*f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
454*f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
455*f1af5d2fSBarry Smith   int             *diag = a->diag;
456*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
457*f1af5d2fSBarry Smith   Scalar          s1,*x,*b,*t;
458*f1af5d2fSBarry Smith 
459*f1af5d2fSBarry Smith   PetscFunctionBegin;
460*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
461*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
462*f1af5d2fSBarry Smith   t  = a->solve_work;
463*f1af5d2fSBarry Smith 
464*f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
465*f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
466*f1af5d2fSBarry Smith 
467*f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
468*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
469*f1af5d2fSBarry Smith     t[i] = b[c[i]];
470*f1af5d2fSBarry Smith   }
471*f1af5d2fSBarry Smith 
472*f1af5d2fSBarry Smith   /* forward solve the U^T */
473*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
474*f1af5d2fSBarry Smith 
475*f1af5d2fSBarry Smith     v     = aa + diag[i];
476*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
477*f1af5d2fSBarry Smith     s1    = (*v++)*t[i];
478*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
479*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
480*f1af5d2fSBarry Smith     while (nz--) {
481*f1af5d2fSBarry Smith       t[*vi++]  -= (*v++)*s1;
482*f1af5d2fSBarry Smith     }
483*f1af5d2fSBarry Smith     t[i]   = s1;
484*f1af5d2fSBarry Smith   }
485*f1af5d2fSBarry Smith   /* backward solve the L^T */
486*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
487*f1af5d2fSBarry Smith     v    = aa + diag[i] - 1;
488*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
489*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
490*f1af5d2fSBarry Smith     s1   = t[i];
491*f1af5d2fSBarry Smith     while (nz--) {
492*f1af5d2fSBarry Smith       t[*vi--]   -=  (*v--)*s1;
493*f1af5d2fSBarry Smith     }
494*f1af5d2fSBarry Smith   }
495*f1af5d2fSBarry Smith 
496*f1af5d2fSBarry Smith   /* copy t into x according to permutation */
497*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
498*f1af5d2fSBarry Smith     x[r[i]]   = t[i];
499*f1af5d2fSBarry Smith   }
500*f1af5d2fSBarry Smith 
501*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
502*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
503*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
504*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
505*f1af5d2fSBarry Smith   PLogFlops(2*(a->nz) - a->n);
506*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
507*f1af5d2fSBarry Smith }
508*f1af5d2fSBarry Smith 
509*f1af5d2fSBarry Smith #undef __FUNC__
510*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_2"
511*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
512*f1af5d2fSBarry Smith {
513*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
514*f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
515*f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
516*f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
517*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
518*f1af5d2fSBarry Smith   Scalar          s1,s2,x1,x2;
519*f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
520*f1af5d2fSBarry Smith 
521*f1af5d2fSBarry Smith   PetscFunctionBegin;
522*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
523*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
524*f1af5d2fSBarry Smith   t  = a->solve_work;
525*f1af5d2fSBarry Smith 
526*f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
527*f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
528*f1af5d2fSBarry Smith 
529*f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
530*f1af5d2fSBarry Smith   ii = 0;
531*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
532*f1af5d2fSBarry Smith     ic      = 2*c[i];
533*f1af5d2fSBarry Smith     t[ii]   = b[ic];
534*f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
535*f1af5d2fSBarry Smith     ii += 2;
536*f1af5d2fSBarry Smith   }
537*f1af5d2fSBarry Smith 
538*f1af5d2fSBarry Smith   /* forward solve the U^T */
539*f1af5d2fSBarry Smith   idx = 0;
540*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
541*f1af5d2fSBarry Smith 
542*f1af5d2fSBarry Smith     v     = aa + 4*diag[i];
543*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
544*f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx];
545*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2;
546*f1af5d2fSBarry Smith     s2 = v[2]*x1  +  v[3]*x2;
547*f1af5d2fSBarry Smith     v += 4;
548*f1af5d2fSBarry Smith 
549*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
550*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
551*f1af5d2fSBarry Smith     while (nz--) {
552*f1af5d2fSBarry Smith       oidx = 2*(*vi++);
553*f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2;
554*f1af5d2fSBarry Smith       t[oidx+1] -= v[2]*s1  +  v[3]*s2;
555*f1af5d2fSBarry Smith       v  += 4;
556*f1af5d2fSBarry Smith     }
557*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
558*f1af5d2fSBarry Smith     idx += 2;
559*f1af5d2fSBarry Smith   }
560*f1af5d2fSBarry Smith   /* backward solve the L^T */
561*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
562*f1af5d2fSBarry Smith     v    = aa + 4*diag[i] - 4;
563*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
564*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
565*f1af5d2fSBarry Smith     idt  = 2*i;
566*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
567*f1af5d2fSBarry Smith     while (nz--) {
568*f1af5d2fSBarry Smith       idx   = 2*(*vi--);
569*f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2;
570*f1af5d2fSBarry Smith       t[idx+1] -=  v[2]*s1 +  v[3]*s2;
571*f1af5d2fSBarry Smith       v -= 4;
572*f1af5d2fSBarry Smith     }
573*f1af5d2fSBarry Smith   }
574*f1af5d2fSBarry Smith 
575*f1af5d2fSBarry Smith   /* copy t into x according to permutation */
576*f1af5d2fSBarry Smith   ii = 0;
577*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
578*f1af5d2fSBarry Smith     ir      = 2*r[i];
579*f1af5d2fSBarry Smith     x[ir]   = t[ii];
580*f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
581*f1af5d2fSBarry Smith     ii += 2;
582*f1af5d2fSBarry Smith   }
583*f1af5d2fSBarry Smith 
584*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
585*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
586*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
587*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
588*f1af5d2fSBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
589*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
590*f1af5d2fSBarry Smith }
591*f1af5d2fSBarry Smith 
592*f1af5d2fSBarry Smith #undef __FUNC__
593*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_3"
594*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
595*f1af5d2fSBarry Smith {
596*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
597*f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
598*f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
599*f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
600*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
601*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,x1,x2,x3;
602*f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
603*f1af5d2fSBarry Smith 
604*f1af5d2fSBarry Smith   PetscFunctionBegin;
605*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
606*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
607*f1af5d2fSBarry Smith   t  = a->solve_work;
608*f1af5d2fSBarry Smith 
609*f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
610*f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
611*f1af5d2fSBarry Smith 
612*f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
613*f1af5d2fSBarry Smith   ii = 0;
614*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
615*f1af5d2fSBarry Smith     ic      = 3*c[i];
616*f1af5d2fSBarry Smith     t[ii]   = b[ic];
617*f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
618*f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
619*f1af5d2fSBarry Smith     ii += 3;
620*f1af5d2fSBarry Smith   }
621*f1af5d2fSBarry Smith 
622*f1af5d2fSBarry Smith   /* forward solve the U^T */
623*f1af5d2fSBarry Smith   idx = 0;
624*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
625*f1af5d2fSBarry Smith 
626*f1af5d2fSBarry Smith     v     = aa + 9*diag[i];
627*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
628*f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx];
629*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3;
630*f1af5d2fSBarry Smith     s2 = v[3]*x1  +  v[4]*x2 +  v[5]*x3;
631*f1af5d2fSBarry Smith     s3 = v[6]*x1  +  v[7]*x2 + v[8]*x3;
632*f1af5d2fSBarry Smith     v += 9;
633*f1af5d2fSBarry Smith 
634*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
635*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
636*f1af5d2fSBarry Smith     while (nz--) {
637*f1af5d2fSBarry Smith       oidx = 3*(*vi++);
638*f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3;
639*f1af5d2fSBarry Smith       t[oidx+1] -= v[3]*s1  +  v[4]*s2 +  v[5]*s3;
640*f1af5d2fSBarry Smith       t[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
641*f1af5d2fSBarry Smith       v  += 9;
642*f1af5d2fSBarry Smith     }
643*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;
644*f1af5d2fSBarry Smith     idx += 3;
645*f1af5d2fSBarry Smith   }
646*f1af5d2fSBarry Smith   /* backward solve the L^T */
647*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
648*f1af5d2fSBarry Smith     v    = aa + 9*diag[i] - 9;
649*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
650*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
651*f1af5d2fSBarry Smith     idt  = 3*i;
652*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];
653*f1af5d2fSBarry Smith     while (nz--) {
654*f1af5d2fSBarry Smith       idx   = 3*(*vi--);
655*f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3;
656*f1af5d2fSBarry Smith       t[idx+1] -=  v[3]*s1 +  v[4]*s2 +  v[5]*s3;
657*f1af5d2fSBarry Smith       t[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
658*f1af5d2fSBarry Smith       v -= 9;
659*f1af5d2fSBarry Smith     }
660*f1af5d2fSBarry Smith   }
661*f1af5d2fSBarry Smith 
662*f1af5d2fSBarry Smith   /* copy t into x according to permutation */
663*f1af5d2fSBarry Smith   ii = 0;
664*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
665*f1af5d2fSBarry Smith     ir      = 3*r[i];
666*f1af5d2fSBarry Smith     x[ir]   = t[ii];
667*f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
668*f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
669*f1af5d2fSBarry Smith     ii += 3;
670*f1af5d2fSBarry Smith   }
671*f1af5d2fSBarry Smith 
672*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
673*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
674*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
675*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
676*f1af5d2fSBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
677*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
678*f1af5d2fSBarry Smith }
679*f1af5d2fSBarry Smith 
680*f1af5d2fSBarry Smith #undef __FUNC__
681*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_4"
682*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
683*f1af5d2fSBarry Smith {
684*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
685*f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
686*f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
687*f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
688*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
689*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,x1,x2,x3,x4;
690*f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
691*f1af5d2fSBarry Smith 
692*f1af5d2fSBarry Smith   PetscFunctionBegin;
693*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
694*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
695*f1af5d2fSBarry Smith   t  = a->solve_work;
696*f1af5d2fSBarry Smith 
697*f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
698*f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
699*f1af5d2fSBarry Smith 
700*f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
701*f1af5d2fSBarry Smith   ii = 0;
702*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
703*f1af5d2fSBarry Smith     ic      = 4*c[i];
704*f1af5d2fSBarry Smith     t[ii]   = b[ic];
705*f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
706*f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
707*f1af5d2fSBarry Smith     t[ii+3] = b[ic+3];
708*f1af5d2fSBarry Smith     ii += 4;
709*f1af5d2fSBarry Smith   }
710*f1af5d2fSBarry Smith 
711*f1af5d2fSBarry Smith   /* forward solve the U^T */
712*f1af5d2fSBarry Smith   idx = 0;
713*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
714*f1af5d2fSBarry Smith 
715*f1af5d2fSBarry Smith     v     = aa + 16*diag[i];
716*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
717*f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx]; x4 = t[3+idx];
718*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
719*f1af5d2fSBarry Smith     s2 = v[4]*x1  +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
720*f1af5d2fSBarry Smith     s3 = v[8]*x1  +  v[9]*x2 + v[10]*x3 + v[11]*x4;
721*f1af5d2fSBarry Smith     s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
722*f1af5d2fSBarry Smith     v += 16;
723*f1af5d2fSBarry Smith 
724*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
725*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
726*f1af5d2fSBarry Smith     while (nz--) {
727*f1af5d2fSBarry Smith       oidx = 4*(*vi++);
728*f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4;
729*f1af5d2fSBarry Smith       t[oidx+1] -= v[4]*s1  +  v[5]*s2 +  v[6]*s3 +  v[7]*s4;
730*f1af5d2fSBarry Smith       t[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
731*f1af5d2fSBarry Smith       t[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
732*f1af5d2fSBarry Smith       v  += 16;
733*f1af5d2fSBarry Smith     }
734*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4;
735*f1af5d2fSBarry Smith     idx += 4;
736*f1af5d2fSBarry Smith   }
737*f1af5d2fSBarry Smith   /* backward solve the L^T */
738*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
739*f1af5d2fSBarry Smith     v    = aa + 16*diag[i] - 16;
740*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
741*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
742*f1af5d2fSBarry Smith     idt  = 4*i;
743*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt];
744*f1af5d2fSBarry Smith     while (nz--) {
745*f1af5d2fSBarry Smith       idx   = 4*(*vi--);
746*f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4;
747*f1af5d2fSBarry Smith       t[idx+1] -=  v[4]*s1 +  v[5]*s2 +  v[6]*s3 +  v[7]*s4;
748*f1af5d2fSBarry Smith       t[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
749*f1af5d2fSBarry Smith       t[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
750*f1af5d2fSBarry Smith       v -= 16;
751*f1af5d2fSBarry Smith     }
752*f1af5d2fSBarry Smith   }
753*f1af5d2fSBarry Smith 
754*f1af5d2fSBarry Smith   /* copy t into x according to permutation */
755*f1af5d2fSBarry Smith   ii = 0;
756*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
757*f1af5d2fSBarry Smith     ir      = 4*r[i];
758*f1af5d2fSBarry Smith     x[ir]   = t[ii];
759*f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
760*f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
761*f1af5d2fSBarry Smith     x[ir+3] = t[ii+3];
762*f1af5d2fSBarry Smith     ii += 4;
763*f1af5d2fSBarry Smith   }
764*f1af5d2fSBarry Smith 
765*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
766*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
767*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
768*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
769*f1af5d2fSBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
770*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
771*f1af5d2fSBarry Smith }
772*f1af5d2fSBarry Smith 
773*f1af5d2fSBarry Smith #undef __FUNC__
774*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_5"
775*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
776*f1af5d2fSBarry Smith {
777*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
778*f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
779*f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
780*f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
781*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
782*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
783*f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
784*f1af5d2fSBarry Smith 
785*f1af5d2fSBarry Smith   PetscFunctionBegin;
786*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
787*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
788*f1af5d2fSBarry Smith   t  = a->solve_work;
789*f1af5d2fSBarry Smith 
790*f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
791*f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
792*f1af5d2fSBarry Smith 
793*f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
794*f1af5d2fSBarry Smith   ii = 0;
795*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
796*f1af5d2fSBarry Smith     ic      = 5*c[i];
797*f1af5d2fSBarry Smith     t[ii]   = b[ic];
798*f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
799*f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
800*f1af5d2fSBarry Smith     t[ii+3] = b[ic+3];
801*f1af5d2fSBarry Smith     t[ii+4] = b[ic+4];
802*f1af5d2fSBarry Smith     ii += 5;
803*f1af5d2fSBarry Smith   }
804*f1af5d2fSBarry Smith 
805*f1af5d2fSBarry Smith   /* forward solve the U^T */
806*f1af5d2fSBarry Smith   idx = 0;
807*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
808*f1af5d2fSBarry Smith 
809*f1af5d2fSBarry Smith     v     = aa + 25*diag[i];
810*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
811*f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
812*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
813*f1af5d2fSBarry Smith     s2 = v[5]*x1  +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
814*f1af5d2fSBarry Smith     s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
815*f1af5d2fSBarry Smith     s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
816*f1af5d2fSBarry Smith     s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
817*f1af5d2fSBarry Smith     v += 25;
818*f1af5d2fSBarry Smith 
819*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
820*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
821*f1af5d2fSBarry Smith     while (nz--) {
822*f1af5d2fSBarry Smith       oidx = 5*(*vi++);
823*f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5;
824*f1af5d2fSBarry Smith       t[oidx+1] -= v[5]*s1  +  v[6]*s2 +  v[7]*s3 +  v[8]*s4 +  v[9]*s5;
825*f1af5d2fSBarry Smith       t[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
826*f1af5d2fSBarry Smith       t[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
827*f1af5d2fSBarry Smith       t[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
828*f1af5d2fSBarry Smith       v  += 25;
829*f1af5d2fSBarry Smith     }
830*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
831*f1af5d2fSBarry Smith     idx += 5;
832*f1af5d2fSBarry Smith   }
833*f1af5d2fSBarry Smith   /* backward solve the L^T */
834*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
835*f1af5d2fSBarry Smith     v    = aa + 25*diag[i] - 25;
836*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
837*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
838*f1af5d2fSBarry Smith     idt  = 5*i;
839*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
840*f1af5d2fSBarry Smith     while (nz--) {
841*f1af5d2fSBarry Smith       idx   = 5*(*vi--);
842*f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5;
843*f1af5d2fSBarry Smith       t[idx+1] -=  v[5]*s1 +  v[6]*s2 +  v[7]*s3 +  v[8]*s4 +  v[9]*s5;
844*f1af5d2fSBarry Smith       t[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
845*f1af5d2fSBarry Smith       t[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
846*f1af5d2fSBarry Smith       t[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
847*f1af5d2fSBarry Smith       v -= 25;
848*f1af5d2fSBarry Smith     }
849*f1af5d2fSBarry Smith   }
850*f1af5d2fSBarry Smith 
851*f1af5d2fSBarry Smith   /* copy t into x according to permutation */
852*f1af5d2fSBarry Smith   ii = 0;
853*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
854*f1af5d2fSBarry Smith     ir      = 5*r[i];
855*f1af5d2fSBarry Smith     x[ir]   = t[ii];
856*f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
857*f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
858*f1af5d2fSBarry Smith     x[ir+3] = t[ii+3];
859*f1af5d2fSBarry Smith     x[ir+4] = t[ii+4];
860*f1af5d2fSBarry Smith     ii += 5;
861*f1af5d2fSBarry Smith   }
862*f1af5d2fSBarry Smith 
863*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
864*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
865*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
866*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
867*f1af5d2fSBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
868*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
869*f1af5d2fSBarry Smith }
870*f1af5d2fSBarry Smith 
871*f1af5d2fSBarry Smith #undef __FUNC__
872*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_6"
873*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
874*f1af5d2fSBarry Smith {
875*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
876*f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
877*f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
878*f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
879*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
880*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
881*f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
882*f1af5d2fSBarry Smith 
883*f1af5d2fSBarry Smith   PetscFunctionBegin;
884*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
885*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
886*f1af5d2fSBarry Smith   t  = a->solve_work;
887*f1af5d2fSBarry Smith 
888*f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
889*f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
890*f1af5d2fSBarry Smith 
891*f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
892*f1af5d2fSBarry Smith   ii = 0;
893*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
894*f1af5d2fSBarry Smith     ic      = 6*c[i];
895*f1af5d2fSBarry Smith     t[ii]   = b[ic];
896*f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
897*f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
898*f1af5d2fSBarry Smith     t[ii+3] = b[ic+3];
899*f1af5d2fSBarry Smith     t[ii+4] = b[ic+4];
900*f1af5d2fSBarry Smith     t[ii+5] = b[ic+5];
901*f1af5d2fSBarry Smith     ii += 6;
902*f1af5d2fSBarry Smith   }
903*f1af5d2fSBarry Smith 
904*f1af5d2fSBarry Smith   /* forward solve the U^T */
905*f1af5d2fSBarry Smith   idx = 0;
906*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
907*f1af5d2fSBarry Smith 
908*f1af5d2fSBarry Smith     v     = aa + 36*diag[i];
909*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
910*f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
911*f1af5d2fSBarry Smith     x6    = t[5+idx];
912*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5 +  v[5]*x6;
913*f1af5d2fSBarry Smith     s2 = v[6]*x1  +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
914*f1af5d2fSBarry Smith     s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
915*f1af5d2fSBarry Smith     s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
916*f1af5d2fSBarry Smith     s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
917*f1af5d2fSBarry Smith     s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
918*f1af5d2fSBarry Smith     v += 36;
919*f1af5d2fSBarry Smith 
920*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
921*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
922*f1af5d2fSBarry Smith     while (nz--) {
923*f1af5d2fSBarry Smith       oidx = 6*(*vi++);
924*f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6;
925*f1af5d2fSBarry Smith       t[oidx+1] -= v[6]*s1  +  v[7]*s2 +  v[8]*s3 +  v[9]*s4 + v[10]*s5 + v[11]*s6;
926*f1af5d2fSBarry Smith       t[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
927*f1af5d2fSBarry Smith       t[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
928*f1af5d2fSBarry Smith       t[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
929*f1af5d2fSBarry Smith       t[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
930*f1af5d2fSBarry Smith       v  += 36;
931*f1af5d2fSBarry Smith     }
932*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
933*f1af5d2fSBarry Smith     t[5+idx] = s6;
934*f1af5d2fSBarry Smith     idx += 6;
935*f1af5d2fSBarry Smith   }
936*f1af5d2fSBarry Smith   /* backward solve the L^T */
937*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
938*f1af5d2fSBarry Smith     v    = aa + 36*diag[i] - 36;
939*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
940*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
941*f1af5d2fSBarry Smith     idt  = 6*i;
942*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
943*f1af5d2fSBarry Smith     s6 = t[5+idt];
944*f1af5d2fSBarry Smith     while (nz--) {
945*f1af5d2fSBarry Smith       idx   = 6*(*vi--);
946*f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6;
947*f1af5d2fSBarry Smith       t[idx+1] -=  v[6]*s1 +  v[7]*s2 +  v[8]*s3 +  v[9]*s4 + v[10]*s5 + v[11]*s6;
948*f1af5d2fSBarry Smith       t[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
949*f1af5d2fSBarry Smith       t[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
950*f1af5d2fSBarry Smith       t[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
951*f1af5d2fSBarry Smith       t[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
952*f1af5d2fSBarry Smith       v -= 36;
953*f1af5d2fSBarry Smith     }
954*f1af5d2fSBarry Smith   }
955*f1af5d2fSBarry Smith 
956*f1af5d2fSBarry Smith   /* copy t into x according to permutation */
957*f1af5d2fSBarry Smith   ii = 0;
958*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
959*f1af5d2fSBarry Smith     ir      = 6*r[i];
960*f1af5d2fSBarry Smith     x[ir]   = t[ii];
961*f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
962*f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
963*f1af5d2fSBarry Smith     x[ir+3] = t[ii+3];
964*f1af5d2fSBarry Smith     x[ir+4] = t[ii+4];
965*f1af5d2fSBarry Smith     x[ir+5] = t[ii+5];
966*f1af5d2fSBarry Smith     ii += 6;
967*f1af5d2fSBarry Smith   }
968*f1af5d2fSBarry Smith 
969*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
970*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
971*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
972*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
973*f1af5d2fSBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
974*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
975*f1af5d2fSBarry Smith }
976*f1af5d2fSBarry Smith 
977*f1af5d2fSBarry Smith #undef __FUNC__
978*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_7"
979*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
980*f1af5d2fSBarry Smith {
981*f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
982*f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
983*f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
984*f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
985*f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
986*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
987*f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
988*f1af5d2fSBarry Smith 
989*f1af5d2fSBarry Smith   PetscFunctionBegin;
990*f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
991*f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
992*f1af5d2fSBarry Smith   t  = a->solve_work;
993*f1af5d2fSBarry Smith 
994*f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
995*f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
996*f1af5d2fSBarry Smith 
997*f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
998*f1af5d2fSBarry Smith   ii = 0;
999*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
1000*f1af5d2fSBarry Smith     ic      = 7*c[i];
1001*f1af5d2fSBarry Smith     t[ii]   = b[ic];
1002*f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
1003*f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
1004*f1af5d2fSBarry Smith     t[ii+3] = b[ic+3];
1005*f1af5d2fSBarry Smith     t[ii+4] = b[ic+4];
1006*f1af5d2fSBarry Smith     t[ii+5] = b[ic+5];
1007*f1af5d2fSBarry Smith     t[ii+6] = b[ic+6];
1008*f1af5d2fSBarry Smith     ii += 7;
1009*f1af5d2fSBarry Smith   }
1010*f1af5d2fSBarry Smith 
1011*f1af5d2fSBarry Smith   /* forward solve the U^T */
1012*f1af5d2fSBarry Smith   idx = 0;
1013*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
1014*f1af5d2fSBarry Smith 
1015*f1af5d2fSBarry Smith     v     = aa + 49*diag[i];
1016*f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
1017*f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
1018*f1af5d2fSBarry Smith     x6    = t[5+idx]; x7 = t[6+idx];
1019*f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5 +  v[5]*x6 +  v[6]*x7;
1020*f1af5d2fSBarry Smith     s2 = v[7]*x1  +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1021*f1af5d2fSBarry Smith     s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1022*f1af5d2fSBarry Smith     s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1023*f1af5d2fSBarry Smith     s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1024*f1af5d2fSBarry Smith     s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1025*f1af5d2fSBarry Smith     s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1026*f1af5d2fSBarry Smith     v += 49;
1027*f1af5d2fSBarry Smith 
1028*f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
1029*f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
1030*f1af5d2fSBarry Smith     while (nz--) {
1031*f1af5d2fSBarry Smith       oidx = 7*(*vi++);
1032*f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6 +  v[6]*s7;
1033*f1af5d2fSBarry Smith       t[oidx+1] -= v[7]*s1  +  v[8]*s2 +  v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
1034*f1af5d2fSBarry Smith       t[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
1035*f1af5d2fSBarry Smith       t[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
1036*f1af5d2fSBarry Smith       t[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
1037*f1af5d2fSBarry Smith       t[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
1038*f1af5d2fSBarry Smith       t[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
1039*f1af5d2fSBarry Smith       v  += 49;
1040*f1af5d2fSBarry Smith     }
1041*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
1042*f1af5d2fSBarry Smith     t[5+idx] = s6;t[6+idx] = s7;
1043*f1af5d2fSBarry Smith     idx += 7;
1044*f1af5d2fSBarry Smith   }
1045*f1af5d2fSBarry Smith   /* backward solve the L^T */
1046*f1af5d2fSBarry Smith   for ( i=n-1; i>=0; i-- ){
1047*f1af5d2fSBarry Smith     v    = aa + 49*diag[i] - 49;
1048*f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
1049*f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
1050*f1af5d2fSBarry Smith     idt  = 7*i;
1051*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
1052*f1af5d2fSBarry Smith     s6 = t[5+idt];s7 = t[6+idt];
1053*f1af5d2fSBarry Smith     while (nz--) {
1054*f1af5d2fSBarry Smith       idx   = 7*(*vi--);
1055*f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6 +  v[6]*s7;
1056*f1af5d2fSBarry Smith       t[idx+1] -=  v[7]*s1 +  v[8]*s2 +  v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
1057*f1af5d2fSBarry Smith       t[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
1058*f1af5d2fSBarry Smith       t[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
1059*f1af5d2fSBarry Smith       t[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
1060*f1af5d2fSBarry Smith       t[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
1061*f1af5d2fSBarry Smith       t[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
1062*f1af5d2fSBarry Smith       v -= 49;
1063*f1af5d2fSBarry Smith     }
1064*f1af5d2fSBarry Smith   }
1065*f1af5d2fSBarry Smith 
1066*f1af5d2fSBarry Smith   /* copy t into x according to permutation */
1067*f1af5d2fSBarry Smith   ii = 0;
1068*f1af5d2fSBarry Smith   for ( i=0; i<n; i++ ) {
1069*f1af5d2fSBarry Smith     ir      = 7*r[i];
1070*f1af5d2fSBarry Smith     x[ir]   = t[ii];
1071*f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
1072*f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
1073*f1af5d2fSBarry Smith     x[ir+3] = t[ii+3];
1074*f1af5d2fSBarry Smith     x[ir+4] = t[ii+4];
1075*f1af5d2fSBarry Smith     x[ir+5] = t[ii+5];
1076*f1af5d2fSBarry Smith     x[ir+6] = t[ii+6];
1077*f1af5d2fSBarry Smith     ii += 7;
1078*f1af5d2fSBarry Smith   }
1079*f1af5d2fSBarry Smith 
1080*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1081*f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1082*f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1083*f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1084*f1af5d2fSBarry Smith   PLogFlops(2*49*(a->nz) - 7*a->n);
1085*f1af5d2fSBarry Smith   PetscFunctionReturn(0);
1086*f1af5d2fSBarry Smith }
1087*f1af5d2fSBarry Smith 
10884e2b4712SSatish Balay /* ----------------------------------------------------------- */
10894e2b4712SSatish Balay #undef __FUNC__
10904e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_N"
10914e2b4712SSatish Balay int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
10924e2b4712SSatish Balay {
10934e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
10944e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
10954e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
10964e2b4712SSatish Balay   int             nz,bs=a->bs,bs2=a->bs2,*rout,*cout;
10973f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1098*f1af5d2fSBarry Smith   Scalar          *x,*b,*s,*t,*ls;
10994e2b4712SSatish Balay 
11004e2b4712SSatish Balay   PetscFunctionBegin;
1101e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1102e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1103*f1af5d2fSBarry Smith   t  = a->solve_work;
11044e2b4712SSatish Balay 
11054e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
11064e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
11074e2b4712SSatish Balay 
11084e2b4712SSatish Balay   /* forward solve the lower triangular */
1109*f1af5d2fSBarry Smith   ierr = PetscMemcpy(t,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr);
11104e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
11114e2b4712SSatish Balay     v   = aa + bs2*ai[i];
11124e2b4712SSatish Balay     vi  = aj + ai[i];
11134e2b4712SSatish Balay     nz  = a->diag[i] - ai[i];
1114*f1af5d2fSBarry Smith     s = t + bs*i;
1115*f1af5d2fSBarry Smith     ierr = PetscMemcpy(s,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr);
11164e2b4712SSatish Balay     while (nz--) {
1117*f1af5d2fSBarry Smith       Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*(*vi++));
11184e2b4712SSatish Balay       v += bs2;
11194e2b4712SSatish Balay     }
11204e2b4712SSatish Balay   }
11214e2b4712SSatish Balay   /* backward solve the upper triangular */
1122*f1af5d2fSBarry Smith   ls = a->solve_work + a->n;
11234e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
11244e2b4712SSatish Balay     v   = aa + bs2*(a->diag[i] + 1);
11254e2b4712SSatish Balay     vi  = aj + a->diag[i] + 1;
11264e2b4712SSatish Balay     nz  = ai[i+1] - a->diag[i] - 1;
1127*f1af5d2fSBarry Smith     ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr);
11284e2b4712SSatish Balay     while (nz--) {
1129*f1af5d2fSBarry Smith       Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*(*vi++));
11304e2b4712SSatish Balay       v += bs2;
11314e2b4712SSatish Balay     }
1132*f1af5d2fSBarry Smith     Kernel_w_gets_A_times_v(bs,ls,aa+bs2*a->diag[i],t+i*bs);
1133*f1af5d2fSBarry Smith     ierr = PetscMemcpy(x + bs*(*c--),t+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr);
11344e2b4712SSatish Balay   }
11354e2b4712SSatish Balay 
11364e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
11374e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1138e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1139e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
114042015d63SBarry Smith   PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n);
11414e2b4712SSatish Balay   PetscFunctionReturn(0);
11424e2b4712SSatish Balay }
11434e2b4712SSatish Balay 
11444e2b4712SSatish Balay #undef __FUNC__
11454e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_7"
11464e2b4712SSatish Balay int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
11474e2b4712SSatish Balay {
11484e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
11494e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
11504e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
11514e2b4712SSatish Balay   int             *diag = a->diag;
11523f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1153*f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
1154*f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
11554e2b4712SSatish Balay 
11564e2b4712SSatish Balay   PetscFunctionBegin;
1157e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1158e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1159*f1af5d2fSBarry Smith   t  = a->solve_work;
11604e2b4712SSatish Balay 
11614e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
11624e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
11634e2b4712SSatish Balay 
11644e2b4712SSatish Balay   /* forward solve the lower triangular */
11654e2b4712SSatish Balay   idx    = 7*(*r++);
1166*f1af5d2fSBarry Smith   t[0] = b[idx];   t[1] = b[1+idx];
1167*f1af5d2fSBarry Smith   t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx];
1168*f1af5d2fSBarry Smith   t[5] = b[5+idx]; t[6] = b[6+idx];
11694e2b4712SSatish Balay 
11704e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
11714e2b4712SSatish Balay     v     = aa + 49*ai[i];
11724e2b4712SSatish Balay     vi    = aj + ai[i];
11734e2b4712SSatish Balay     nz    = diag[i] - ai[i];
11744e2b4712SSatish Balay     idx   = 7*(*r++);
1175*f1af5d2fSBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1176*f1af5d2fSBarry Smith     s5  = b[4+idx];s6 = b[5+idx];s7 = b[6+idx];
11774e2b4712SSatish Balay     while (nz--) {
11784e2b4712SSatish Balay       idx   = 7*(*vi++);
1179*f1af5d2fSBarry Smith       x1    = t[idx];  x2 = t[1+idx];x3 = t[2+idx];
1180*f1af5d2fSBarry Smith       x4    = t[3+idx];x5 = t[4+idx];
1181*f1af5d2fSBarry Smith       x6    = t[5+idx];x7 = t[6+idx];
1182*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1183*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1184*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1185*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1186*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1187*f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1188*f1af5d2fSBarry Smith       s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
11894e2b4712SSatish Balay       v += 49;
11904e2b4712SSatish Balay     }
11914e2b4712SSatish Balay     idx = 7*i;
1192*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
1193*f1af5d2fSBarry Smith     t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
1194*f1af5d2fSBarry Smith     t[5+idx] = s6;t[6+idx] = s7;
11954e2b4712SSatish Balay   }
11964e2b4712SSatish Balay   /* backward solve the upper triangular */
11974e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
11984e2b4712SSatish Balay     v    = aa + 49*diag[i] + 49;
11994e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
12004e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
12014e2b4712SSatish Balay     idt  = 7*i;
1202*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
1203*f1af5d2fSBarry Smith     s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
1204*f1af5d2fSBarry Smith     s6 = t[5+idt];s7 = t[6+idt];
12054e2b4712SSatish Balay     while (nz--) {
12064e2b4712SSatish Balay       idx   = 7*(*vi++);
1207*f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx];
1208*f1af5d2fSBarry Smith       x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
1209*f1af5d2fSBarry Smith       x6    = t[5+idx]; x7 = t[6+idx];
1210*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1211*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1212*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1213*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1214*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1215*f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1216*f1af5d2fSBarry Smith       s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
12174e2b4712SSatish Balay       v += 49;
12184e2b4712SSatish Balay     }
12194e2b4712SSatish Balay     idc = 7*(*c--);
12204e2b4712SSatish Balay     v   = aa + 49*diag[i];
1221*f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1+v[7]*s2+v[14]*s3+
1222*f1af5d2fSBarry Smith                                  v[21]*s4+v[28]*s5+v[35]*s6+v[42]*s7;
1223*f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1+v[8]*s2+v[15]*s3+
1224*f1af5d2fSBarry Smith                                  v[22]*s4+v[29]*s5+v[36]*s6+v[43]*s7;
1225*f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1+v[9]*s2+v[16]*s3+
1226*f1af5d2fSBarry Smith                                  v[23]*s4+v[30]*s5+v[37]*s6+v[44]*s7;
1227*f1af5d2fSBarry Smith     x[3+idc] = t[3+idt] = v[3]*s1+v[10]*s2+v[17]*s3+
1228*f1af5d2fSBarry Smith                                  v[24]*s4+v[31]*s5+v[38]*s6+v[45]*s7;
1229*f1af5d2fSBarry Smith     x[4+idc] = t[4+idt] = v[4]*s1+v[11]*s2+v[18]*s3+
1230*f1af5d2fSBarry Smith                                  v[25]*s4+v[32]*s5+v[39]*s6+v[46]*s7;
1231*f1af5d2fSBarry Smith     x[5+idc] = t[5+idt] = v[5]*s1+v[12]*s2+v[19]*s3+
1232*f1af5d2fSBarry Smith                                  v[26]*s4+v[33]*s5+v[40]*s6+v[47]*s7;
1233*f1af5d2fSBarry Smith     x[6+idc] = t[6+idt] = v[6]*s1+v[13]*s2+v[20]*s3+
1234*f1af5d2fSBarry Smith                                  v[27]*s4+v[34]*s5+v[41]*s6+v[48]*s7;
12354e2b4712SSatish Balay   }
12364e2b4712SSatish Balay 
12374e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
12384e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1239e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1240e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
124142015d63SBarry Smith   PLogFlops(2*49*(a->nz) - 7*a->n);
12424e2b4712SSatish Balay   PetscFunctionReturn(0);
12434e2b4712SSatish Balay }
12444e2b4712SSatish Balay 
12454e2b4712SSatish Balay #undef __FUNC__
124615091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering"
124715091d37SBarry Smith int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
124815091d37SBarry Smith {
124915091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
125015091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
125115091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
125215091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1253*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
125415091d37SBarry Smith 
125515091d37SBarry Smith   PetscFunctionBegin;
125615091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
125715091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
125815091d37SBarry Smith   /* forward solve the lower triangular */
125915091d37SBarry Smith   idx    = 0;
126015091d37SBarry Smith   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
126115091d37SBarry Smith   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
126215091d37SBarry Smith   x[6] = b[6+idx];
126315091d37SBarry Smith   for ( i=1; i<n; i++ ) {
126415091d37SBarry Smith     v     =  aa + 49*ai[i];
126515091d37SBarry Smith     vi    =  aj + ai[i];
126615091d37SBarry Smith     nz    =  diag[i] - ai[i];
126715091d37SBarry Smith     idx   =  7*i;
1268*f1af5d2fSBarry Smith     s1  =  b[idx];   s2 = b[1+idx]; s3 = b[2+idx];
1269*f1af5d2fSBarry Smith     s4  =  b[3+idx]; s5 = b[4+idx]; s6 = b[5+idx];
1270*f1af5d2fSBarry Smith     s7  =  b[6+idx];
127115091d37SBarry Smith     while (nz--) {
127215091d37SBarry Smith       jdx   = 7*(*vi++);
127315091d37SBarry Smith       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
127415091d37SBarry Smith       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
127515091d37SBarry Smith       x7    = x[6+jdx];
1276*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1277*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1278*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1279*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1280*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1281*f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1282*f1af5d2fSBarry Smith       s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
128315091d37SBarry Smith       v += 49;
128415091d37SBarry Smith      }
1285*f1af5d2fSBarry Smith     x[idx]   = s1;
1286*f1af5d2fSBarry Smith     x[1+idx] = s2;
1287*f1af5d2fSBarry Smith     x[2+idx] = s3;
1288*f1af5d2fSBarry Smith     x[3+idx] = s4;
1289*f1af5d2fSBarry Smith     x[4+idx] = s5;
1290*f1af5d2fSBarry Smith     x[5+idx] = s6;
1291*f1af5d2fSBarry Smith     x[6+idx] = s7;
129215091d37SBarry Smith   }
129315091d37SBarry Smith   /* backward solve the upper triangular */
129415091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
129515091d37SBarry Smith     v    = aa + 49*diag[i] + 49;
129615091d37SBarry Smith     vi   = aj + diag[i] + 1;
129715091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
129815091d37SBarry Smith     idt  = 7*i;
1299*f1af5d2fSBarry Smith     s1 = x[idt];   s2 = x[1+idt];
1300*f1af5d2fSBarry Smith     s3 = x[2+idt]; s4 = x[3+idt];
1301*f1af5d2fSBarry Smith     s5 = x[4+idt]; s6 = x[5+idt];
1302*f1af5d2fSBarry Smith     s7 = x[6+idt];
130315091d37SBarry Smith     while (nz--) {
130415091d37SBarry Smith       idx   = 7*(*vi++);
130515091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
130615091d37SBarry Smith       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
130715091d37SBarry Smith       x7    = x[6+idx];
1308*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1309*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1310*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1311*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1312*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1313*f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1314*f1af5d2fSBarry Smith       s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
131515091d37SBarry Smith       v += 49;
131615091d37SBarry Smith     }
131715091d37SBarry Smith     v        = aa + 49*diag[i];
1318*f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[7]*s2  + v[14]*s3 + v[21]*s4
1319*f1af5d2fSBarry Smith                          + v[28]*s5 + v[35]*s6 + v[42]*s7;
1320*f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[8]*s2  + v[15]*s3 + v[22]*s4
1321*f1af5d2fSBarry Smith                          + v[29]*s5 + v[36]*s6 + v[43]*s7;
1322*f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[9]*s2  + v[16]*s3 + v[23]*s4
1323*f1af5d2fSBarry Smith                          + v[30]*s5 + v[37]*s6 + v[44]*s7;
1324*f1af5d2fSBarry Smith     x[3+idt] = v[3]*s1 + v[10]*s2  + v[17]*s3 + v[24]*s4
1325*f1af5d2fSBarry Smith                          + v[31]*s5 + v[38]*s6 + v[45]*s7;
1326*f1af5d2fSBarry Smith     x[4+idt] = v[4]*s1 + v[11]*s2  + v[18]*s3 + v[25]*s4
1327*f1af5d2fSBarry Smith                          + v[32]*s5 + v[39]*s6 + v[46]*s7;
1328*f1af5d2fSBarry Smith     x[5+idt] = v[5]*s1 + v[12]*s2  + v[19]*s3 + v[26]*s4
1329*f1af5d2fSBarry Smith                          + v[33]*s5 + v[40]*s6 + v[47]*s7;
1330*f1af5d2fSBarry Smith     x[6+idt] = v[6]*s1 + v[13]*s2  + v[20]*s3 + v[27]*s4
1331*f1af5d2fSBarry Smith                          + v[34]*s5 + v[41]*s6 + v[48]*s7;
133215091d37SBarry Smith   }
133315091d37SBarry Smith 
133415091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
133515091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
133615091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
133715091d37SBarry Smith   PetscFunctionReturn(0);
133815091d37SBarry Smith }
133915091d37SBarry Smith 
134015091d37SBarry Smith #undef __FUNC__
134115091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6"
134215091d37SBarry Smith int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
134315091d37SBarry Smith {
134415091d37SBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
134515091d37SBarry Smith   IS              iscol=a->col,isrow=a->row;
134615091d37SBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
134715091d37SBarry Smith   int             *diag = a->diag;
134815091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1349*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*t;
135015091d37SBarry Smith 
135115091d37SBarry Smith   PetscFunctionBegin;
135215091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
135315091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1354*f1af5d2fSBarry Smith   t  = a->solve_work;
135515091d37SBarry Smith 
135615091d37SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
135715091d37SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
135815091d37SBarry Smith 
135915091d37SBarry Smith   /* forward solve the lower triangular */
136015091d37SBarry Smith   idx    = 6*(*r++);
1361*f1af5d2fSBarry Smith   t[0] = b[idx];   t[1] = b[1+idx];
1362*f1af5d2fSBarry Smith   t[2] = b[2+idx]; t[3] = b[3+idx];
1363*f1af5d2fSBarry Smith   t[4] = b[4+idx]; t[5] = b[5+idx];
136415091d37SBarry Smith   for ( i=1; i<n; i++ ) {
136515091d37SBarry Smith     v     = aa + 36*ai[i];
136615091d37SBarry Smith     vi    = aj + ai[i];
136715091d37SBarry Smith     nz    = diag[i] - ai[i];
136815091d37SBarry Smith     idx   = 6*(*r++);
1369*f1af5d2fSBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1370*f1af5d2fSBarry Smith     s5  = b[4+idx]; s6 = b[5+idx];
137115091d37SBarry Smith     while (nz--) {
137215091d37SBarry Smith       idx   = 6*(*vi++);
1373*f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx]; x3 = t[2+idx];
1374*f1af5d2fSBarry Smith       x4    = t[3+idx]; x5 = t[4+idx]; x6 = t[5+idx];
1375*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1376*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1377*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1378*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1379*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1380*f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
138115091d37SBarry Smith       v += 36;
138215091d37SBarry Smith     }
138315091d37SBarry Smith     idx = 6*i;
1384*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
1385*f1af5d2fSBarry Smith     t[2+idx] = s3;t[3+idx] = s4;
1386*f1af5d2fSBarry Smith     t[4+idx] = s5;t[5+idx] = s6;
138715091d37SBarry Smith   }
138815091d37SBarry Smith   /* backward solve the upper triangular */
138915091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
139015091d37SBarry Smith     v    = aa + 36*diag[i] + 36;
139115091d37SBarry Smith     vi   = aj + diag[i] + 1;
139215091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
139315091d37SBarry Smith     idt  = 6*i;
1394*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
1395*f1af5d2fSBarry Smith     s3 = t[2+idt];s4 = t[3+idt];
1396*f1af5d2fSBarry Smith     s5 = t[4+idt];s6 = t[5+idt];
139715091d37SBarry Smith     while (nz--) {
139815091d37SBarry Smith       idx   = 6*(*vi++);
1399*f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx];
1400*f1af5d2fSBarry Smith       x3    = t[2+idx]; x4 = t[3+idx];
1401*f1af5d2fSBarry Smith       x5    = t[4+idx]; x6 = t[5+idx];
1402*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1403*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1404*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1405*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1406*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1407*f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
140815091d37SBarry Smith       v += 36;
140915091d37SBarry Smith     }
141015091d37SBarry Smith     idc = 6*(*c--);
141115091d37SBarry Smith     v   = aa + 36*diag[i];
1412*f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1+v[6]*s2+v[12]*s3+
1413*f1af5d2fSBarry Smith                                  v[18]*s4+v[24]*s5+v[30]*s6;
1414*f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1+v[7]*s2+v[13]*s3+
1415*f1af5d2fSBarry Smith                                  v[19]*s4+v[25]*s5+v[31]*s6;
1416*f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1+v[8]*s2+v[14]*s3+
1417*f1af5d2fSBarry Smith                                  v[20]*s4+v[26]*s5+v[32]*s6;
1418*f1af5d2fSBarry Smith     x[3+idc] = t[3+idt] = v[3]*s1+v[9]*s2+v[15]*s3+
1419*f1af5d2fSBarry Smith                                  v[21]*s4+v[27]*s5+v[33]*s6;
1420*f1af5d2fSBarry Smith     x[4+idc] = t[4+idt] = v[4]*s1+v[10]*s2+v[16]*s3+
1421*f1af5d2fSBarry Smith                                  v[22]*s4+v[28]*s5+v[34]*s6;
1422*f1af5d2fSBarry Smith     x[5+idc] = t[5+idt] = v[5]*s1+v[11]*s2+v[17]*s3+
1423*f1af5d2fSBarry Smith                                  v[23]*s4+v[29]*s5+v[35]*s6;
142415091d37SBarry Smith   }
142515091d37SBarry Smith 
142615091d37SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
142715091d37SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
142815091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
142915091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
143015091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
143115091d37SBarry Smith   PetscFunctionReturn(0);
143215091d37SBarry Smith }
143315091d37SBarry Smith 
143415091d37SBarry Smith #undef __FUNC__
143515091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering"
143615091d37SBarry Smith int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
143715091d37SBarry Smith {
143815091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
143915091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
144015091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
144115091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1442*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
144315091d37SBarry Smith 
144415091d37SBarry Smith   PetscFunctionBegin;
144515091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
144615091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
144715091d37SBarry Smith   /* forward solve the lower triangular */
144815091d37SBarry Smith   idx    = 0;
144915091d37SBarry Smith   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
145015091d37SBarry Smith   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
145115091d37SBarry Smith   for ( i=1; i<n; i++ ) {
145215091d37SBarry Smith     v     =  aa + 36*ai[i];
145315091d37SBarry Smith     vi    =  aj + ai[i];
145415091d37SBarry Smith     nz    =  diag[i] - ai[i];
145515091d37SBarry Smith     idx   =  6*i;
1456*f1af5d2fSBarry Smith     s1  =  b[idx];   s2 = b[1+idx]; s3 = b[2+idx];
1457*f1af5d2fSBarry Smith     s4  =  b[3+idx]; s5 = b[4+idx]; s6 = b[5+idx];
145815091d37SBarry Smith     while (nz--) {
145915091d37SBarry Smith       jdx   = 6*(*vi++);
146015091d37SBarry Smith       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
146115091d37SBarry Smith       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
1462*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1463*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1464*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1465*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1466*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1467*f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
146815091d37SBarry Smith       v += 36;
146915091d37SBarry Smith      }
1470*f1af5d2fSBarry Smith     x[idx]   = s1;
1471*f1af5d2fSBarry Smith     x[1+idx] = s2;
1472*f1af5d2fSBarry Smith     x[2+idx] = s3;
1473*f1af5d2fSBarry Smith     x[3+idx] = s4;
1474*f1af5d2fSBarry Smith     x[4+idx] = s5;
1475*f1af5d2fSBarry Smith     x[5+idx] = s6;
147615091d37SBarry Smith   }
147715091d37SBarry Smith   /* backward solve the upper triangular */
147815091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
147915091d37SBarry Smith     v    = aa + 36*diag[i] + 36;
148015091d37SBarry Smith     vi   = aj + diag[i] + 1;
148115091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
148215091d37SBarry Smith     idt  = 6*i;
1483*f1af5d2fSBarry Smith     s1 = x[idt];   s2 = x[1+idt];
1484*f1af5d2fSBarry Smith     s3 = x[2+idt]; s4 = x[3+idt];
1485*f1af5d2fSBarry Smith     s5 = x[4+idt]; s6 = x[5+idt];
148615091d37SBarry Smith     while (nz--) {
148715091d37SBarry Smith       idx   = 6*(*vi++);
148815091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
148915091d37SBarry Smith       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
1490*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1491*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1492*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1493*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1494*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1495*f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
149615091d37SBarry Smith       v += 36;
149715091d37SBarry Smith     }
149815091d37SBarry Smith     v        = aa + 36*diag[i];
1499*f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[6]*s2  + v[12]*s3 + v[18]*s4 + v[24]*s5 + v[30]*s6;
1500*f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[7]*s2  + v[13]*s3 + v[19]*s4 + v[25]*s5 + v[31]*s6;
1501*f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[8]*s2  + v[14]*s3 + v[20]*s4 + v[26]*s5 + v[32]*s6;
1502*f1af5d2fSBarry Smith     x[3+idt] = v[3]*s1 + v[9]*s2  + v[15]*s3 + v[21]*s4 + v[27]*s5 + v[33]*s6;
1503*f1af5d2fSBarry Smith     x[4+idt] = v[4]*s1 + v[10]*s2 + v[16]*s3 + v[22]*s4 + v[28]*s5 + v[34]*s6;
1504*f1af5d2fSBarry Smith     x[5+idt] = v[5]*s1 + v[11]*s2 + v[17]*s3 + v[23]*s4 + v[29]*s5 + v[35]*s6;
150515091d37SBarry Smith   }
150615091d37SBarry Smith 
150715091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
150815091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
150915091d37SBarry Smith   PLogFlops(2*36*(a->nz) - 6*a->n);
151015091d37SBarry Smith   PetscFunctionReturn(0);
151115091d37SBarry Smith }
151215091d37SBarry Smith 
151315091d37SBarry Smith #undef __FUNC__
15144e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_5"
15154e2b4712SSatish Balay int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
15164e2b4712SSatish Balay {
15174e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
15184e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
15194e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
15204e2b4712SSatish Balay   int             *diag = a->diag;
15213f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1522*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*t;
15234e2b4712SSatish Balay 
15244e2b4712SSatish Balay   PetscFunctionBegin;
1525e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1526e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1527*f1af5d2fSBarry Smith   t  = a->solve_work;
15284e2b4712SSatish Balay 
15294e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
15304e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
15314e2b4712SSatish Balay 
15324e2b4712SSatish Balay   /* forward solve the lower triangular */
15334e2b4712SSatish Balay   idx    = 5*(*r++);
1534*f1af5d2fSBarry Smith   t[0] = b[idx];   t[1] = b[1+idx];
1535*f1af5d2fSBarry Smith   t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx];
15364e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
15374e2b4712SSatish Balay     v     = aa + 25*ai[i];
15384e2b4712SSatish Balay     vi    = aj + ai[i];
15394e2b4712SSatish Balay     nz    = diag[i] - ai[i];
15404e2b4712SSatish Balay     idx   = 5*(*r++);
1541*f1af5d2fSBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1542*f1af5d2fSBarry Smith     s5  = b[4+idx];
15434e2b4712SSatish Balay     while (nz--) {
15444e2b4712SSatish Balay       idx   = 5*(*vi++);
1545*f1af5d2fSBarry Smith       x1    = t[idx];  x2 = t[1+idx];x3 = t[2+idx];
1546*f1af5d2fSBarry Smith       x4    = t[3+idx];x5 = t[4+idx];
1547*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1548*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1549*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1550*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1551*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
15524e2b4712SSatish Balay       v += 25;
15534e2b4712SSatish Balay     }
15544e2b4712SSatish Balay     idx = 5*i;
1555*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
1556*f1af5d2fSBarry Smith     t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
15574e2b4712SSatish Balay   }
15584e2b4712SSatish Balay   /* backward solve the upper triangular */
15594e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
15604e2b4712SSatish Balay     v    = aa + 25*diag[i] + 25;
15614e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
15624e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
15634e2b4712SSatish Balay     idt  = 5*i;
1564*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
1565*f1af5d2fSBarry Smith     s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
15664e2b4712SSatish Balay     while (nz--) {
15674e2b4712SSatish Balay       idx   = 5*(*vi++);
1568*f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx];
1569*f1af5d2fSBarry Smith       x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
1570*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1571*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1572*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1573*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1574*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
15754e2b4712SSatish Balay       v += 25;
15764e2b4712SSatish Balay     }
15774e2b4712SSatish Balay     idc = 5*(*c--);
15784e2b4712SSatish Balay     v   = aa + 25*diag[i];
1579*f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1+v[5]*s2+v[10]*s3+
1580*f1af5d2fSBarry Smith                                  v[15]*s4+v[20]*s5;
1581*f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1+v[6]*s2+v[11]*s3+
1582*f1af5d2fSBarry Smith                                  v[16]*s4+v[21]*s5;
1583*f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1+v[7]*s2+v[12]*s3+
1584*f1af5d2fSBarry Smith                                  v[17]*s4+v[22]*s5;
1585*f1af5d2fSBarry Smith     x[3+idc] = t[3+idt] = v[3]*s1+v[8]*s2+v[13]*s3+
1586*f1af5d2fSBarry Smith                                  v[18]*s4+v[23]*s5;
1587*f1af5d2fSBarry Smith     x[4+idc] = t[4+idt] = v[4]*s1+v[9]*s2+v[14]*s3+
1588*f1af5d2fSBarry Smith                                  v[19]*s4+v[24]*s5;
15894e2b4712SSatish Balay   }
15904e2b4712SSatish Balay 
15914e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
15924e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1593e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1594e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
159542015d63SBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
15964e2b4712SSatish Balay   PetscFunctionReturn(0);
15974e2b4712SSatish Balay }
15984e2b4712SSatish Balay 
15994e2b4712SSatish Balay #undef __FUNC__
160015091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering"
160115091d37SBarry Smith int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
160215091d37SBarry Smith {
160315091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
160415091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
160515091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
160615091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1607*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;;
160815091d37SBarry Smith 
160915091d37SBarry Smith   PetscFunctionBegin;
161015091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
161115091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
161215091d37SBarry Smith   /* forward solve the lower triangular */
161315091d37SBarry Smith   idx    = 0;
161415091d37SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
161515091d37SBarry Smith   for ( i=1; i<n; i++ ) {
161615091d37SBarry Smith     v     =  aa + 25*ai[i];
161715091d37SBarry Smith     vi    =  aj + ai[i];
161815091d37SBarry Smith     nz    =  diag[i] - ai[i];
161915091d37SBarry Smith     idx   =  5*i;
1620*f1af5d2fSBarry Smith     s1  =  b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];s5 = b[4+idx];
162115091d37SBarry Smith     while (nz--) {
162215091d37SBarry Smith       jdx   = 5*(*vi++);
162315091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
1624*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1625*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1626*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1627*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1628*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
162915091d37SBarry Smith       v    += 25;
163015091d37SBarry Smith     }
1631*f1af5d2fSBarry Smith     x[idx]   = s1;
1632*f1af5d2fSBarry Smith     x[1+idx] = s2;
1633*f1af5d2fSBarry Smith     x[2+idx] = s3;
1634*f1af5d2fSBarry Smith     x[3+idx] = s4;
1635*f1af5d2fSBarry Smith     x[4+idx] = s5;
163615091d37SBarry Smith   }
163715091d37SBarry Smith   /* backward solve the upper triangular */
163815091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
163915091d37SBarry Smith     v    = aa + 25*diag[i] + 25;
164015091d37SBarry Smith     vi   = aj + diag[i] + 1;
164115091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
164215091d37SBarry Smith     idt  = 5*i;
1643*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt];
1644*f1af5d2fSBarry Smith     s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
164515091d37SBarry Smith     while (nz--) {
164615091d37SBarry Smith       idx   = 5*(*vi++);
164715091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
1648*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1649*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1650*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1651*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1652*f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
165315091d37SBarry Smith       v    += 25;
165415091d37SBarry Smith     }
165515091d37SBarry Smith     v        = aa + 25*diag[i];
1656*f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[5]*s2 + v[10]*s3  + v[15]*s4 + v[20]*s5;
1657*f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[6]*s2 + v[11]*s3  + v[16]*s4 + v[21]*s5;
1658*f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[7]*s2 + v[12]*s3  + v[17]*s4 + v[22]*s5;
1659*f1af5d2fSBarry Smith     x[3+idt] = v[3]*s1 + v[8]*s2 + v[13]*s3  + v[18]*s4 + v[23]*s5;
1660*f1af5d2fSBarry Smith     x[4+idt] = v[4]*s1 + v[9]*s2 + v[14]*s3  + v[19]*s4 + v[24]*s5;
166115091d37SBarry Smith   }
166215091d37SBarry Smith 
166315091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
166415091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
166515091d37SBarry Smith   PLogFlops(2*25*(a->nz) - 5*a->n);
166615091d37SBarry Smith   PetscFunctionReturn(0);
166715091d37SBarry Smith }
166815091d37SBarry Smith 
166915091d37SBarry Smith #undef __FUNC__
16704e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4"
16714e2b4712SSatish Balay int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
16724e2b4712SSatish Balay {
16734e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
16744e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
16754e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
16764e2b4712SSatish Balay   int             *diag = a->diag;
16773f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1678*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,x1,x2,x3,x4,*t;
16794e2b4712SSatish Balay 
16804e2b4712SSatish Balay   PetscFunctionBegin;
1681e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1682e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1683*f1af5d2fSBarry Smith   t  = a->solve_work;
16844e2b4712SSatish Balay 
16854e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
16864e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
16874e2b4712SSatish Balay 
16884e2b4712SSatish Balay   /* forward solve the lower triangular */
16894e2b4712SSatish Balay   idx    = 4*(*r++);
1690*f1af5d2fSBarry Smith   t[0] = b[idx];   t[1] = b[1+idx];
1691*f1af5d2fSBarry Smith   t[2] = b[2+idx]; t[3] = b[3+idx];
16924e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
16934e2b4712SSatish Balay     v     = aa + 16*ai[i];
16944e2b4712SSatish Balay     vi    = aj + ai[i];
16954e2b4712SSatish Balay     nz    = diag[i] - ai[i];
16964e2b4712SSatish Balay     idx   = 4*(*r++);
1697*f1af5d2fSBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
16984e2b4712SSatish Balay     while (nz--) {
16994e2b4712SSatish Balay       idx   = 4*(*vi++);
1700*f1af5d2fSBarry Smith       x1    = t[idx];x2 = t[1+idx];x3 = t[2+idx];x4 = t[3+idx];
1701*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
1702*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
1703*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1704*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
17054e2b4712SSatish Balay       v    += 16;
17064e2b4712SSatish Balay     }
17074e2b4712SSatish Balay     idx        = 4*i;
1708*f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
1709*f1af5d2fSBarry Smith     t[2+idx] = s3;t[3+idx] = s4;
17104e2b4712SSatish Balay   }
17114e2b4712SSatish Balay   /* backward solve the upper triangular */
17124e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
17134e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
17144e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
17154e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
17164e2b4712SSatish Balay     idt  = 4*i;
1717*f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
1718*f1af5d2fSBarry Smith     s3 = t[2+idt];s4 = t[3+idt];
17194e2b4712SSatish Balay     while (nz--) {
17204e2b4712SSatish Balay       idx   = 4*(*vi++);
1721*f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx];
1722*f1af5d2fSBarry Smith       x3    = t[2+idx]; x4 = t[3+idx];
1723*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1724*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1725*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1726*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
17274e2b4712SSatish Balay       v += 16;
17284e2b4712SSatish Balay     }
17294e2b4712SSatish Balay     idc      = 4*(*c--);
17304e2b4712SSatish Balay     v        = aa + 16*diag[i];
1731*f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1+v[4]*s2+v[8]*s3+v[12]*s4;
1732*f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1+v[5]*s2+v[9]*s3+v[13]*s4;
1733*f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1+v[6]*s2+v[10]*s3+v[14]*s4;
1734*f1af5d2fSBarry Smith     x[3+idc] = t[3+idt] = v[3]*s1+v[7]*s2+v[11]*s3+v[15]*s4;
17354e2b4712SSatish Balay   }
17364e2b4712SSatish Balay 
17374e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
17384e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1739e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1740e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
174142015d63SBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
17424e2b4712SSatish Balay   PetscFunctionReturn(0);
17434e2b4712SSatish Balay }
17440ef38995SBarry Smith 
17450ef38995SBarry Smith 
17464e2b4712SSatish Balay /*
17474e2b4712SSatish Balay       Special case where the matrix was ILU(0) factored in the natural
17484e2b4712SSatish Balay    ordering. This eliminates the need for the column and row permutation.
17494e2b4712SSatish Balay */
17504e2b4712SSatish Balay #undef __FUNC__
17514e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering"
17524e2b4712SSatish Balay int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
17534e2b4712SSatish Balay {
17544e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
175530d4dcafSBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
175630d4dcafSBarry Smith   int             ierr,*diag = a->diag;
17573f1db9ecSBarry Smith   MatScalar       *aa=a->a;
175830d4dcafSBarry Smith   Scalar          *x,*b;
17594e2b4712SSatish Balay 
17604e2b4712SSatish Balay   PetscFunctionBegin;
1761e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1762e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
17634e2b4712SSatish Balay 
1764aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
17652853dc0eSBarry Smith   {
17662853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
17672853dc0eSBarry Smith     fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w);
17682853dc0eSBarry Smith   }
1769aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
17702853dc0eSBarry Smith   {
17712853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
17722853dc0eSBarry Smith     fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w);
17732853dc0eSBarry Smith   }
1774aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
17752853dc0eSBarry Smith   fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b);
1776e1293385SBarry Smith #else
177730d4dcafSBarry Smith   {
1778*f1af5d2fSBarry Smith     Scalar    s1,s2,s3,s4,x1,x2,x3,x4;
17793f1db9ecSBarry Smith     MatScalar *v;
178030d4dcafSBarry Smith     int       jdx,idt,idx,nz,*vi,i;
1781e1293385SBarry Smith 
17824e2b4712SSatish Balay   /* forward solve the lower triangular */
17834e2b4712SSatish Balay   idx    = 0;
1784e1293385SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3];
17854e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
17864e2b4712SSatish Balay     v     =  aa      + 16*ai[i];
17874e2b4712SSatish Balay     vi    =  aj      + ai[i];
17884e2b4712SSatish Balay     nz    =  diag[i] - ai[i];
1789e1293385SBarry Smith     idx   +=  4;
1790*f1af5d2fSBarry Smith     s1  =  b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
17914e2b4712SSatish Balay     while (nz--) {
17924e2b4712SSatish Balay       jdx   = 4*(*vi++);
17934e2b4712SSatish Balay       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
1794*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
1795*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
1796*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1797*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
17984e2b4712SSatish Balay       v    += 16;
17994e2b4712SSatish Balay     }
1800*f1af5d2fSBarry Smith     x[idx]   = s1;
1801*f1af5d2fSBarry Smith     x[1+idx] = s2;
1802*f1af5d2fSBarry Smith     x[2+idx] = s3;
1803*f1af5d2fSBarry Smith     x[3+idx] = s4;
18044e2b4712SSatish Balay   }
18054e2b4712SSatish Balay   /* backward solve the upper triangular */
18064e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
18074e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
18084e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
18094e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
18104e2b4712SSatish Balay     idt  = 4*i;
1811*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt];
1812*f1af5d2fSBarry Smith     s3 = x[2+idt];s4 = x[3+idt];
18134e2b4712SSatish Balay     while (nz--) {
18144e2b4712SSatish Balay       idx   = 4*(*vi++);
18154e2b4712SSatish Balay       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx];
1816*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1817*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1818*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1819*f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
18204e2b4712SSatish Balay       v    += 16;
18214e2b4712SSatish Balay     }
18224e2b4712SSatish Balay     v        = aa + 16*diag[i];
1823*f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[4]*s2 + v[8]*s3  + v[12]*s4;
1824*f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[5]*s2 + v[9]*s3  + v[13]*s4;
1825*f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[6]*s2 + v[10]*s3 + v[14]*s4;
1826*f1af5d2fSBarry Smith     x[3+idt] = v[3]*s1 + v[7]*s2 + v[11]*s3 + v[15]*s4;
18274e2b4712SSatish Balay   }
182830d4dcafSBarry Smith   }
1829e1293385SBarry Smith #endif
18304e2b4712SSatish Balay 
1831e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1832e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
183342015d63SBarry Smith   PLogFlops(2*16*(a->nz) - 4*a->n);
18344e2b4712SSatish Balay   PetscFunctionReturn(0);
18354e2b4712SSatish Balay }
18364e2b4712SSatish Balay 
1837667159a5SBarry Smith #undef __FUNC__
18384e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_3"
18394e2b4712SSatish Balay int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
18404e2b4712SSatish Balay {
18414e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
18424e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
18434e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
18444e2b4712SSatish Balay   int             *diag = a->diag;
18453f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1846*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,x1,x2,x3,*t;
18474e2b4712SSatish Balay 
18484e2b4712SSatish Balay   PetscFunctionBegin;
1849e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1850e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1851*f1af5d2fSBarry Smith   t  = a->solve_work;
18524e2b4712SSatish Balay 
18534e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
18544e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
18554e2b4712SSatish Balay 
18564e2b4712SSatish Balay   /* forward solve the lower triangular */
18574e2b4712SSatish Balay   idx    = 3*(*r++);
1858*f1af5d2fSBarry Smith   t[0] = b[idx]; t[1] = b[1+idx]; t[2] = b[2+idx];
18594e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
18604e2b4712SSatish Balay     v     = aa + 9*ai[i];
18614e2b4712SSatish Balay     vi    = aj + ai[i];
18624e2b4712SSatish Balay     nz    = diag[i] - ai[i];
18634e2b4712SSatish Balay     idx   = 3*(*r++);
1864*f1af5d2fSBarry Smith     s1  = b[idx]; s2 = b[1+idx]; s3 = b[2+idx];
18654e2b4712SSatish Balay     while (nz--) {
18664e2b4712SSatish Balay       idx   = 3*(*vi++);
1867*f1af5d2fSBarry Smith       x1    = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
1868*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1869*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1870*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
18714e2b4712SSatish Balay       v += 9;
18724e2b4712SSatish Balay     }
18734e2b4712SSatish Balay     idx = 3*i;
1874*f1af5d2fSBarry Smith     t[idx] = s1; t[1+idx] = s2; t[2+idx] = s3;
18754e2b4712SSatish Balay   }
18764e2b4712SSatish Balay   /* backward solve the upper triangular */
18774e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
18784e2b4712SSatish Balay     v    = aa + 9*diag[i] + 9;
18794e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
18804e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
18814e2b4712SSatish Balay     idt  = 3*i;
1882*f1af5d2fSBarry Smith     s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];
18834e2b4712SSatish Balay     while (nz--) {
18844e2b4712SSatish Balay       idx   = 3*(*vi++);
1885*f1af5d2fSBarry Smith       x1    = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
1886*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1887*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1888*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
18894e2b4712SSatish Balay       v += 9;
18904e2b4712SSatish Balay     }
18914e2b4712SSatish Balay     idc = 3*(*c--);
18924e2b4712SSatish Balay     v   = aa + 9*diag[i];
1893*f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
1894*f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
1895*f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
18964e2b4712SSatish Balay   }
18974e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
18984e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1899e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1900e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
190142015d63SBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
19024e2b4712SSatish Balay   PetscFunctionReturn(0);
19034e2b4712SSatish Balay }
19044e2b4712SSatish Balay 
190515091d37SBarry Smith /*
190615091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
190715091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
190815091d37SBarry Smith */
190915091d37SBarry Smith #undef __FUNC__
191015091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering"
191115091d37SBarry Smith int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
191215091d37SBarry Smith {
191315091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
191415091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
191515091d37SBarry Smith   int             ierr,*diag = a->diag;
191615091d37SBarry Smith   MatScalar       *aa=a->a, *v;
1917*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,x1,x2,x3;
191815091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
191915091d37SBarry Smith 
192015091d37SBarry Smith   PetscFunctionBegin;
192115091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
192215091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
192315091d37SBarry Smith 
192415091d37SBarry Smith 
192515091d37SBarry Smith   /* forward solve the lower triangular */
192615091d37SBarry Smith   idx    = 0;
192715091d37SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2];
192815091d37SBarry Smith   for ( i=1; i<n; i++ ) {
192915091d37SBarry Smith     v     =  aa      + 9*ai[i];
193015091d37SBarry Smith     vi    =  aj      + ai[i];
193115091d37SBarry Smith     nz    =  diag[i] - ai[i];
193215091d37SBarry Smith     idx   +=  3;
1933*f1af5d2fSBarry Smith     s1  =  b[idx];s2 = b[1+idx];s3 = b[2+idx];
193415091d37SBarry Smith     while (nz--) {
193515091d37SBarry Smith       jdx   = 3*(*vi++);
193615091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
1937*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1938*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1939*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
194015091d37SBarry Smith       v    += 9;
194115091d37SBarry Smith     }
1942*f1af5d2fSBarry Smith     x[idx]   = s1;
1943*f1af5d2fSBarry Smith     x[1+idx] = s2;
1944*f1af5d2fSBarry Smith     x[2+idx] = s3;
194515091d37SBarry Smith   }
194615091d37SBarry Smith   /* backward solve the upper triangular */
194715091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
194815091d37SBarry Smith     v    = aa + 9*diag[i] + 9;
194915091d37SBarry Smith     vi   = aj + diag[i] + 1;
195015091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
195115091d37SBarry Smith     idt  = 3*i;
1952*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt];
1953*f1af5d2fSBarry Smith     s3 = x[2+idt];
195415091d37SBarry Smith     while (nz--) {
195515091d37SBarry Smith       idx   = 3*(*vi++);
195615091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx];
1957*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1958*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1959*f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
196015091d37SBarry Smith       v    += 9;
196115091d37SBarry Smith     }
196215091d37SBarry Smith     v        = aa +  9*diag[i];
1963*f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
1964*f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
1965*f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
196615091d37SBarry Smith   }
196715091d37SBarry Smith 
196815091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
196915091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
197015091d37SBarry Smith   PLogFlops(2*9*(a->nz) - 3*a->n);
197115091d37SBarry Smith   PetscFunctionReturn(0);
197215091d37SBarry Smith }
197315091d37SBarry Smith 
19744e2b4712SSatish Balay #undef __FUNC__
19754e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_2"
19764e2b4712SSatish Balay int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
19774e2b4712SSatish Balay {
19784e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
19794e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
19804e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
19814e2b4712SSatish Balay   int             *diag = a->diag;
19823f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1983*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,x1,x2,*t;
19844e2b4712SSatish Balay 
19854e2b4712SSatish Balay   PetscFunctionBegin;
1986e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1987e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1988*f1af5d2fSBarry Smith   t  = a->solve_work;
19894e2b4712SSatish Balay 
19904e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
19914e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
19924e2b4712SSatish Balay 
19934e2b4712SSatish Balay   /* forward solve the lower triangular */
19944e2b4712SSatish Balay   idx    = 2*(*r++);
1995*f1af5d2fSBarry Smith   t[0] = b[idx]; t[1] = b[1+idx];
19964e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
19974e2b4712SSatish Balay     v     = aa + 4*ai[i];
19984e2b4712SSatish Balay     vi    = aj + ai[i];
19994e2b4712SSatish Balay     nz    = diag[i] - ai[i];
20004e2b4712SSatish Balay     idx   = 2*(*r++);
2001*f1af5d2fSBarry Smith     s1  = b[idx]; s2 = b[1+idx];
20024e2b4712SSatish Balay     while (nz--) {
20034e2b4712SSatish Balay       idx   = 2*(*vi++);
2004*f1af5d2fSBarry Smith       x1    = t[idx]; x2 = t[1+idx];
2005*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
2006*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
20074e2b4712SSatish Balay       v += 4;
20084e2b4712SSatish Balay     }
20094e2b4712SSatish Balay     idx = 2*i;
2010*f1af5d2fSBarry Smith     t[idx] = s1; t[1+idx] = s2;
20114e2b4712SSatish Balay   }
20124e2b4712SSatish Balay   /* backward solve the upper triangular */
20134e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
20144e2b4712SSatish Balay     v    = aa + 4*diag[i] + 4;
20154e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
20164e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
20174e2b4712SSatish Balay     idt  = 2*i;
2018*f1af5d2fSBarry Smith     s1 = t[idt]; s2 = t[1+idt];
20194e2b4712SSatish Balay     while (nz--) {
20204e2b4712SSatish Balay       idx   = 2*(*vi++);
2021*f1af5d2fSBarry Smith       x1    = t[idx]; x2 = t[1+idx];
2022*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
2023*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
20244e2b4712SSatish Balay       v += 4;
20254e2b4712SSatish Balay     }
20264e2b4712SSatish Balay     idc = 2*(*c--);
20274e2b4712SSatish Balay     v   = aa + 4*diag[i];
2028*f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1 + v[2]*s2;
2029*f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1 + v[3]*s2;
20304e2b4712SSatish Balay   }
20314e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
20324e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
2033e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2034e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
203542015d63SBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
20364e2b4712SSatish Balay   PetscFunctionReturn(0);
20374e2b4712SSatish Balay }
20384e2b4712SSatish Balay 
203915091d37SBarry Smith /*
204015091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
204115091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
204215091d37SBarry Smith */
204315091d37SBarry Smith #undef __FUNC__
204415091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering"
204515091d37SBarry Smith int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
204615091d37SBarry Smith {
204715091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
204815091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
204915091d37SBarry Smith   int             ierr,*diag = a->diag;
205015091d37SBarry Smith   MatScalar       *aa=a->a,*v;
2051*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,x1,x2;
205215091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
205315091d37SBarry Smith 
205415091d37SBarry Smith   PetscFunctionBegin;
205515091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
205615091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
205715091d37SBarry Smith 
205815091d37SBarry Smith   /* forward solve the lower triangular */
205915091d37SBarry Smith   idx    = 0;
206015091d37SBarry Smith   x[0]   = b[0]; x[1] = b[1];
206115091d37SBarry Smith   for ( i=1; i<n; i++ ) {
206215091d37SBarry Smith     v     =  aa      + 4*ai[i];
206315091d37SBarry Smith     vi    =  aj      + ai[i];
206415091d37SBarry Smith     nz    =  diag[i] - ai[i];
206515091d37SBarry Smith     idx   +=  2;
2066*f1af5d2fSBarry Smith     s1  =  b[idx];s2 = b[1+idx];
206715091d37SBarry Smith     while (nz--) {
206815091d37SBarry Smith       jdx   = 2*(*vi++);
206915091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];
2070*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
2071*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
207215091d37SBarry Smith       v    += 4;
207315091d37SBarry Smith     }
2074*f1af5d2fSBarry Smith     x[idx]   = s1;
2075*f1af5d2fSBarry Smith     x[1+idx] = s2;
207615091d37SBarry Smith   }
207715091d37SBarry Smith   /* backward solve the upper triangular */
207815091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
207915091d37SBarry Smith     v    = aa + 4*diag[i] + 4;
208015091d37SBarry Smith     vi   = aj + diag[i] + 1;
208115091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
208215091d37SBarry Smith     idt  = 2*i;
2083*f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt];
208415091d37SBarry Smith     while (nz--) {
208515091d37SBarry Smith       idx   = 2*(*vi++);
208615091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];
2087*f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
2088*f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
208915091d37SBarry Smith       v    += 4;
209015091d37SBarry Smith     }
209115091d37SBarry Smith     v        = aa +  4*diag[i];
2092*f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[2]*s2;
2093*f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[3]*s2;
209415091d37SBarry Smith   }
209515091d37SBarry Smith 
209615091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
209715091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
209815091d37SBarry Smith   PLogFlops(2*4*(a->nz) - 2*a->n);
209915091d37SBarry Smith   PetscFunctionReturn(0);
210015091d37SBarry Smith }
210115091d37SBarry Smith 
21024e2b4712SSatish Balay #undef __FUNC__
21034e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_1"
21044e2b4712SSatish Balay int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
21054e2b4712SSatish Balay {
21064e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
21074e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
21084e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
21094e2b4712SSatish Balay   int             *diag = a->diag;
21103f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
2111*f1af5d2fSBarry Smith   Scalar          *x,*b,s1,*t;
21124e2b4712SSatish Balay 
21134e2b4712SSatish Balay   PetscFunctionBegin;
21144e2b4712SSatish Balay   if (!n) PetscFunctionReturn(0);
21154e2b4712SSatish Balay 
2116e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2117e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2118*f1af5d2fSBarry Smith   t  = a->solve_work;
21194e2b4712SSatish Balay 
21204e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
21214e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
21224e2b4712SSatish Balay 
21234e2b4712SSatish Balay   /* forward solve the lower triangular */
2124*f1af5d2fSBarry Smith   t[0] = b[*r++];
21254e2b4712SSatish Balay   for ( i=1; i<n; i++ ) {
21264e2b4712SSatish Balay     v     = aa + ai[i];
21274e2b4712SSatish Balay     vi    = aj + ai[i];
21284e2b4712SSatish Balay     nz    = diag[i] - ai[i];
2129*f1af5d2fSBarry Smith     s1  = b[*r++];
21304e2b4712SSatish Balay     while (nz--) {
2131*f1af5d2fSBarry Smith       s1 -= (*v++)*t[*vi++];
21324e2b4712SSatish Balay     }
2133*f1af5d2fSBarry Smith     t[i] = s1;
21344e2b4712SSatish Balay   }
21354e2b4712SSatish Balay   /* backward solve the upper triangular */
21364e2b4712SSatish Balay   for ( i=n-1; i>=0; i-- ){
21374e2b4712SSatish Balay     v    = aa + diag[i] + 1;
21384e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
21394e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
2140*f1af5d2fSBarry Smith     s1 = t[i];
21414e2b4712SSatish Balay     while (nz--) {
2142*f1af5d2fSBarry Smith       s1 -= (*v++)*t[*vi++];
21434e2b4712SSatish Balay     }
2144*f1af5d2fSBarry Smith     x[*c--] = t[i] = aa[diag[i]]*s1;
21454e2b4712SSatish Balay   }
21464e2b4712SSatish Balay 
21474e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
21484e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
2149e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2150e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
21514e2b4712SSatish Balay   PLogFlops(2*1*(a->nz) - a->n);
21524e2b4712SSatish Balay   PetscFunctionReturn(0);
21534e2b4712SSatish Balay }
215415091d37SBarry Smith /*
215515091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
215615091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
215715091d37SBarry Smith */
215815091d37SBarry Smith #undef __FUNC__
215915091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering"
216015091d37SBarry Smith int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
216115091d37SBarry Smith {
216215091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
216315091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
216415091d37SBarry Smith   int             ierr,*diag = a->diag;
216515091d37SBarry Smith   MatScalar       *aa=a->a;
216615091d37SBarry Smith   Scalar          *x,*b;
2167*f1af5d2fSBarry Smith   Scalar          s1,x1;
216815091d37SBarry Smith   MatScalar       *v;
216915091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
217015091d37SBarry Smith 
217115091d37SBarry Smith   PetscFunctionBegin;
217215091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
217315091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
217415091d37SBarry Smith 
217515091d37SBarry Smith   /* forward solve the lower triangular */
217615091d37SBarry Smith   idx    = 0;
217715091d37SBarry Smith   x[0]   = b[0];
217815091d37SBarry Smith   for ( i=1; i<n; i++ ) {
217915091d37SBarry Smith     v     =  aa      + ai[i];
218015091d37SBarry Smith     vi    =  aj      + ai[i];
218115091d37SBarry Smith     nz    =  diag[i] - ai[i];
218215091d37SBarry Smith     idx   +=  1;
2183*f1af5d2fSBarry Smith     s1  =  b[idx];
218415091d37SBarry Smith     while (nz--) {
218515091d37SBarry Smith       jdx   = *vi++;
218615091d37SBarry Smith       x1    = x[jdx];
2187*f1af5d2fSBarry Smith       s1 -= v[0]*x1;
218815091d37SBarry Smith       v    += 1;
218915091d37SBarry Smith     }
2190*f1af5d2fSBarry Smith     x[idx]   = s1;
219115091d37SBarry Smith   }
219215091d37SBarry Smith   /* backward solve the upper triangular */
219315091d37SBarry Smith   for ( i=n-1; i>=0; i-- ){
219415091d37SBarry Smith     v    = aa + diag[i] + 1;
219515091d37SBarry Smith     vi   = aj + diag[i] + 1;
219615091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
219715091d37SBarry Smith     idt  = i;
2198*f1af5d2fSBarry Smith     s1 = x[idt];
219915091d37SBarry Smith     while (nz--) {
220015091d37SBarry Smith       idx   = *vi++;
220115091d37SBarry Smith       x1    = x[idx];
2202*f1af5d2fSBarry Smith       s1 -= v[0]*x1;
220315091d37SBarry Smith       v    += 1;
220415091d37SBarry Smith     }
220515091d37SBarry Smith     v        = aa +  diag[i];
2206*f1af5d2fSBarry Smith     x[idt]   = v[0]*s1;
220715091d37SBarry Smith   }
220815091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
220915091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
221015091d37SBarry Smith   PLogFlops(2*(a->nz) - a->n);
221115091d37SBarry Smith   PetscFunctionReturn(0);
221215091d37SBarry Smith }
22134e2b4712SSatish Balay 
22144e2b4712SSatish Balay /* ----------------------------------------------------------------*/
22154e2b4712SSatish Balay /*
22164e2b4712SSatish Balay      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
22174e2b4712SSatish Balay    except that the data structure of Mat_SeqAIJ is slightly different.
22184e2b4712SSatish Balay    Not a good example of code reuse.
22194e2b4712SSatish Balay */
2220435faa5fSBarry Smith extern int MatMissingDiag_SeqBAIJ(Mat);
2221435faa5fSBarry Smith 
22224e2b4712SSatish Balay #undef __FUNC__
22234e2b4712SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ"
2224435faa5fSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
22254e2b4712SSatish Balay {
22264e2b4712SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
22274e2b4712SSatish Balay   IS          isicol;
22284e2b4712SSatish Balay   int         *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j;
22294e2b4712SSatish Balay   int         *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
2230335d9088SBarry Smith   int         *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0, dcount = 0;
2231435faa5fSBarry Smith   int         incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill;
22324e2b4712SSatish Balay   PetscTruth  col_identity, row_identity;
2233435faa5fSBarry Smith   double      f;
22344e2b4712SSatish Balay 
22354e2b4712SSatish Balay   PetscFunctionBegin;
2236435faa5fSBarry Smith   if (info) {
2237435faa5fSBarry Smith     f             = info->fill;
2238335d9088SBarry Smith     levels        = (int) info->levels;
2239335d9088SBarry Smith     diagonal_fill = (int) info->diagonal_fill;
2240435faa5fSBarry Smith   } else {
2241435faa5fSBarry Smith     f             = 1.0;
2242435faa5fSBarry Smith     levels        = 0;
2243435faa5fSBarry Smith     diagonal_fill = 0;
2244435faa5fSBarry Smith   }
2245e51c0b9cSSatish Balay   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
2246e51c0b9cSSatish Balay 
22474e2b4712SSatish Balay   /* special case that simply copies fill pattern */
22484e2b4712SSatish Balay   PetscValidHeaderSpecific(isrow,IS_COOKIE);
22494e2b4712SSatish Balay   PetscValidHeaderSpecific(iscol,IS_COOKIE);
2250667159a5SBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2251667159a5SBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
22524e2b4712SSatish Balay   if (levels == 0 && row_identity && col_identity) {
2253e1293385SBarry Smith     ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
22544e2b4712SSatish Balay     (*fact)->factor = FACTOR_LU;
22554e2b4712SSatish Balay     b               = (Mat_SeqBAIJ *) (*fact)->data;
22564e2b4712SSatish Balay     if (!b->diag) {
22574e2b4712SSatish Balay       ierr = MatMarkDiag_SeqBAIJ(*fact);CHKERRQ(ierr);
22584e2b4712SSatish Balay     }
2259435faa5fSBarry Smith     ierr = MatMissingDiag_SeqBAIJ(*fact);CHKERRQ(ierr);
22604e2b4712SSatish Balay     b->row        = isrow;
22614e2b4712SSatish Balay     b->col        = iscol;
2262e51c0b9cSSatish Balay     b->icol       = isicol;
22634e2b4712SSatish Balay     b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
22644e2b4712SSatish Balay    /*
226515091d37SBarry Smith         Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
226615091d37SBarry Smith         for ILU(0) factorization with natural ordering
22674e2b4712SSatish Balay    */
226815091d37SBarry Smith     switch (b->bs) {
226915091d37SBarry Smith     case 2:
227015091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
227115091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
227215091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
227315091d37SBarry Smith       break;
227415091d37SBarry Smith     case 3:
227515091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
227615091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
227715091d37SBarry Smith       PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
227815091d37SBarry Smith       break;
227915091d37SBarry Smith     case 4:
2280f830108cSBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
2281f830108cSBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
228215091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
228315091d37SBarry Smith       break;
228415091d37SBarry Smith     case 5:
2285667159a5SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
2286667159a5SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
228715091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
228815091d37SBarry Smith       break;
228915091d37SBarry Smith     case 6:
229015091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
229115091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
229215091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
229315091d37SBarry Smith       break;
229415091d37SBarry Smith     case 7:
229515091d37SBarry Smith       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
229615091d37SBarry Smith       (*fact)->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
229715091d37SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
229815091d37SBarry Smith     break;
22994e2b4712SSatish Balay   }
23004e2b4712SSatish Balay     PetscFunctionReturn(0);
23014e2b4712SSatish Balay   }
23024e2b4712SSatish Balay 
23034e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
23044e2b4712SSatish Balay   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
23054e2b4712SSatish Balay 
23064e2b4712SSatish Balay   /* get new row pointers */
23074e2b4712SSatish Balay   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
23084e2b4712SSatish Balay   ainew[0] = 0;
23094e2b4712SSatish Balay   /* don't know how many column pointers are needed so estimate */
23104e2b4712SSatish Balay   jmax = (int) (f*ai[n] + 1);
23114e2b4712SSatish Balay   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
23124e2b4712SSatish Balay   /* ajfill is level of fill for each fill entry */
23134e2b4712SSatish Balay   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill);
23144e2b4712SSatish Balay   /* fill is a linked list of nonzeros in active row */
23154e2b4712SSatish Balay   fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill);
23164e2b4712SSatish Balay   /* im is level for each filled value */
23174e2b4712SSatish Balay   im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im);
23184e2b4712SSatish Balay   /* dloc is location of diagonal in factor */
23194e2b4712SSatish Balay   dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc);
23204e2b4712SSatish Balay   dloc[0]  = 0;
23214e2b4712SSatish Balay   for ( prow=0; prow<n; prow++ ) {
2322435faa5fSBarry Smith 
2323435faa5fSBarry Smith     /* copy prow into linked list */
23244e2b4712SSatish Balay     nzf        = nz  = ai[r[prow]+1] - ai[r[prow]];
23254e2b4712SSatish Balay     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
23264e2b4712SSatish Balay     xi         = aj + ai[r[prow]];
23274e2b4712SSatish Balay     fill[n]    = n;
2328435faa5fSBarry Smith     fill[prow] = -1; /* marker for diagonal entry */
23294e2b4712SSatish Balay     while (nz--) {
23304e2b4712SSatish Balay       fm  = n;
23314e2b4712SSatish Balay       idx = ic[*xi++];
23324e2b4712SSatish Balay       do {
23334e2b4712SSatish Balay         m  = fm;
23344e2b4712SSatish Balay         fm = fill[m];
23354e2b4712SSatish Balay       } while (fm < idx);
23364e2b4712SSatish Balay       fill[m]   = idx;
23374e2b4712SSatish Balay       fill[idx] = fm;
23384e2b4712SSatish Balay       im[idx]   = 0;
23394e2b4712SSatish Balay     }
2340435faa5fSBarry Smith 
2341435faa5fSBarry Smith     /* make sure diagonal entry is included */
2342435faa5fSBarry Smith     if (diagonal_fill && fill[prow] == -1) {
2343435faa5fSBarry Smith       fm = n;
2344435faa5fSBarry Smith       while (fill[fm] < prow) fm = fill[fm];
2345435faa5fSBarry Smith       fill[prow] = fill[fm];  /* insert diagonal into linked list */
2346435faa5fSBarry Smith       fill[fm]   = prow;
2347435faa5fSBarry Smith       im[prow]   = 0;
2348435faa5fSBarry Smith       nzf++;
2349335d9088SBarry Smith       dcount++;
2350435faa5fSBarry Smith     }
2351435faa5fSBarry Smith 
23524e2b4712SSatish Balay     nzi = 0;
23534e2b4712SSatish Balay     row = fill[n];
23544e2b4712SSatish Balay     while ( row < prow ) {
23554e2b4712SSatish Balay       incrlev = im[row] + 1;
23564e2b4712SSatish Balay       nz      = dloc[row];
2357435faa5fSBarry Smith       xi      = ajnew  + ainew[row] + nz + 1;
23584e2b4712SSatish Balay       flev    = ajfill + ainew[row] + nz + 1;
23594e2b4712SSatish Balay       nnz     = ainew[row+1] - ainew[row] - nz - 1;
23604e2b4712SSatish Balay       fm      = row;
23614e2b4712SSatish Balay       while (nnz-- > 0) {
23624e2b4712SSatish Balay         idx = *xi++;
23634e2b4712SSatish Balay         if (*flev + incrlev > levels) {
23644e2b4712SSatish Balay           flev++;
23654e2b4712SSatish Balay           continue;
23664e2b4712SSatish Balay         }
23674e2b4712SSatish Balay         do {
23684e2b4712SSatish Balay           m  = fm;
23694e2b4712SSatish Balay           fm = fill[m];
23704e2b4712SSatish Balay         } while (fm < idx);
23714e2b4712SSatish Balay         if (fm != idx) {
23724e2b4712SSatish Balay           im[idx]   = *flev + incrlev;
23734e2b4712SSatish Balay           fill[m]   = idx;
23744e2b4712SSatish Balay           fill[idx] = fm;
23754e2b4712SSatish Balay           fm        = idx;
23764e2b4712SSatish Balay           nzf++;
2377ecf371e4SBarry Smith         } else {
23784e2b4712SSatish Balay           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
23794e2b4712SSatish Balay         }
23804e2b4712SSatish Balay         flev++;
23814e2b4712SSatish Balay       }
23824e2b4712SSatish Balay       row = fill[row];
23834e2b4712SSatish Balay       nzi++;
23844e2b4712SSatish Balay     }
23854e2b4712SSatish Balay     /* copy new filled row into permanent storage */
23864e2b4712SSatish Balay     ainew[prow+1] = ainew[prow] + nzf;
23874e2b4712SSatish Balay     if (ainew[prow+1] > jmax) {
2388ecf371e4SBarry Smith 
2389ecf371e4SBarry Smith       /* estimate how much additional space we will need */
2390ecf371e4SBarry Smith       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
2391ecf371e4SBarry Smith       /* just double the memory each time */
2392ecf371e4SBarry Smith       int maxadd = jmax;
2393ecf371e4SBarry Smith       /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */
23944e2b4712SSatish Balay       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
23954e2b4712SSatish Balay       jmax += maxadd;
2396ecf371e4SBarry Smith 
2397ecf371e4SBarry Smith       /* allocate a longer ajnew and ajfill */
23984e2b4712SSatish Balay       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
2399549d3d68SSatish Balay       ierr = PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));CHKERRQ(ierr);
2400606d414cSSatish Balay       ierr = PetscFree(ajnew);CHKERRQ(ierr);
24014e2b4712SSatish Balay       ajnew = xi;
24024e2b4712SSatish Balay       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
2403549d3d68SSatish Balay       ierr = PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));CHKERRQ(ierr);
2404606d414cSSatish Balay       ierr = PetscFree(ajfill);CHKERRQ(ierr);
24054e2b4712SSatish Balay       ajfill = xi;
2406ecf371e4SBarry Smith       realloc++; /* count how many reallocations are needed */
24074e2b4712SSatish Balay     }
24084e2b4712SSatish Balay     xi          = ajnew + ainew[prow];
24094e2b4712SSatish Balay     flev        = ajfill + ainew[prow];
24104e2b4712SSatish Balay     dloc[prow]  = nzi;
24114e2b4712SSatish Balay     fm          = fill[n];
24124e2b4712SSatish Balay     while (nzf--) {
24134e2b4712SSatish Balay       *xi++   = fm;
24144e2b4712SSatish Balay       *flev++ = im[fm];
24154e2b4712SSatish Balay       fm      = fill[fm];
24164e2b4712SSatish Balay     }
2417435faa5fSBarry Smith     /* make sure row has diagonal entry */
2418435faa5fSBarry Smith     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
2419435faa5fSBarry Smith       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\
2420435faa5fSBarry Smith     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
2421435faa5fSBarry Smith     }
24224e2b4712SSatish Balay   }
2423606d414cSSatish Balay   ierr = PetscFree(ajfill);CHKERRQ(ierr);
24244e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
24254e2b4712SSatish Balay   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2426606d414cSSatish Balay   ierr = PetscFree(fill);CHKERRQ(ierr);
2427606d414cSSatish Balay   ierr = PetscFree(im);CHKERRQ(ierr);
24284e2b4712SSatish Balay 
24294e2b4712SSatish Balay   {
24304e2b4712SSatish Balay     double af = ((double)ainew[n])/((double)ai[n]);
2431981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
24324e2b4712SSatish Balay              realloc,f,af);
2433981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af);
2434981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af);
2435981c4779SBarry Smith     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n");
2436335d9088SBarry Smith     if (diagonal_fill) {
2437335d9088SBarry Smith       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount);
2438335d9088SBarry Smith     }
24394e2b4712SSatish Balay   }
24404e2b4712SSatish Balay 
24414e2b4712SSatish Balay   /* put together the new matrix */
24424e2b4712SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
2443fa6007c9SSatish Balay   PLogObjectParent(*fact,isicol);
24444e2b4712SSatish Balay   b = (Mat_SeqBAIJ *) (*fact)->data;
2445606d414cSSatish Balay   ierr = PetscFree(b->imax);CHKERRQ(ierr);
24464e2b4712SSatish Balay   b->singlemalloc = 0;
24473f1db9ecSBarry Smith   len = bs2*ainew[n]*sizeof(MatScalar);
24484e2b4712SSatish Balay   /* the next line frees the default space generated by the Create() */
2449606d414cSSatish Balay   ierr = PetscFree(b->a);CHKERRQ(ierr);
2450606d414cSSatish Balay   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
24513f1db9ecSBarry Smith   b->a          = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
24524e2b4712SSatish Balay   b->j          = ajnew;
24534e2b4712SSatish Balay   b->i          = ainew;
24544e2b4712SSatish Balay   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
24554e2b4712SSatish Balay   b->diag       = dloc;
24564e2b4712SSatish Balay   b->ilen       = 0;
24574e2b4712SSatish Balay   b->imax       = 0;
24584e2b4712SSatish Balay   b->row        = isrow;
24594e2b4712SSatish Balay   b->col        = iscol;
2460e51c0b9cSSatish Balay   b->icol       = isicol;
24614e2b4712SSatish Balay   b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
24624e2b4712SSatish Balay   /* In b structure:  Free imax, ilen, old a, old j.
24634e2b4712SSatish Balay      Allocate dloc, solve_work, new a, new j */
24644e2b4712SSatish Balay   PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar));
24654e2b4712SSatish Balay   b->maxnz          = b->nz = ainew[n];
24664e2b4712SSatish Balay   (*fact)->factor   = FACTOR_LU;
24674e2b4712SSatish Balay 
24684e2b4712SSatish Balay   (*fact)->info.factor_mallocs    = realloc;
24694e2b4712SSatish Balay   (*fact)->info.fill_ratio_given  = f;
24704e2b4712SSatish Balay   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
24714e2b4712SSatish Balay 
24724e2b4712SSatish Balay   PetscFunctionReturn(0);
24734e2b4712SSatish Balay }
24744e2b4712SSatish Balay 
24754e2b4712SSatish Balay 
24764e2b4712SSatish Balay 
24774e2b4712SSatish Balay 
2478