xref: /petsc/src/mat/impls/baij/seq/baijfact2.c (revision b0a32e0c6855ee6a6cd3495fa7da12ea9885bc5d)
1*b0a32e0cSBarry Smith /*$Id: baijfact2.c,v 1.45 2001/01/06 15:35:13 bsmith Exp bsmith $*/
24e2b4712SSatish Balay /*
34e2b4712SSatish Balay     Factorization code for BAIJ format.
44e2b4712SSatish Balay */
54e2b4712SSatish Balay 
64e2b4712SSatish Balay #include "src/mat/impls/baij/seq/baij.h"
74e2b4712SSatish Balay #include "src/vec/vecimpl.h"
84e2b4712SSatish Balay #include "src/inline/ilu.h"
974c49faeSBarry Smith #include "src/inline/dot.h"
104e2b4712SSatish Balay 
11f1af5d2fSBarry Smith #undef __FUNC__
129d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_1_NaturalOrdering"
137c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
14f1af5d2fSBarry Smith {
15f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
16f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz;
17f1af5d2fSBarry Smith   int             *diag = a->diag;
18f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
19f1af5d2fSBarry Smith   Scalar          s1,*x,*b;
20f1af5d2fSBarry Smith 
21f1af5d2fSBarry Smith   PetscFunctionBegin;
22f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
23f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
24f1af5d2fSBarry Smith 
25f1af5d2fSBarry Smith   /* forward solve the U^T */
26f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
27f1af5d2fSBarry Smith 
28f1af5d2fSBarry Smith     v     = aa + diag[i];
29f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
30f1af5d2fSBarry Smith     s1    = (*v++)*b[i];
31f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
32f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
33f1af5d2fSBarry Smith     while (nz--) {
34f1af5d2fSBarry Smith       x[*vi++]  -= (*v++)*s1;
35f1af5d2fSBarry Smith     }
36f1af5d2fSBarry Smith     x[i]   = s1;
37f1af5d2fSBarry Smith   }
38f1af5d2fSBarry Smith   /* backward solve the L^T */
39f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
40f1af5d2fSBarry Smith     v    = aa + diag[i] - 1;
41f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
42f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
43f1af5d2fSBarry Smith     s1   = x[i];
44f1af5d2fSBarry Smith     while (nz--) {
45f1af5d2fSBarry Smith       x[*vi--]   -=  (*v--)*s1;
46f1af5d2fSBarry Smith     }
47f1af5d2fSBarry Smith   }
48f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
49f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
50*b0a32e0cSBarry Smith   PetscLogFlops(2*(a->nz) - A->n);
51f1af5d2fSBarry Smith   PetscFunctionReturn(0);
52f1af5d2fSBarry Smith }
53f1af5d2fSBarry Smith 
54f1af5d2fSBarry Smith #undef __FUNC__
559d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_2_NaturalOrdering"
567c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
57f1af5d2fSBarry Smith {
58f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
59f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
60f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
61f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
62f1af5d2fSBarry Smith   Scalar          s1,s2,x1,x2;
63f1af5d2fSBarry Smith   Scalar          *x,*b;
64f1af5d2fSBarry Smith 
65f1af5d2fSBarry Smith   PetscFunctionBegin;
66f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
67f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
68f1af5d2fSBarry Smith 
69f1af5d2fSBarry Smith   /* forward solve the U^T */
70f1af5d2fSBarry Smith   idx = 0;
71f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
72f1af5d2fSBarry Smith 
73f1af5d2fSBarry Smith     v     = aa + 4*diag[i];
74f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
75f1af5d2fSBarry Smith     x1 = b[idx];   x2 = b[1+idx];
76f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2;
77f1af5d2fSBarry Smith     s2 = v[2]*x1  +  v[3]*x2;
78f1af5d2fSBarry Smith     v += 4;
79f1af5d2fSBarry Smith 
80f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
81f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
82f1af5d2fSBarry Smith     while (nz--) {
83f1af5d2fSBarry Smith       oidx = 2*(*vi++);
84f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2;
85f1af5d2fSBarry Smith       x[oidx+1] -= v[2]*s1  +  v[3]*s2;
86f1af5d2fSBarry Smith       v  += 4;
87f1af5d2fSBarry Smith     }
88f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2;
89f1af5d2fSBarry Smith     idx += 2;
90f1af5d2fSBarry Smith   }
91f1af5d2fSBarry Smith   /* backward solve the L^T */
92f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
93f1af5d2fSBarry Smith     v    = aa + 4*diag[i] - 4;
94f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
95f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
96f1af5d2fSBarry Smith     idt  = 2*i;
97f1af5d2fSBarry Smith     s1   = x[idt];  s2 = x[1+idt];
98f1af5d2fSBarry Smith     while (nz--) {
99f1af5d2fSBarry Smith       idx   = 2*(*vi--);
100f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2;
101f1af5d2fSBarry Smith       x[idx+1] -=  v[2]*s1 +  v[3]*s2;
102f1af5d2fSBarry Smith       v -= 4;
103f1af5d2fSBarry Smith     }
104f1af5d2fSBarry Smith   }
105f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
106f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
107*b0a32e0cSBarry Smith   PetscLogFlops(2*4*(a->nz) - 2*A->n);
108f1af5d2fSBarry Smith   PetscFunctionReturn(0);
109f1af5d2fSBarry Smith }
110f1af5d2fSBarry Smith 
111f1af5d2fSBarry Smith #undef __FUNC__
1129d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_3_NaturalOrdering"
1137c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
114f1af5d2fSBarry Smith {
115f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
116f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
117f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
118f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
119f1af5d2fSBarry Smith   Scalar          s1,s2,s3,x1,x2,x3;
120f1af5d2fSBarry Smith   Scalar          *x,*b;
121f1af5d2fSBarry Smith 
122f1af5d2fSBarry Smith   PetscFunctionBegin;
123f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
124f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
125f1af5d2fSBarry Smith 
126f1af5d2fSBarry Smith   /* forward solve the U^T */
127f1af5d2fSBarry Smith   idx = 0;
128f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
129f1af5d2fSBarry Smith 
130f1af5d2fSBarry Smith     v     = aa + 9*diag[i];
131f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
132f1af5d2fSBarry Smith     x1 = b[idx];   x2 = b[1+idx]; x3    = b[2+idx];
133f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3;
134f1af5d2fSBarry Smith     s2 = v[3]*x1  +  v[4]*x2 +  v[5]*x3;
135f1af5d2fSBarry Smith     s3 = v[6]*x1  +  v[7]*x2 + v[8]*x3;
136f1af5d2fSBarry Smith     v += 9;
137f1af5d2fSBarry Smith 
138f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
139f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
140f1af5d2fSBarry Smith     while (nz--) {
141f1af5d2fSBarry Smith       oidx = 3*(*vi++);
142f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3;
143f1af5d2fSBarry Smith       x[oidx+1] -= v[3]*s1  +  v[4]*s2 +  v[5]*s3;
144f1af5d2fSBarry Smith       x[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
145f1af5d2fSBarry Smith       v  += 9;
146f1af5d2fSBarry Smith     }
147f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;
148f1af5d2fSBarry Smith     idx += 3;
149f1af5d2fSBarry Smith   }
150f1af5d2fSBarry Smith   /* backward solve the L^T */
151f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
152f1af5d2fSBarry Smith     v    = aa + 9*diag[i] - 9;
153f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
154f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
155f1af5d2fSBarry Smith     idt  = 3*i;
156f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];
157f1af5d2fSBarry Smith     while (nz--) {
158f1af5d2fSBarry Smith       idx   = 3*(*vi--);
159f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3;
160f1af5d2fSBarry Smith       x[idx+1] -=  v[3]*s1 +  v[4]*s2 +  v[5]*s3;
161f1af5d2fSBarry Smith       x[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
162f1af5d2fSBarry Smith       v -= 9;
163f1af5d2fSBarry Smith     }
164f1af5d2fSBarry Smith   }
165f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
166f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
167*b0a32e0cSBarry Smith   PetscLogFlops(2*9*(a->nz) - 3*A->n);
168f1af5d2fSBarry Smith   PetscFunctionReturn(0);
169f1af5d2fSBarry Smith }
170f1af5d2fSBarry Smith 
171f1af5d2fSBarry Smith #undef __FUNC__
1729d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_4_NaturalOrdering"
1737c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
174f1af5d2fSBarry Smith {
175f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
176f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
177f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
178f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
179f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,x1,x2,x3,x4;
180f1af5d2fSBarry Smith   Scalar          *x,*b;
181f1af5d2fSBarry Smith 
182f1af5d2fSBarry Smith   PetscFunctionBegin;
183f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
184f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
185f1af5d2fSBarry Smith 
186f1af5d2fSBarry Smith   /* forward solve the U^T */
187f1af5d2fSBarry Smith   idx = 0;
188f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
189f1af5d2fSBarry Smith 
190f1af5d2fSBarry Smith     v     = aa + 16*diag[i];
191f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
192f1af5d2fSBarry Smith     x1    = b[idx];   x2 = b[1+idx]; x3    = b[2+idx]; x4 = b[3+idx];
193f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
194f1af5d2fSBarry Smith     s2 = v[4]*x1  +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
195f1af5d2fSBarry Smith     s3 = v[8]*x1  +  v[9]*x2 + v[10]*x3 + v[11]*x4;
196f1af5d2fSBarry Smith     s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
197f1af5d2fSBarry Smith     v += 16;
198f1af5d2fSBarry Smith 
199f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
200f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
201f1af5d2fSBarry Smith     while (nz--) {
202f1af5d2fSBarry Smith       oidx = 4*(*vi++);
203f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4;
204f1af5d2fSBarry Smith       x[oidx+1] -= v[4]*s1  +  v[5]*s2 +  v[6]*s3 +  v[7]*s4;
205f1af5d2fSBarry Smith       x[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
206f1af5d2fSBarry Smith       x[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
207f1af5d2fSBarry Smith       v  += 16;
208f1af5d2fSBarry Smith     }
209f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4;
210f1af5d2fSBarry Smith     idx += 4;
211f1af5d2fSBarry Smith   }
212f1af5d2fSBarry Smith   /* backward solve the L^T */
213f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
214f1af5d2fSBarry Smith     v    = aa + 16*diag[i] - 16;
215f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
216f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
217f1af5d2fSBarry Smith     idt  = 4*i;
218f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt];
219f1af5d2fSBarry Smith     while (nz--) {
220f1af5d2fSBarry Smith       idx   = 4*(*vi--);
221f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4;
222f1af5d2fSBarry Smith       x[idx+1] -=  v[4]*s1 +  v[5]*s2 +  v[6]*s3 +  v[7]*s4;
223f1af5d2fSBarry Smith       x[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
224f1af5d2fSBarry Smith       x[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
225f1af5d2fSBarry Smith       v -= 16;
226f1af5d2fSBarry Smith     }
227f1af5d2fSBarry Smith   }
228f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
229f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
230*b0a32e0cSBarry Smith   PetscLogFlops(2*16*(a->nz) - 4*A->n);
231f1af5d2fSBarry Smith   PetscFunctionReturn(0);
232f1af5d2fSBarry Smith }
233f1af5d2fSBarry Smith 
234f1af5d2fSBarry Smith #undef __FUNC__
2359d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_5_NaturalOrdering"
2367c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
237f1af5d2fSBarry Smith {
238f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
239f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
240f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
241f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
242f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
243f1af5d2fSBarry Smith   Scalar          *x,*b;
244f1af5d2fSBarry Smith 
245f1af5d2fSBarry Smith   PetscFunctionBegin;
246f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
247f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
248f1af5d2fSBarry Smith 
249f1af5d2fSBarry Smith   /* forward solve the U^T */
250f1af5d2fSBarry Smith   idx = 0;
251f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
252f1af5d2fSBarry Smith 
253f1af5d2fSBarry Smith     v     = aa + 25*diag[i];
254f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
255f1af5d2fSBarry Smith     x1    = b[idx];   x2 = b[1+idx]; x3    = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
256f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
257f1af5d2fSBarry Smith     s2 = v[5]*x1  +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
258f1af5d2fSBarry Smith     s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
259f1af5d2fSBarry Smith     s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
260f1af5d2fSBarry Smith     s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
261f1af5d2fSBarry Smith     v += 25;
262f1af5d2fSBarry Smith 
263f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
264f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
265f1af5d2fSBarry Smith     while (nz--) {
266f1af5d2fSBarry Smith       oidx = 5*(*vi++);
267f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5;
268f1af5d2fSBarry Smith       x[oidx+1] -= v[5]*s1  +  v[6]*s2 +  v[7]*s3 +  v[8]*s4 +  v[9]*s5;
269f1af5d2fSBarry Smith       x[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
270f1af5d2fSBarry Smith       x[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
271f1af5d2fSBarry Smith       x[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
272f1af5d2fSBarry Smith       v  += 25;
273f1af5d2fSBarry Smith     }
274f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
275f1af5d2fSBarry Smith     idx += 5;
276f1af5d2fSBarry Smith   }
277f1af5d2fSBarry Smith   /* backward solve the L^T */
278f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
279f1af5d2fSBarry Smith     v    = aa + 25*diag[i] - 25;
280f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
281f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
282f1af5d2fSBarry Smith     idt  = 5*i;
283f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
284f1af5d2fSBarry Smith     while (nz--) {
285f1af5d2fSBarry Smith       idx   = 5*(*vi--);
286f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5;
287f1af5d2fSBarry Smith       x[idx+1] -=  v[5]*s1 +  v[6]*s2 +  v[7]*s3 +  v[8]*s4 +  v[9]*s5;
288f1af5d2fSBarry Smith       x[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
289f1af5d2fSBarry Smith       x[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
290f1af5d2fSBarry Smith       x[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
291f1af5d2fSBarry Smith       v -= 25;
292f1af5d2fSBarry Smith     }
293f1af5d2fSBarry Smith   }
294f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
295f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
296*b0a32e0cSBarry Smith   PetscLogFlops(2*25*(a->nz) - 5*A->n);
297f1af5d2fSBarry Smith   PetscFunctionReturn(0);
298f1af5d2fSBarry Smith }
299f1af5d2fSBarry Smith 
300f1af5d2fSBarry Smith #undef __FUNC__
3019d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_6_NaturalOrdering"
3027c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
303f1af5d2fSBarry Smith {
304f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
305f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
306f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
307f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
308f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
309f1af5d2fSBarry Smith   Scalar          *x,*b;
310f1af5d2fSBarry Smith 
311f1af5d2fSBarry Smith   PetscFunctionBegin;
312f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
313f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
314f1af5d2fSBarry Smith 
315f1af5d2fSBarry Smith   /* forward solve the U^T */
316f1af5d2fSBarry Smith   idx = 0;
317f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
318f1af5d2fSBarry Smith 
319f1af5d2fSBarry Smith     v     = aa + 36*diag[i];
320f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
321f1af5d2fSBarry Smith     x1    = b[idx];   x2 = b[1+idx]; x3    = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
322f1af5d2fSBarry Smith     x6    = b[5+idx];
323f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5 +  v[5]*x6;
324f1af5d2fSBarry Smith     s2 = v[6]*x1  +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
325f1af5d2fSBarry Smith     s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
326f1af5d2fSBarry Smith     s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
327f1af5d2fSBarry Smith     s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
328f1af5d2fSBarry Smith     s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
329f1af5d2fSBarry Smith     v += 36;
330f1af5d2fSBarry Smith 
331f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
332f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
333f1af5d2fSBarry Smith     while (nz--) {
334f1af5d2fSBarry Smith       oidx = 6*(*vi++);
335f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6;
336f1af5d2fSBarry Smith       x[oidx+1] -= v[6]*s1  +  v[7]*s2 +  v[8]*s3 +  v[9]*s4 + v[10]*s5 + v[11]*s6;
337f1af5d2fSBarry Smith       x[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
338f1af5d2fSBarry Smith       x[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
339f1af5d2fSBarry Smith       x[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
340f1af5d2fSBarry Smith       x[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
341f1af5d2fSBarry Smith       v  += 36;
342f1af5d2fSBarry Smith     }
343f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
344f1af5d2fSBarry Smith     x[5+idx] = s6;
345f1af5d2fSBarry Smith     idx += 6;
346f1af5d2fSBarry Smith   }
347f1af5d2fSBarry Smith   /* backward solve the L^T */
348f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
349f1af5d2fSBarry Smith     v    = aa + 36*diag[i] - 36;
350f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
351f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
352f1af5d2fSBarry Smith     idt  = 6*i;
353f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
354f1af5d2fSBarry Smith     s6 = x[5+idt];
355f1af5d2fSBarry Smith     while (nz--) {
356f1af5d2fSBarry Smith       idx   = 6*(*vi--);
357f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6;
358f1af5d2fSBarry Smith       x[idx+1] -=  v[6]*s1 +  v[7]*s2 +  v[8]*s3 +  v[9]*s4 + v[10]*s5 + v[11]*s6;
359f1af5d2fSBarry Smith       x[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
360f1af5d2fSBarry Smith       x[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
361f1af5d2fSBarry Smith       x[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
362f1af5d2fSBarry Smith       x[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
363f1af5d2fSBarry Smith       v -= 36;
364f1af5d2fSBarry Smith     }
365f1af5d2fSBarry Smith   }
366f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
367f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
368*b0a32e0cSBarry Smith   PetscLogFlops(2*36*(a->nz) - 6*A->n);
369f1af5d2fSBarry Smith   PetscFunctionReturn(0);
370f1af5d2fSBarry Smith }
371f1af5d2fSBarry Smith 
372f1af5d2fSBarry Smith #undef __FUNC__
3739d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_7_NaturalOrdering"
3747c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
375f1af5d2fSBarry Smith {
376f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
377f1af5d2fSBarry Smith   int             ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
378f1af5d2fSBarry Smith   int             *diag = a->diag,oidx;
379f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
380f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
381f1af5d2fSBarry Smith   Scalar          *x,*b;
382f1af5d2fSBarry Smith 
383f1af5d2fSBarry Smith   PetscFunctionBegin;
384f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
385f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
386f1af5d2fSBarry Smith 
387f1af5d2fSBarry Smith   /* forward solve the U^T */
388f1af5d2fSBarry Smith   idx = 0;
389f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
390f1af5d2fSBarry Smith 
391f1af5d2fSBarry Smith     v     = aa + 49*diag[i];
392f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
393f1af5d2fSBarry Smith     x1    = b[idx];   x2 = b[1+idx]; x3    = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
394f1af5d2fSBarry Smith     x6    = b[5+idx]; x7 = b[6+idx];
395f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5 +  v[5]*x6 +  v[6]*x7;
396f1af5d2fSBarry Smith     s2 = v[7]*x1  +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
397f1af5d2fSBarry Smith     s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
398f1af5d2fSBarry Smith     s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
399f1af5d2fSBarry Smith     s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
400f1af5d2fSBarry Smith     s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
401f1af5d2fSBarry Smith     s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
402f1af5d2fSBarry Smith     v += 49;
403f1af5d2fSBarry Smith 
404f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
405f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
406f1af5d2fSBarry Smith     while (nz--) {
407f1af5d2fSBarry Smith       oidx = 7*(*vi++);
408f1af5d2fSBarry Smith       x[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6 +  v[6]*s7;
409f1af5d2fSBarry Smith       x[oidx+1] -= v[7]*s1  +  v[8]*s2 +  v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
410f1af5d2fSBarry Smith       x[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
411f1af5d2fSBarry Smith       x[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
412f1af5d2fSBarry Smith       x[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
413f1af5d2fSBarry Smith       x[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
414f1af5d2fSBarry Smith       x[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
415f1af5d2fSBarry Smith       v  += 49;
416f1af5d2fSBarry Smith     }
417f1af5d2fSBarry Smith     x[idx]   = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
418f1af5d2fSBarry Smith     x[5+idx] = s6;x[6+idx] = s7;
419f1af5d2fSBarry Smith     idx += 7;
420f1af5d2fSBarry Smith   }
421f1af5d2fSBarry Smith   /* backward solve the L^T */
422f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
423f1af5d2fSBarry Smith     v    = aa + 49*diag[i] - 49;
424f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
425f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
426f1af5d2fSBarry Smith     idt  = 7*i;
427f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
428f1af5d2fSBarry Smith     s6 = x[5+idt];s7 = x[6+idt];
429f1af5d2fSBarry Smith     while (nz--) {
430f1af5d2fSBarry Smith       idx   = 7*(*vi--);
431f1af5d2fSBarry Smith       x[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6 +  v[6]*s7;
432f1af5d2fSBarry Smith       x[idx+1] -=  v[7]*s1 +  v[8]*s2 +  v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
433f1af5d2fSBarry Smith       x[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
434f1af5d2fSBarry Smith       x[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
435f1af5d2fSBarry Smith       x[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
436f1af5d2fSBarry Smith       x[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
437f1af5d2fSBarry Smith       x[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
438f1af5d2fSBarry Smith       v -= 49;
439f1af5d2fSBarry Smith     }
440f1af5d2fSBarry Smith   }
441f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
442f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
443*b0a32e0cSBarry Smith   PetscLogFlops(2*49*(a->nz) - 7*A->n);
444f1af5d2fSBarry Smith   PetscFunctionReturn(0);
445f1af5d2fSBarry Smith }
446f1af5d2fSBarry Smith 
447f1af5d2fSBarry Smith /*---------------------------------------------------------------------------------------------*/
448f1af5d2fSBarry Smith #undef __FUNC__
4499d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_1"
4507c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
451f1af5d2fSBarry Smith {
452f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
453f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
454f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
455f1af5d2fSBarry Smith   int             *diag = a->diag;
456f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
457f1af5d2fSBarry Smith   Scalar          s1,*x,*b,*t;
458f1af5d2fSBarry Smith 
459f1af5d2fSBarry Smith   PetscFunctionBegin;
460f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
461f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
462f1af5d2fSBarry Smith   t  = a->solve_work;
463f1af5d2fSBarry Smith 
464f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
465f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
466f1af5d2fSBarry Smith 
467f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
468f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
469f1af5d2fSBarry Smith     t[i] = b[c[i]];
470f1af5d2fSBarry Smith   }
471f1af5d2fSBarry Smith 
472f1af5d2fSBarry Smith   /* forward solve the U^T */
473f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
474f1af5d2fSBarry Smith 
475f1af5d2fSBarry Smith     v     = aa + diag[i];
476f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
477f1af5d2fSBarry Smith     s1    = (*v++)*t[i];
478f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
479f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
480f1af5d2fSBarry Smith     while (nz--) {
481f1af5d2fSBarry Smith       t[*vi++]  -= (*v++)*s1;
482f1af5d2fSBarry Smith     }
483f1af5d2fSBarry Smith     t[i]   = s1;
484f1af5d2fSBarry Smith   }
485f1af5d2fSBarry Smith   /* backward solve the L^T */
486f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
487f1af5d2fSBarry Smith     v    = aa + diag[i] - 1;
488f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
489f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
490f1af5d2fSBarry Smith     s1   = t[i];
491f1af5d2fSBarry Smith     while (nz--) {
492f1af5d2fSBarry Smith       t[*vi--]   -=  (*v--)*s1;
493f1af5d2fSBarry Smith     }
494f1af5d2fSBarry Smith   }
495f1af5d2fSBarry Smith 
496f1af5d2fSBarry Smith   /* copy t into x according to permutation */
497f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
498f1af5d2fSBarry Smith     x[r[i]]   = t[i];
499f1af5d2fSBarry Smith   }
500f1af5d2fSBarry Smith 
501f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
502f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
503f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
504f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
505*b0a32e0cSBarry Smith   PetscLogFlops(2*(a->nz) - A->n);
506f1af5d2fSBarry Smith   PetscFunctionReturn(0);
507f1af5d2fSBarry Smith }
508f1af5d2fSBarry Smith 
509f1af5d2fSBarry Smith #undef __FUNC__
5109d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_2"
5117c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
512f1af5d2fSBarry Smith {
513f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
514f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
515f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
516f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
517f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
518f1af5d2fSBarry Smith   Scalar          s1,s2,x1,x2;
519f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
520f1af5d2fSBarry Smith 
521f1af5d2fSBarry Smith   PetscFunctionBegin;
522f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
523f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
524f1af5d2fSBarry Smith   t  = a->solve_work;
525f1af5d2fSBarry Smith 
526f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
527f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
528f1af5d2fSBarry Smith 
529f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
530f1af5d2fSBarry Smith   ii = 0;
531f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
532f1af5d2fSBarry Smith     ic      = 2*c[i];
533f1af5d2fSBarry Smith     t[ii]   = b[ic];
534f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
535f1af5d2fSBarry Smith     ii += 2;
536f1af5d2fSBarry Smith   }
537f1af5d2fSBarry Smith 
538f1af5d2fSBarry Smith   /* forward solve the U^T */
539f1af5d2fSBarry Smith   idx = 0;
540f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
541f1af5d2fSBarry Smith 
542f1af5d2fSBarry Smith     v     = aa + 4*diag[i];
543f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
544f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx];
545f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2;
546f1af5d2fSBarry Smith     s2 = v[2]*x1  +  v[3]*x2;
547f1af5d2fSBarry Smith     v += 4;
548f1af5d2fSBarry Smith 
549f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
550f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
551f1af5d2fSBarry Smith     while (nz--) {
552f1af5d2fSBarry Smith       oidx = 2*(*vi++);
553f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2;
554f1af5d2fSBarry Smith       t[oidx+1] -= v[2]*s1  +  v[3]*s2;
555f1af5d2fSBarry Smith       v  += 4;
556f1af5d2fSBarry Smith     }
557f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
558f1af5d2fSBarry Smith     idx += 2;
559f1af5d2fSBarry Smith   }
560f1af5d2fSBarry Smith   /* backward solve the L^T */
561f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
562f1af5d2fSBarry Smith     v    = aa + 4*diag[i] - 4;
563f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
564f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
565f1af5d2fSBarry Smith     idt  = 2*i;
566f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
567f1af5d2fSBarry Smith     while (nz--) {
568f1af5d2fSBarry Smith       idx   = 2*(*vi--);
569f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2;
570f1af5d2fSBarry Smith       t[idx+1] -=  v[2]*s1 +  v[3]*s2;
571f1af5d2fSBarry Smith       v -= 4;
572f1af5d2fSBarry Smith     }
573f1af5d2fSBarry Smith   }
574f1af5d2fSBarry Smith 
575f1af5d2fSBarry Smith   /* copy t into x according to permutation */
576f1af5d2fSBarry Smith   ii = 0;
577f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
578f1af5d2fSBarry Smith     ir      = 2*r[i];
579f1af5d2fSBarry Smith     x[ir]   = t[ii];
580f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
581f1af5d2fSBarry Smith     ii += 2;
582f1af5d2fSBarry Smith   }
583f1af5d2fSBarry Smith 
584f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
585f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
586f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
587f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
588*b0a32e0cSBarry Smith   PetscLogFlops(2*4*(a->nz) - 2*A->n);
589f1af5d2fSBarry Smith   PetscFunctionReturn(0);
590f1af5d2fSBarry Smith }
591f1af5d2fSBarry Smith 
592f1af5d2fSBarry Smith #undef __FUNC__
5939d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_3"
5947c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
595f1af5d2fSBarry Smith {
596f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
597f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
598f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
599f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
600f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
601f1af5d2fSBarry Smith   Scalar          s1,s2,s3,x1,x2,x3;
602f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
603f1af5d2fSBarry Smith 
604f1af5d2fSBarry Smith   PetscFunctionBegin;
605f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
606f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
607f1af5d2fSBarry Smith   t  = a->solve_work;
608f1af5d2fSBarry Smith 
609f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
610f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
611f1af5d2fSBarry Smith 
612f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
613f1af5d2fSBarry Smith   ii = 0;
614f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
615f1af5d2fSBarry Smith     ic      = 3*c[i];
616f1af5d2fSBarry Smith     t[ii]   = b[ic];
617f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
618f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
619f1af5d2fSBarry Smith     ii += 3;
620f1af5d2fSBarry Smith   }
621f1af5d2fSBarry Smith 
622f1af5d2fSBarry Smith   /* forward solve the U^T */
623f1af5d2fSBarry Smith   idx = 0;
624f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
625f1af5d2fSBarry Smith 
626f1af5d2fSBarry Smith     v     = aa + 9*diag[i];
627f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
628f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx];
629f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3;
630f1af5d2fSBarry Smith     s2 = v[3]*x1  +  v[4]*x2 +  v[5]*x3;
631f1af5d2fSBarry Smith     s3 = v[6]*x1  +  v[7]*x2 + v[8]*x3;
632f1af5d2fSBarry Smith     v += 9;
633f1af5d2fSBarry Smith 
634f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
635f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
636f1af5d2fSBarry Smith     while (nz--) {
637f1af5d2fSBarry Smith       oidx = 3*(*vi++);
638f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3;
639f1af5d2fSBarry Smith       t[oidx+1] -= v[3]*s1  +  v[4]*s2 +  v[5]*s3;
640f1af5d2fSBarry Smith       t[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
641f1af5d2fSBarry Smith       v  += 9;
642f1af5d2fSBarry Smith     }
643f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;
644f1af5d2fSBarry Smith     idx += 3;
645f1af5d2fSBarry Smith   }
646f1af5d2fSBarry Smith   /* backward solve the L^T */
647f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
648f1af5d2fSBarry Smith     v    = aa + 9*diag[i] - 9;
649f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
650f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
651f1af5d2fSBarry Smith     idt  = 3*i;
652f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];
653f1af5d2fSBarry Smith     while (nz--) {
654f1af5d2fSBarry Smith       idx   = 3*(*vi--);
655f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3;
656f1af5d2fSBarry Smith       t[idx+1] -=  v[3]*s1 +  v[4]*s2 +  v[5]*s3;
657f1af5d2fSBarry Smith       t[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
658f1af5d2fSBarry Smith       v -= 9;
659f1af5d2fSBarry Smith     }
660f1af5d2fSBarry Smith   }
661f1af5d2fSBarry Smith 
662f1af5d2fSBarry Smith   /* copy t into x according to permutation */
663f1af5d2fSBarry Smith   ii = 0;
664f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
665f1af5d2fSBarry Smith     ir      = 3*r[i];
666f1af5d2fSBarry Smith     x[ir]   = t[ii];
667f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
668f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
669f1af5d2fSBarry Smith     ii += 3;
670f1af5d2fSBarry Smith   }
671f1af5d2fSBarry Smith 
672f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
673f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
674f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
675f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
676*b0a32e0cSBarry Smith   PetscLogFlops(2*9*(a->nz) - 3*A->n);
677f1af5d2fSBarry Smith   PetscFunctionReturn(0);
678f1af5d2fSBarry Smith }
679f1af5d2fSBarry Smith 
680f1af5d2fSBarry Smith #undef __FUNC__
6819d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_4"
6827c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
683f1af5d2fSBarry Smith {
684f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
685f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
686f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
687f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
688f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
689f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,x1,x2,x3,x4;
690f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
691f1af5d2fSBarry Smith 
692f1af5d2fSBarry Smith   PetscFunctionBegin;
693f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
694f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
695f1af5d2fSBarry Smith   t  = a->solve_work;
696f1af5d2fSBarry Smith 
697f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
698f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
699f1af5d2fSBarry Smith 
700f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
701f1af5d2fSBarry Smith   ii = 0;
702f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
703f1af5d2fSBarry Smith     ic      = 4*c[i];
704f1af5d2fSBarry Smith     t[ii]   = b[ic];
705f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
706f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
707f1af5d2fSBarry Smith     t[ii+3] = b[ic+3];
708f1af5d2fSBarry Smith     ii += 4;
709f1af5d2fSBarry Smith   }
710f1af5d2fSBarry Smith 
711f1af5d2fSBarry Smith   /* forward solve the U^T */
712f1af5d2fSBarry Smith   idx = 0;
713f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
714f1af5d2fSBarry Smith 
715f1af5d2fSBarry Smith     v     = aa + 16*diag[i];
716f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
717f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx]; x4 = t[3+idx];
718f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
719f1af5d2fSBarry Smith     s2 = v[4]*x1  +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
720f1af5d2fSBarry Smith     s3 = v[8]*x1  +  v[9]*x2 + v[10]*x3 + v[11]*x4;
721f1af5d2fSBarry Smith     s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
722f1af5d2fSBarry Smith     v += 16;
723f1af5d2fSBarry Smith 
724f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
725f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
726f1af5d2fSBarry Smith     while (nz--) {
727f1af5d2fSBarry Smith       oidx = 4*(*vi++);
728f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4;
729f1af5d2fSBarry Smith       t[oidx+1] -= v[4]*s1  +  v[5]*s2 +  v[6]*s3 +  v[7]*s4;
730f1af5d2fSBarry Smith       t[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
731f1af5d2fSBarry Smith       t[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
732f1af5d2fSBarry Smith       v  += 16;
733f1af5d2fSBarry Smith     }
734f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4;
735f1af5d2fSBarry Smith     idx += 4;
736f1af5d2fSBarry Smith   }
737f1af5d2fSBarry Smith   /* backward solve the L^T */
738f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
739f1af5d2fSBarry Smith     v    = aa + 16*diag[i] - 16;
740f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
741f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
742f1af5d2fSBarry Smith     idt  = 4*i;
743f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt];
744f1af5d2fSBarry Smith     while (nz--) {
745f1af5d2fSBarry Smith       idx   = 4*(*vi--);
746f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4;
747f1af5d2fSBarry Smith       t[idx+1] -=  v[4]*s1 +  v[5]*s2 +  v[6]*s3 +  v[7]*s4;
748f1af5d2fSBarry Smith       t[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
749f1af5d2fSBarry Smith       t[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
750f1af5d2fSBarry Smith       v -= 16;
751f1af5d2fSBarry Smith     }
752f1af5d2fSBarry Smith   }
753f1af5d2fSBarry Smith 
754f1af5d2fSBarry Smith   /* copy t into x according to permutation */
755f1af5d2fSBarry Smith   ii = 0;
756f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
757f1af5d2fSBarry Smith     ir      = 4*r[i];
758f1af5d2fSBarry Smith     x[ir]   = t[ii];
759f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
760f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
761f1af5d2fSBarry Smith     x[ir+3] = t[ii+3];
762f1af5d2fSBarry Smith     ii += 4;
763f1af5d2fSBarry Smith   }
764f1af5d2fSBarry Smith 
765f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
766f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
767f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
768f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
769*b0a32e0cSBarry Smith   PetscLogFlops(2*16*(a->nz) - 4*A->n);
770f1af5d2fSBarry Smith   PetscFunctionReturn(0);
771f1af5d2fSBarry Smith }
772f1af5d2fSBarry Smith 
773f1af5d2fSBarry Smith #undef __FUNC__
7749d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_5"
7757c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
776f1af5d2fSBarry Smith {
777f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
778f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
779f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
780f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
781f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
782f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
783f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
784f1af5d2fSBarry Smith 
785f1af5d2fSBarry Smith   PetscFunctionBegin;
786f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
787f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
788f1af5d2fSBarry Smith   t  = a->solve_work;
789f1af5d2fSBarry Smith 
790f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
791f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
792f1af5d2fSBarry Smith 
793f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
794f1af5d2fSBarry Smith   ii = 0;
795f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
796f1af5d2fSBarry Smith     ic      = 5*c[i];
797f1af5d2fSBarry Smith     t[ii]   = b[ic];
798f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
799f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
800f1af5d2fSBarry Smith     t[ii+3] = b[ic+3];
801f1af5d2fSBarry Smith     t[ii+4] = b[ic+4];
802f1af5d2fSBarry Smith     ii += 5;
803f1af5d2fSBarry Smith   }
804f1af5d2fSBarry Smith 
805f1af5d2fSBarry Smith   /* forward solve the U^T */
806f1af5d2fSBarry Smith   idx = 0;
807f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
808f1af5d2fSBarry Smith 
809f1af5d2fSBarry Smith     v     = aa + 25*diag[i];
810f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
811f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
812f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
813f1af5d2fSBarry Smith     s2 = v[5]*x1  +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
814f1af5d2fSBarry Smith     s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
815f1af5d2fSBarry Smith     s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
816f1af5d2fSBarry Smith     s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
817f1af5d2fSBarry Smith     v += 25;
818f1af5d2fSBarry Smith 
819f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
820f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
821f1af5d2fSBarry Smith     while (nz--) {
822f1af5d2fSBarry Smith       oidx = 5*(*vi++);
823f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5;
824f1af5d2fSBarry Smith       t[oidx+1] -= v[5]*s1  +  v[6]*s2 +  v[7]*s3 +  v[8]*s4 +  v[9]*s5;
825f1af5d2fSBarry Smith       t[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
826f1af5d2fSBarry Smith       t[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
827f1af5d2fSBarry Smith       t[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
828f1af5d2fSBarry Smith       v  += 25;
829f1af5d2fSBarry Smith     }
830f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
831f1af5d2fSBarry Smith     idx += 5;
832f1af5d2fSBarry Smith   }
833f1af5d2fSBarry Smith   /* backward solve the L^T */
834f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
835f1af5d2fSBarry Smith     v    = aa + 25*diag[i] - 25;
836f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
837f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
838f1af5d2fSBarry Smith     idt  = 5*i;
839f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
840f1af5d2fSBarry Smith     while (nz--) {
841f1af5d2fSBarry Smith       idx   = 5*(*vi--);
842f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5;
843f1af5d2fSBarry Smith       t[idx+1] -=  v[5]*s1 +  v[6]*s2 +  v[7]*s3 +  v[8]*s4 +  v[9]*s5;
844f1af5d2fSBarry Smith       t[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
845f1af5d2fSBarry Smith       t[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
846f1af5d2fSBarry Smith       t[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
847f1af5d2fSBarry Smith       v -= 25;
848f1af5d2fSBarry Smith     }
849f1af5d2fSBarry Smith   }
850f1af5d2fSBarry Smith 
851f1af5d2fSBarry Smith   /* copy t into x according to permutation */
852f1af5d2fSBarry Smith   ii = 0;
853f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
854f1af5d2fSBarry Smith     ir      = 5*r[i];
855f1af5d2fSBarry Smith     x[ir]   = t[ii];
856f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
857f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
858f1af5d2fSBarry Smith     x[ir+3] = t[ii+3];
859f1af5d2fSBarry Smith     x[ir+4] = t[ii+4];
860f1af5d2fSBarry Smith     ii += 5;
861f1af5d2fSBarry Smith   }
862f1af5d2fSBarry Smith 
863f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
864f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
865f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
866f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
867*b0a32e0cSBarry Smith   PetscLogFlops(2*25*(a->nz) - 5*A->n);
868f1af5d2fSBarry Smith   PetscFunctionReturn(0);
869f1af5d2fSBarry Smith }
870f1af5d2fSBarry Smith 
871f1af5d2fSBarry Smith #undef __FUNC__
8729d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_6"
8737c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
874f1af5d2fSBarry Smith {
875f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
876f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
877f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
878f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
879f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
880f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
881f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
882f1af5d2fSBarry Smith 
883f1af5d2fSBarry Smith   PetscFunctionBegin;
884f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
885f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
886f1af5d2fSBarry Smith   t  = a->solve_work;
887f1af5d2fSBarry Smith 
888f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
889f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
890f1af5d2fSBarry Smith 
891f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
892f1af5d2fSBarry Smith   ii = 0;
893f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
894f1af5d2fSBarry Smith     ic      = 6*c[i];
895f1af5d2fSBarry Smith     t[ii]   = b[ic];
896f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
897f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
898f1af5d2fSBarry Smith     t[ii+3] = b[ic+3];
899f1af5d2fSBarry Smith     t[ii+4] = b[ic+4];
900f1af5d2fSBarry Smith     t[ii+5] = b[ic+5];
901f1af5d2fSBarry Smith     ii += 6;
902f1af5d2fSBarry Smith   }
903f1af5d2fSBarry Smith 
904f1af5d2fSBarry Smith   /* forward solve the U^T */
905f1af5d2fSBarry Smith   idx = 0;
906f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
907f1af5d2fSBarry Smith 
908f1af5d2fSBarry Smith     v     = aa + 36*diag[i];
909f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
910f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
911f1af5d2fSBarry Smith     x6    = t[5+idx];
912f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5 +  v[5]*x6;
913f1af5d2fSBarry Smith     s2 = v[6]*x1  +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
914f1af5d2fSBarry Smith     s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
915f1af5d2fSBarry Smith     s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
916f1af5d2fSBarry Smith     s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
917f1af5d2fSBarry Smith     s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
918f1af5d2fSBarry Smith     v += 36;
919f1af5d2fSBarry Smith 
920f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
921f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
922f1af5d2fSBarry Smith     while (nz--) {
923f1af5d2fSBarry Smith       oidx = 6*(*vi++);
924f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6;
925f1af5d2fSBarry Smith       t[oidx+1] -= v[6]*s1  +  v[7]*s2 +  v[8]*s3 +  v[9]*s4 + v[10]*s5 + v[11]*s6;
926f1af5d2fSBarry Smith       t[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
927f1af5d2fSBarry Smith       t[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
928f1af5d2fSBarry Smith       t[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
929f1af5d2fSBarry Smith       t[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
930f1af5d2fSBarry Smith       v  += 36;
931f1af5d2fSBarry Smith     }
932f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
933f1af5d2fSBarry Smith     t[5+idx] = s6;
934f1af5d2fSBarry Smith     idx += 6;
935f1af5d2fSBarry Smith   }
936f1af5d2fSBarry Smith   /* backward solve the L^T */
937f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
938f1af5d2fSBarry Smith     v    = aa + 36*diag[i] - 36;
939f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
940f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
941f1af5d2fSBarry Smith     idt  = 6*i;
942f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
943f1af5d2fSBarry Smith     s6 = t[5+idt];
944f1af5d2fSBarry Smith     while (nz--) {
945f1af5d2fSBarry Smith       idx   = 6*(*vi--);
946f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6;
947f1af5d2fSBarry Smith       t[idx+1] -=  v[6]*s1 +  v[7]*s2 +  v[8]*s3 +  v[9]*s4 + v[10]*s5 + v[11]*s6;
948f1af5d2fSBarry Smith       t[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
949f1af5d2fSBarry Smith       t[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
950f1af5d2fSBarry Smith       t[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
951f1af5d2fSBarry Smith       t[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
952f1af5d2fSBarry Smith       v -= 36;
953f1af5d2fSBarry Smith     }
954f1af5d2fSBarry Smith   }
955f1af5d2fSBarry Smith 
956f1af5d2fSBarry Smith   /* copy t into x according to permutation */
957f1af5d2fSBarry Smith   ii = 0;
958f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
959f1af5d2fSBarry Smith     ir      = 6*r[i];
960f1af5d2fSBarry Smith     x[ir]   = t[ii];
961f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
962f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
963f1af5d2fSBarry Smith     x[ir+3] = t[ii+3];
964f1af5d2fSBarry Smith     x[ir+4] = t[ii+4];
965f1af5d2fSBarry Smith     x[ir+5] = t[ii+5];
966f1af5d2fSBarry Smith     ii += 6;
967f1af5d2fSBarry Smith   }
968f1af5d2fSBarry Smith 
969f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
970f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
971f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
972f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
973*b0a32e0cSBarry Smith   PetscLogFlops(2*36*(a->nz) - 6*A->n);
974f1af5d2fSBarry Smith   PetscFunctionReturn(0);
975f1af5d2fSBarry Smith }
976f1af5d2fSBarry Smith 
977f1af5d2fSBarry Smith #undef __FUNC__
9789d082aa4SBarry Smith #define __FUNC__ "MatSolveTranspose_SeqBAIJ_7"
9797c922b88SBarry Smith int MatSolveTranspose_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
980f1af5d2fSBarry Smith {
981f1af5d2fSBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
982f1af5d2fSBarry Smith   IS              iscol=a->col,isrow=a->row;
983f1af5d2fSBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
984f1af5d2fSBarry Smith   int             *diag = a->diag,ii,ic,ir,oidx;
985f1af5d2fSBarry Smith   MatScalar       *aa=a->a,*v;
986f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
987f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
988f1af5d2fSBarry Smith 
989f1af5d2fSBarry Smith   PetscFunctionBegin;
990f1af5d2fSBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
991f1af5d2fSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
992f1af5d2fSBarry Smith   t  = a->solve_work;
993f1af5d2fSBarry Smith 
994f1af5d2fSBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
995f1af5d2fSBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
996f1af5d2fSBarry Smith 
997f1af5d2fSBarry Smith   /* copy the b into temp work space according to permutation */
998f1af5d2fSBarry Smith   ii = 0;
999f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
1000f1af5d2fSBarry Smith     ic      = 7*c[i];
1001f1af5d2fSBarry Smith     t[ii]   = b[ic];
1002f1af5d2fSBarry Smith     t[ii+1] = b[ic+1];
1003f1af5d2fSBarry Smith     t[ii+2] = b[ic+2];
1004f1af5d2fSBarry Smith     t[ii+3] = b[ic+3];
1005f1af5d2fSBarry Smith     t[ii+4] = b[ic+4];
1006f1af5d2fSBarry Smith     t[ii+5] = b[ic+5];
1007f1af5d2fSBarry Smith     t[ii+6] = b[ic+6];
1008f1af5d2fSBarry Smith     ii += 7;
1009f1af5d2fSBarry Smith   }
1010f1af5d2fSBarry Smith 
1011f1af5d2fSBarry Smith   /* forward solve the U^T */
1012f1af5d2fSBarry Smith   idx = 0;
1013f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
1014f1af5d2fSBarry Smith 
1015f1af5d2fSBarry Smith     v     = aa + 49*diag[i];
1016f1af5d2fSBarry Smith     /* multiply by the inverse of the block diagonal */
1017f1af5d2fSBarry Smith     x1    = t[idx];   x2 = t[1+idx]; x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
1018f1af5d2fSBarry Smith     x6    = t[5+idx]; x7 = t[6+idx];
1019f1af5d2fSBarry Smith     s1 = v[0]*x1  +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5 +  v[5]*x6 +  v[6]*x7;
1020f1af5d2fSBarry Smith     s2 = v[7]*x1  +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1021f1af5d2fSBarry Smith     s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1022f1af5d2fSBarry Smith     s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1023f1af5d2fSBarry Smith     s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1024f1af5d2fSBarry Smith     s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1025f1af5d2fSBarry Smith     s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1026f1af5d2fSBarry Smith     v += 49;
1027f1af5d2fSBarry Smith 
1028f1af5d2fSBarry Smith     vi    = aj + diag[i] + 1;
1029f1af5d2fSBarry Smith     nz    = ai[i+1] - diag[i] - 1;
1030f1af5d2fSBarry Smith     while (nz--) {
1031f1af5d2fSBarry Smith       oidx = 7*(*vi++);
1032f1af5d2fSBarry Smith       t[oidx]   -= v[0]*s1  +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6 +  v[6]*s7;
1033f1af5d2fSBarry Smith       t[oidx+1] -= v[7]*s1  +  v[8]*s2 +  v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
1034f1af5d2fSBarry Smith       t[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
1035f1af5d2fSBarry Smith       t[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
1036f1af5d2fSBarry Smith       t[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
1037f1af5d2fSBarry Smith       t[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
1038f1af5d2fSBarry Smith       t[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
1039f1af5d2fSBarry Smith       v  += 49;
1040f1af5d2fSBarry Smith     }
1041f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
1042f1af5d2fSBarry Smith     t[5+idx] = s6;t[6+idx] = s7;
1043f1af5d2fSBarry Smith     idx += 7;
1044f1af5d2fSBarry Smith   }
1045f1af5d2fSBarry Smith   /* backward solve the L^T */
1046f1af5d2fSBarry Smith   for (i=n-1; i>=0; i--){
1047f1af5d2fSBarry Smith     v    = aa + 49*diag[i] - 49;
1048f1af5d2fSBarry Smith     vi   = aj + diag[i] - 1;
1049f1af5d2fSBarry Smith     nz   = diag[i] - ai[i];
1050f1af5d2fSBarry Smith     idt  = 7*i;
1051f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
1052f1af5d2fSBarry Smith     s6 = t[5+idt];s7 = t[6+idt];
1053f1af5d2fSBarry Smith     while (nz--) {
1054f1af5d2fSBarry Smith       idx   = 7*(*vi--);
1055f1af5d2fSBarry Smith       t[idx]   -=  v[0]*s1 +  v[1]*s2 +  v[2]*s3 +  v[3]*s4 +  v[4]*s5 +  v[5]*s6 +  v[6]*s7;
1056f1af5d2fSBarry Smith       t[idx+1] -=  v[7]*s1 +  v[8]*s2 +  v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
1057f1af5d2fSBarry Smith       t[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
1058f1af5d2fSBarry Smith       t[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
1059f1af5d2fSBarry Smith       t[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
1060f1af5d2fSBarry Smith       t[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
1061f1af5d2fSBarry Smith       t[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
1062f1af5d2fSBarry Smith       v -= 49;
1063f1af5d2fSBarry Smith     }
1064f1af5d2fSBarry Smith   }
1065f1af5d2fSBarry Smith 
1066f1af5d2fSBarry Smith   /* copy t into x according to permutation */
1067f1af5d2fSBarry Smith   ii = 0;
1068f1af5d2fSBarry Smith   for (i=0; i<n; i++) {
1069f1af5d2fSBarry Smith     ir      = 7*r[i];
1070f1af5d2fSBarry Smith     x[ir]   = t[ii];
1071f1af5d2fSBarry Smith     x[ir+1] = t[ii+1];
1072f1af5d2fSBarry Smith     x[ir+2] = t[ii+2];
1073f1af5d2fSBarry Smith     x[ir+3] = t[ii+3];
1074f1af5d2fSBarry Smith     x[ir+4] = t[ii+4];
1075f1af5d2fSBarry Smith     x[ir+5] = t[ii+5];
1076f1af5d2fSBarry Smith     x[ir+6] = t[ii+6];
1077f1af5d2fSBarry Smith     ii += 7;
1078f1af5d2fSBarry Smith   }
1079f1af5d2fSBarry Smith 
1080f1af5d2fSBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1081f1af5d2fSBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1082f1af5d2fSBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1083f1af5d2fSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1084*b0a32e0cSBarry Smith   PetscLogFlops(2*49*(a->nz) - 7*A->n);
1085f1af5d2fSBarry Smith   PetscFunctionReturn(0);
1086f1af5d2fSBarry Smith }
1087f1af5d2fSBarry Smith 
10884e2b4712SSatish Balay /* ----------------------------------------------------------- */
10894e2b4712SSatish Balay #undef __FUNC__
10909d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_N"
10914e2b4712SSatish Balay int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
10924e2b4712SSatish Balay {
10934e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
10944e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
10954e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
10964e2b4712SSatish Balay   int             nz,bs=a->bs,bs2=a->bs2,*rout,*cout;
10973f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1098f1af5d2fSBarry Smith   Scalar          *x,*b,*s,*t,*ls;
10994e2b4712SSatish Balay 
11004e2b4712SSatish Balay   PetscFunctionBegin;
1101e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1102e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1103f1af5d2fSBarry Smith   t  = a->solve_work;
11044e2b4712SSatish Balay 
11054e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
11064e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
11074e2b4712SSatish Balay 
11084e2b4712SSatish Balay   /* forward solve the lower triangular */
1109f1af5d2fSBarry Smith   ierr = PetscMemcpy(t,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr);
11104e2b4712SSatish Balay   for (i=1; i<n; i++) {
11114e2b4712SSatish Balay     v   = aa + bs2*ai[i];
11124e2b4712SSatish Balay     vi  = aj + ai[i];
11134e2b4712SSatish Balay     nz  = a->diag[i] - ai[i];
1114f1af5d2fSBarry Smith     s = t + bs*i;
1115f1af5d2fSBarry Smith     ierr = PetscMemcpy(s,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr);
11164e2b4712SSatish Balay     while (nz--) {
1117f1af5d2fSBarry Smith       Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*(*vi++));
11184e2b4712SSatish Balay       v += bs2;
11194e2b4712SSatish Balay     }
11204e2b4712SSatish Balay   }
11214e2b4712SSatish Balay   /* backward solve the upper triangular */
1122273d9f13SBarry Smith   ls = a->solve_work + A->n;
11234e2b4712SSatish Balay   for (i=n-1; i>=0; i--){
11244e2b4712SSatish Balay     v   = aa + bs2*(a->diag[i] + 1);
11254e2b4712SSatish Balay     vi  = aj + a->diag[i] + 1;
11264e2b4712SSatish Balay     nz  = ai[i+1] - a->diag[i] - 1;
1127f1af5d2fSBarry Smith     ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr);
11284e2b4712SSatish Balay     while (nz--) {
1129f1af5d2fSBarry Smith       Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*(*vi++));
11304e2b4712SSatish Balay       v += bs2;
11314e2b4712SSatish Balay     }
1132f1af5d2fSBarry Smith     Kernel_w_gets_A_times_v(bs,ls,aa+bs2*a->diag[i],t+i*bs);
1133f1af5d2fSBarry Smith     ierr = PetscMemcpy(x + bs*(*c--),t+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr);
11344e2b4712SSatish Balay   }
11354e2b4712SSatish Balay 
11364e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
11374e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1138e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1139e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1140*b0a32e0cSBarry Smith   PetscLogFlops(2*(a->bs2)*(a->nz) - a->bs*A->n);
11414e2b4712SSatish Balay   PetscFunctionReturn(0);
11424e2b4712SSatish Balay }
11434e2b4712SSatish Balay 
11444e2b4712SSatish Balay #undef __FUNC__
11459d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_7"
11464e2b4712SSatish Balay int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
11474e2b4712SSatish Balay {
11484e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
11494e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
11504e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
11514e2b4712SSatish Balay   int             *diag = a->diag;
11523f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1153f1af5d2fSBarry Smith   Scalar          s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
1154f1af5d2fSBarry Smith   Scalar          *x,*b,*t;
11554e2b4712SSatish Balay 
11564e2b4712SSatish Balay   PetscFunctionBegin;
1157e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1158e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1159f1af5d2fSBarry Smith   t  = a->solve_work;
11604e2b4712SSatish Balay 
11614e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
11624e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
11634e2b4712SSatish Balay 
11644e2b4712SSatish Balay   /* forward solve the lower triangular */
11654e2b4712SSatish Balay   idx    = 7*(*r++);
1166f1af5d2fSBarry Smith   t[0] = b[idx];   t[1] = b[1+idx];
1167f1af5d2fSBarry Smith   t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx];
1168f1af5d2fSBarry Smith   t[5] = b[5+idx]; t[6] = b[6+idx];
11694e2b4712SSatish Balay 
11704e2b4712SSatish Balay   for (i=1; i<n; i++) {
11714e2b4712SSatish Balay     v     = aa + 49*ai[i];
11724e2b4712SSatish Balay     vi    = aj + ai[i];
11734e2b4712SSatish Balay     nz    = diag[i] - ai[i];
11744e2b4712SSatish Balay     idx   = 7*(*r++);
1175f1af5d2fSBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1176f1af5d2fSBarry Smith     s5  = b[4+idx];s6 = b[5+idx];s7 = b[6+idx];
11774e2b4712SSatish Balay     while (nz--) {
11784e2b4712SSatish Balay       idx   = 7*(*vi++);
1179f1af5d2fSBarry Smith       x1    = t[idx];  x2 = t[1+idx];x3 = t[2+idx];
1180f1af5d2fSBarry Smith       x4    = t[3+idx];x5 = t[4+idx];
1181f1af5d2fSBarry Smith       x6    = t[5+idx];x7 = t[6+idx];
1182f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1183f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1184f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1185f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1186f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1187f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1188f1af5d2fSBarry Smith       s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
11894e2b4712SSatish Balay       v += 49;
11904e2b4712SSatish Balay     }
11914e2b4712SSatish Balay     idx = 7*i;
1192f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
1193f1af5d2fSBarry Smith     t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
1194f1af5d2fSBarry Smith     t[5+idx] = s6;t[6+idx] = s7;
11954e2b4712SSatish Balay   }
11964e2b4712SSatish Balay   /* backward solve the upper triangular */
11974e2b4712SSatish Balay   for (i=n-1; i>=0; i--){
11984e2b4712SSatish Balay     v    = aa + 49*diag[i] + 49;
11994e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
12004e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
12014e2b4712SSatish Balay     idt  = 7*i;
1202f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
1203f1af5d2fSBarry Smith     s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
1204f1af5d2fSBarry Smith     s6 = t[5+idt];s7 = t[6+idt];
12054e2b4712SSatish Balay     while (nz--) {
12064e2b4712SSatish Balay       idx   = 7*(*vi++);
1207f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx];
1208f1af5d2fSBarry Smith       x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
1209f1af5d2fSBarry Smith       x6    = t[5+idx]; x7 = t[6+idx];
1210f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1211f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1212f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1213f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1214f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1215f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1216f1af5d2fSBarry Smith       s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
12174e2b4712SSatish Balay       v += 49;
12184e2b4712SSatish Balay     }
12194e2b4712SSatish Balay     idc = 7*(*c--);
12204e2b4712SSatish Balay     v   = aa + 49*diag[i];
1221f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1+v[7]*s2+v[14]*s3+
1222f1af5d2fSBarry Smith                                  v[21]*s4+v[28]*s5+v[35]*s6+v[42]*s7;
1223f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1+v[8]*s2+v[15]*s3+
1224f1af5d2fSBarry Smith                                  v[22]*s4+v[29]*s5+v[36]*s6+v[43]*s7;
1225f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1+v[9]*s2+v[16]*s3+
1226f1af5d2fSBarry Smith                                  v[23]*s4+v[30]*s5+v[37]*s6+v[44]*s7;
1227f1af5d2fSBarry Smith     x[3+idc] = t[3+idt] = v[3]*s1+v[10]*s2+v[17]*s3+
1228f1af5d2fSBarry Smith                                  v[24]*s4+v[31]*s5+v[38]*s6+v[45]*s7;
1229f1af5d2fSBarry Smith     x[4+idc] = t[4+idt] = v[4]*s1+v[11]*s2+v[18]*s3+
1230f1af5d2fSBarry Smith                                  v[25]*s4+v[32]*s5+v[39]*s6+v[46]*s7;
1231f1af5d2fSBarry Smith     x[5+idc] = t[5+idt] = v[5]*s1+v[12]*s2+v[19]*s3+
1232f1af5d2fSBarry Smith                                  v[26]*s4+v[33]*s5+v[40]*s6+v[47]*s7;
1233f1af5d2fSBarry Smith     x[6+idc] = t[6+idt] = v[6]*s1+v[13]*s2+v[20]*s3+
1234f1af5d2fSBarry Smith                                  v[27]*s4+v[34]*s5+v[41]*s6+v[48]*s7;
12354e2b4712SSatish Balay   }
12364e2b4712SSatish Balay 
12374e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
12384e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1239e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1240e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1241*b0a32e0cSBarry Smith   PetscLogFlops(2*49*(a->nz) - 7*A->n);
12424e2b4712SSatish Balay   PetscFunctionReturn(0);
12434e2b4712SSatish Balay }
12444e2b4712SSatish Balay 
12454e2b4712SSatish Balay #undef __FUNC__
12469d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering"
124715091d37SBarry Smith int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
124815091d37SBarry Smith {
124915091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
125015091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
125115091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
125215091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1253f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
125415091d37SBarry Smith 
125515091d37SBarry Smith   PetscFunctionBegin;
125615091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
125715091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
125815091d37SBarry Smith   /* forward solve the lower triangular */
125915091d37SBarry Smith   idx    = 0;
126015091d37SBarry Smith   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
126115091d37SBarry Smith   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
126215091d37SBarry Smith   x[6] = b[6+idx];
126315091d37SBarry Smith   for (i=1; i<n; i++) {
126415091d37SBarry Smith     v     =  aa + 49*ai[i];
126515091d37SBarry Smith     vi    =  aj + ai[i];
126615091d37SBarry Smith     nz    =  diag[i] - ai[i];
126715091d37SBarry Smith     idx   =  7*i;
1268f1af5d2fSBarry Smith     s1  =  b[idx];   s2 = b[1+idx]; s3 = b[2+idx];
1269f1af5d2fSBarry Smith     s4  =  b[3+idx]; s5 = b[4+idx]; s6 = b[5+idx];
1270f1af5d2fSBarry Smith     s7  =  b[6+idx];
127115091d37SBarry Smith     while (nz--) {
127215091d37SBarry Smith       jdx   = 7*(*vi++);
127315091d37SBarry Smith       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
127415091d37SBarry Smith       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
127515091d37SBarry Smith       x7    = x[6+jdx];
1276f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1277f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1278f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1279f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1280f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1281f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1282f1af5d2fSBarry Smith       s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
128315091d37SBarry Smith       v += 49;
128415091d37SBarry Smith      }
1285f1af5d2fSBarry Smith     x[idx]   = s1;
1286f1af5d2fSBarry Smith     x[1+idx] = s2;
1287f1af5d2fSBarry Smith     x[2+idx] = s3;
1288f1af5d2fSBarry Smith     x[3+idx] = s4;
1289f1af5d2fSBarry Smith     x[4+idx] = s5;
1290f1af5d2fSBarry Smith     x[5+idx] = s6;
1291f1af5d2fSBarry Smith     x[6+idx] = s7;
129215091d37SBarry Smith   }
129315091d37SBarry Smith   /* backward solve the upper triangular */
129415091d37SBarry Smith   for (i=n-1; i>=0; i--){
129515091d37SBarry Smith     v    = aa + 49*diag[i] + 49;
129615091d37SBarry Smith     vi   = aj + diag[i] + 1;
129715091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
129815091d37SBarry Smith     idt  = 7*i;
1299f1af5d2fSBarry Smith     s1 = x[idt];   s2 = x[1+idt];
1300f1af5d2fSBarry Smith     s3 = x[2+idt]; s4 = x[3+idt];
1301f1af5d2fSBarry Smith     s5 = x[4+idt]; s6 = x[5+idt];
1302f1af5d2fSBarry Smith     s7 = x[6+idt];
130315091d37SBarry Smith     while (nz--) {
130415091d37SBarry Smith       idx   = 7*(*vi++);
130515091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
130615091d37SBarry Smith       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
130715091d37SBarry Smith       x7    = x[6+idx];
1308f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1309f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1310f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1311f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1312f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1313f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1314f1af5d2fSBarry Smith       s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
131515091d37SBarry Smith       v += 49;
131615091d37SBarry Smith     }
131715091d37SBarry Smith     v        = aa + 49*diag[i];
1318f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[7]*s2  + v[14]*s3 + v[21]*s4
1319f1af5d2fSBarry Smith                          + v[28]*s5 + v[35]*s6 + v[42]*s7;
1320f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[8]*s2  + v[15]*s3 + v[22]*s4
1321f1af5d2fSBarry Smith                          + v[29]*s5 + v[36]*s6 + v[43]*s7;
1322f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[9]*s2  + v[16]*s3 + v[23]*s4
1323f1af5d2fSBarry Smith                          + v[30]*s5 + v[37]*s6 + v[44]*s7;
1324f1af5d2fSBarry Smith     x[3+idt] = v[3]*s1 + v[10]*s2  + v[17]*s3 + v[24]*s4
1325f1af5d2fSBarry Smith                          + v[31]*s5 + v[38]*s6 + v[45]*s7;
1326f1af5d2fSBarry Smith     x[4+idt] = v[4]*s1 + v[11]*s2  + v[18]*s3 + v[25]*s4
1327f1af5d2fSBarry Smith                          + v[32]*s5 + v[39]*s6 + v[46]*s7;
1328f1af5d2fSBarry Smith     x[5+idt] = v[5]*s1 + v[12]*s2  + v[19]*s3 + v[26]*s4
1329f1af5d2fSBarry Smith                          + v[33]*s5 + v[40]*s6 + v[47]*s7;
1330f1af5d2fSBarry Smith     x[6+idt] = v[6]*s1 + v[13]*s2  + v[20]*s3 + v[27]*s4
1331f1af5d2fSBarry Smith                          + v[34]*s5 + v[41]*s6 + v[48]*s7;
133215091d37SBarry Smith   }
133315091d37SBarry Smith 
133415091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
133515091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1336*b0a32e0cSBarry Smith   PetscLogFlops(2*36*(a->nz) - 6*A->n);
133715091d37SBarry Smith   PetscFunctionReturn(0);
133815091d37SBarry Smith }
133915091d37SBarry Smith 
134015091d37SBarry Smith #undef __FUNC__
13419d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6"
134215091d37SBarry Smith int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
134315091d37SBarry Smith {
134415091d37SBarry Smith   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
134515091d37SBarry Smith   IS              iscol=a->col,isrow=a->row;
134615091d37SBarry Smith   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
134715091d37SBarry Smith   int             *diag = a->diag;
134815091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1349f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*t;
135015091d37SBarry Smith 
135115091d37SBarry Smith   PetscFunctionBegin;
135215091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
135315091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1354f1af5d2fSBarry Smith   t  = a->solve_work;
135515091d37SBarry Smith 
135615091d37SBarry Smith   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
135715091d37SBarry Smith   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
135815091d37SBarry Smith 
135915091d37SBarry Smith   /* forward solve the lower triangular */
136015091d37SBarry Smith   idx    = 6*(*r++);
1361f1af5d2fSBarry Smith   t[0] = b[idx];   t[1] = b[1+idx];
1362f1af5d2fSBarry Smith   t[2] = b[2+idx]; t[3] = b[3+idx];
1363f1af5d2fSBarry Smith   t[4] = b[4+idx]; t[5] = b[5+idx];
136415091d37SBarry Smith   for (i=1; i<n; i++) {
136515091d37SBarry Smith     v     = aa + 36*ai[i];
136615091d37SBarry Smith     vi    = aj + ai[i];
136715091d37SBarry Smith     nz    = diag[i] - ai[i];
136815091d37SBarry Smith     idx   = 6*(*r++);
1369f1af5d2fSBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1370f1af5d2fSBarry Smith     s5  = b[4+idx]; s6 = b[5+idx];
137115091d37SBarry Smith     while (nz--) {
137215091d37SBarry Smith       idx   = 6*(*vi++);
1373f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx]; x3 = t[2+idx];
1374f1af5d2fSBarry Smith       x4    = t[3+idx]; x5 = t[4+idx]; x6 = t[5+idx];
1375f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1376f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1377f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1378f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1379f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1380f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
138115091d37SBarry Smith       v += 36;
138215091d37SBarry Smith     }
138315091d37SBarry Smith     idx = 6*i;
1384f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
1385f1af5d2fSBarry Smith     t[2+idx] = s3;t[3+idx] = s4;
1386f1af5d2fSBarry Smith     t[4+idx] = s5;t[5+idx] = s6;
138715091d37SBarry Smith   }
138815091d37SBarry Smith   /* backward solve the upper triangular */
138915091d37SBarry Smith   for (i=n-1; i>=0; i--){
139015091d37SBarry Smith     v    = aa + 36*diag[i] + 36;
139115091d37SBarry Smith     vi   = aj + diag[i] + 1;
139215091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
139315091d37SBarry Smith     idt  = 6*i;
1394f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
1395f1af5d2fSBarry Smith     s3 = t[2+idt];s4 = t[3+idt];
1396f1af5d2fSBarry Smith     s5 = t[4+idt];s6 = t[5+idt];
139715091d37SBarry Smith     while (nz--) {
139815091d37SBarry Smith       idx   = 6*(*vi++);
1399f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx];
1400f1af5d2fSBarry Smith       x3    = t[2+idx]; x4 = t[3+idx];
1401f1af5d2fSBarry Smith       x5    = t[4+idx]; x6 = t[5+idx];
1402f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1403f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1404f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1405f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1406f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1407f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
140815091d37SBarry Smith       v += 36;
140915091d37SBarry Smith     }
141015091d37SBarry Smith     idc = 6*(*c--);
141115091d37SBarry Smith     v   = aa + 36*diag[i];
1412f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1+v[6]*s2+v[12]*s3+
1413f1af5d2fSBarry Smith                                  v[18]*s4+v[24]*s5+v[30]*s6;
1414f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1+v[7]*s2+v[13]*s3+
1415f1af5d2fSBarry Smith                                  v[19]*s4+v[25]*s5+v[31]*s6;
1416f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1+v[8]*s2+v[14]*s3+
1417f1af5d2fSBarry Smith                                  v[20]*s4+v[26]*s5+v[32]*s6;
1418f1af5d2fSBarry Smith     x[3+idc] = t[3+idt] = v[3]*s1+v[9]*s2+v[15]*s3+
1419f1af5d2fSBarry Smith                                  v[21]*s4+v[27]*s5+v[33]*s6;
1420f1af5d2fSBarry Smith     x[4+idc] = t[4+idt] = v[4]*s1+v[10]*s2+v[16]*s3+
1421f1af5d2fSBarry Smith                                  v[22]*s4+v[28]*s5+v[34]*s6;
1422f1af5d2fSBarry Smith     x[5+idc] = t[5+idt] = v[5]*s1+v[11]*s2+v[17]*s3+
1423f1af5d2fSBarry Smith                                  v[23]*s4+v[29]*s5+v[35]*s6;
142415091d37SBarry Smith   }
142515091d37SBarry Smith 
142615091d37SBarry Smith   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
142715091d37SBarry Smith   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
142815091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
142915091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1430*b0a32e0cSBarry Smith   PetscLogFlops(2*36*(a->nz) - 6*A->n);
143115091d37SBarry Smith   PetscFunctionReturn(0);
143215091d37SBarry Smith }
143315091d37SBarry Smith 
143415091d37SBarry Smith #undef __FUNC__
14359d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering"
143615091d37SBarry Smith int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
143715091d37SBarry Smith {
143815091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
143915091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
144015091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
144115091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1442f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
144315091d37SBarry Smith 
144415091d37SBarry Smith   PetscFunctionBegin;
144515091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
144615091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
144715091d37SBarry Smith   /* forward solve the lower triangular */
144815091d37SBarry Smith   idx    = 0;
144915091d37SBarry Smith   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
145015091d37SBarry Smith   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
145115091d37SBarry Smith   for (i=1; i<n; i++) {
145215091d37SBarry Smith     v     =  aa + 36*ai[i];
145315091d37SBarry Smith     vi    =  aj + ai[i];
145415091d37SBarry Smith     nz    =  diag[i] - ai[i];
145515091d37SBarry Smith     idx   =  6*i;
1456f1af5d2fSBarry Smith     s1  =  b[idx];   s2 = b[1+idx]; s3 = b[2+idx];
1457f1af5d2fSBarry Smith     s4  =  b[3+idx]; s5 = b[4+idx]; s6 = b[5+idx];
145815091d37SBarry Smith     while (nz--) {
145915091d37SBarry Smith       jdx   = 6*(*vi++);
146015091d37SBarry Smith       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
146115091d37SBarry Smith       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
1462f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1463f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1464f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1465f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1466f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1467f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
146815091d37SBarry Smith       v += 36;
146915091d37SBarry Smith      }
1470f1af5d2fSBarry Smith     x[idx]   = s1;
1471f1af5d2fSBarry Smith     x[1+idx] = s2;
1472f1af5d2fSBarry Smith     x[2+idx] = s3;
1473f1af5d2fSBarry Smith     x[3+idx] = s4;
1474f1af5d2fSBarry Smith     x[4+idx] = s5;
1475f1af5d2fSBarry Smith     x[5+idx] = s6;
147615091d37SBarry Smith   }
147715091d37SBarry Smith   /* backward solve the upper triangular */
147815091d37SBarry Smith   for (i=n-1; i>=0; i--){
147915091d37SBarry Smith     v    = aa + 36*diag[i] + 36;
148015091d37SBarry Smith     vi   = aj + diag[i] + 1;
148115091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
148215091d37SBarry Smith     idt  = 6*i;
1483f1af5d2fSBarry Smith     s1 = x[idt];   s2 = x[1+idt];
1484f1af5d2fSBarry Smith     s3 = x[2+idt]; s4 = x[3+idt];
1485f1af5d2fSBarry Smith     s5 = x[4+idt]; s6 = x[5+idt];
148615091d37SBarry Smith     while (nz--) {
148715091d37SBarry Smith       idx   = 6*(*vi++);
148815091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
148915091d37SBarry Smith       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
1490f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1491f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1492f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1493f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1494f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1495f1af5d2fSBarry Smith       s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
149615091d37SBarry Smith       v += 36;
149715091d37SBarry Smith     }
149815091d37SBarry Smith     v        = aa + 36*diag[i];
1499f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[6]*s2  + v[12]*s3 + v[18]*s4 + v[24]*s5 + v[30]*s6;
1500f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[7]*s2  + v[13]*s3 + v[19]*s4 + v[25]*s5 + v[31]*s6;
1501f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[8]*s2  + v[14]*s3 + v[20]*s4 + v[26]*s5 + v[32]*s6;
1502f1af5d2fSBarry Smith     x[3+idt] = v[3]*s1 + v[9]*s2  + v[15]*s3 + v[21]*s4 + v[27]*s5 + v[33]*s6;
1503f1af5d2fSBarry Smith     x[4+idt] = v[4]*s1 + v[10]*s2 + v[16]*s3 + v[22]*s4 + v[28]*s5 + v[34]*s6;
1504f1af5d2fSBarry Smith     x[5+idt] = v[5]*s1 + v[11]*s2 + v[17]*s3 + v[23]*s4 + v[29]*s5 + v[35]*s6;
150515091d37SBarry Smith   }
150615091d37SBarry Smith 
150715091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
150815091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1509*b0a32e0cSBarry Smith   PetscLogFlops(2*36*(a->nz) - 6*A->n);
151015091d37SBarry Smith   PetscFunctionReturn(0);
151115091d37SBarry Smith }
151215091d37SBarry Smith 
151315091d37SBarry Smith #undef __FUNC__
15149d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5"
15154e2b4712SSatish Balay int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
15164e2b4712SSatish Balay {
15174e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
15184e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
15194e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
15204e2b4712SSatish Balay   int             *diag = a->diag;
15213f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1522f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*t;
15234e2b4712SSatish Balay 
15244e2b4712SSatish Balay   PetscFunctionBegin;
1525e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1526e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1527f1af5d2fSBarry Smith   t  = a->solve_work;
15284e2b4712SSatish Balay 
15294e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
15304e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
15314e2b4712SSatish Balay 
15324e2b4712SSatish Balay   /* forward solve the lower triangular */
15334e2b4712SSatish Balay   idx    = 5*(*r++);
1534f1af5d2fSBarry Smith   t[0] = b[idx];   t[1] = b[1+idx];
1535f1af5d2fSBarry Smith   t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx];
15364e2b4712SSatish Balay   for (i=1; i<n; i++) {
15374e2b4712SSatish Balay     v     = aa + 25*ai[i];
15384e2b4712SSatish Balay     vi    = aj + ai[i];
15394e2b4712SSatish Balay     nz    = diag[i] - ai[i];
15404e2b4712SSatish Balay     idx   = 5*(*r++);
1541f1af5d2fSBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1542f1af5d2fSBarry Smith     s5  = b[4+idx];
15434e2b4712SSatish Balay     while (nz--) {
15444e2b4712SSatish Balay       idx   = 5*(*vi++);
1545f1af5d2fSBarry Smith       x1    = t[idx];  x2 = t[1+idx];x3 = t[2+idx];
1546f1af5d2fSBarry Smith       x4    = t[3+idx];x5 = t[4+idx];
1547f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1548f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1549f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1550f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1551f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
15524e2b4712SSatish Balay       v += 25;
15534e2b4712SSatish Balay     }
15544e2b4712SSatish Balay     idx = 5*i;
1555f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
1556f1af5d2fSBarry Smith     t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
15574e2b4712SSatish Balay   }
15584e2b4712SSatish Balay   /* backward solve the upper triangular */
15594e2b4712SSatish Balay   for (i=n-1; i>=0; i--){
15604e2b4712SSatish Balay     v    = aa + 25*diag[i] + 25;
15614e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
15624e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
15634e2b4712SSatish Balay     idt  = 5*i;
1564f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
1565f1af5d2fSBarry Smith     s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
15664e2b4712SSatish Balay     while (nz--) {
15674e2b4712SSatish Balay       idx   = 5*(*vi++);
1568f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx];
1569f1af5d2fSBarry Smith       x3    = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
1570f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1571f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1572f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1573f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1574f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
15754e2b4712SSatish Balay       v += 25;
15764e2b4712SSatish Balay     }
15774e2b4712SSatish Balay     idc = 5*(*c--);
15784e2b4712SSatish Balay     v   = aa + 25*diag[i];
1579f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1+v[5]*s2+v[10]*s3+
1580f1af5d2fSBarry Smith                                  v[15]*s4+v[20]*s5;
1581f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1+v[6]*s2+v[11]*s3+
1582f1af5d2fSBarry Smith                                  v[16]*s4+v[21]*s5;
1583f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1+v[7]*s2+v[12]*s3+
1584f1af5d2fSBarry Smith                                  v[17]*s4+v[22]*s5;
1585f1af5d2fSBarry Smith     x[3+idc] = t[3+idt] = v[3]*s1+v[8]*s2+v[13]*s3+
1586f1af5d2fSBarry Smith                                  v[18]*s4+v[23]*s5;
1587f1af5d2fSBarry Smith     x[4+idc] = t[4+idt] = v[4]*s1+v[9]*s2+v[14]*s3+
1588f1af5d2fSBarry Smith                                  v[19]*s4+v[24]*s5;
15894e2b4712SSatish Balay   }
15904e2b4712SSatish Balay 
15914e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
15924e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1593e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1594e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1595*b0a32e0cSBarry Smith   PetscLogFlops(2*25*(a->nz) - 5*A->n);
15964e2b4712SSatish Balay   PetscFunctionReturn(0);
15974e2b4712SSatish Balay }
15984e2b4712SSatish Balay 
15994e2b4712SSatish Balay #undef __FUNC__
16009d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering"
160115091d37SBarry Smith int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
160215091d37SBarry Smith {
160315091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
160415091d37SBarry Smith   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
160515091d37SBarry Smith   int             ierr,*diag = a->diag,jdx;
160615091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1607329f5518SBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
160815091d37SBarry Smith 
160915091d37SBarry Smith   PetscFunctionBegin;
161015091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
161115091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
161215091d37SBarry Smith   /* forward solve the lower triangular */
161315091d37SBarry Smith   idx    = 0;
161415091d37SBarry Smith   x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
161515091d37SBarry Smith   for (i=1; i<n; i++) {
161615091d37SBarry Smith     v     =  aa + 25*ai[i];
161715091d37SBarry Smith     vi    =  aj + ai[i];
161815091d37SBarry Smith     nz    =  diag[i] - ai[i];
161915091d37SBarry Smith     idx   =  5*i;
1620f1af5d2fSBarry Smith     s1  =  b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];s5 = b[4+idx];
162115091d37SBarry Smith     while (nz--) {
162215091d37SBarry Smith       jdx   = 5*(*vi++);
162315091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
1624f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1625f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1626f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1627f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1628f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
162915091d37SBarry Smith       v    += 25;
163015091d37SBarry Smith     }
1631f1af5d2fSBarry Smith     x[idx]   = s1;
1632f1af5d2fSBarry Smith     x[1+idx] = s2;
1633f1af5d2fSBarry Smith     x[2+idx] = s3;
1634f1af5d2fSBarry Smith     x[3+idx] = s4;
1635f1af5d2fSBarry Smith     x[4+idx] = s5;
163615091d37SBarry Smith   }
163715091d37SBarry Smith   /* backward solve the upper triangular */
163815091d37SBarry Smith   for (i=n-1; i>=0; i--){
163915091d37SBarry Smith     v    = aa + 25*diag[i] + 25;
164015091d37SBarry Smith     vi   = aj + diag[i] + 1;
164115091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
164215091d37SBarry Smith     idt  = 5*i;
1643f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt];
1644f1af5d2fSBarry Smith     s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
164515091d37SBarry Smith     while (nz--) {
164615091d37SBarry Smith       idx   = 5*(*vi++);
164715091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
1648f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1649f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1650f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1651f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1652f1af5d2fSBarry Smith       s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
165315091d37SBarry Smith       v    += 25;
165415091d37SBarry Smith     }
165515091d37SBarry Smith     v        = aa + 25*diag[i];
1656f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[5]*s2 + v[10]*s3  + v[15]*s4 + v[20]*s5;
1657f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[6]*s2 + v[11]*s3  + v[16]*s4 + v[21]*s5;
1658f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[7]*s2 + v[12]*s3  + v[17]*s4 + v[22]*s5;
1659f1af5d2fSBarry Smith     x[3+idt] = v[3]*s1 + v[8]*s2 + v[13]*s3  + v[18]*s4 + v[23]*s5;
1660f1af5d2fSBarry Smith     x[4+idt] = v[4]*s1 + v[9]*s2 + v[14]*s3  + v[19]*s4 + v[24]*s5;
166115091d37SBarry Smith   }
166215091d37SBarry Smith 
166315091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
166415091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1665*b0a32e0cSBarry Smith   PetscLogFlops(2*25*(a->nz) - 5*A->n);
166615091d37SBarry Smith   PetscFunctionReturn(0);
166715091d37SBarry Smith }
166815091d37SBarry Smith 
166915091d37SBarry Smith #undef __FUNC__
16709d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_4"
16714e2b4712SSatish Balay int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
16724e2b4712SSatish Balay {
16734e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
16744e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
16754e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
16764e2b4712SSatish Balay   int             *diag = a->diag;
16773f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1678f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,s4,x1,x2,x3,x4,*t;
16794e2b4712SSatish Balay 
16804e2b4712SSatish Balay   PetscFunctionBegin;
1681e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1682e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1683f1af5d2fSBarry Smith   t  = a->solve_work;
16844e2b4712SSatish Balay 
16854e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
16864e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
16874e2b4712SSatish Balay 
16884e2b4712SSatish Balay   /* forward solve the lower triangular */
16894e2b4712SSatish Balay   idx    = 4*(*r++);
1690f1af5d2fSBarry Smith   t[0] = b[idx];   t[1] = b[1+idx];
1691f1af5d2fSBarry Smith   t[2] = b[2+idx]; t[3] = b[3+idx];
16924e2b4712SSatish Balay   for (i=1; i<n; i++) {
16934e2b4712SSatish Balay     v     = aa + 16*ai[i];
16944e2b4712SSatish Balay     vi    = aj + ai[i];
16954e2b4712SSatish Balay     nz    = diag[i] - ai[i];
16964e2b4712SSatish Balay     idx   = 4*(*r++);
1697f1af5d2fSBarry Smith     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
16984e2b4712SSatish Balay     while (nz--) {
16994e2b4712SSatish Balay       idx   = 4*(*vi++);
1700f1af5d2fSBarry Smith       x1    = t[idx];x2 = t[1+idx];x3 = t[2+idx];x4 = t[3+idx];
1701f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
1702f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
1703f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1704f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
17054e2b4712SSatish Balay       v    += 16;
17064e2b4712SSatish Balay     }
17074e2b4712SSatish Balay     idx        = 4*i;
1708f1af5d2fSBarry Smith     t[idx]   = s1;t[1+idx] = s2;
1709f1af5d2fSBarry Smith     t[2+idx] = s3;t[3+idx] = s4;
17104e2b4712SSatish Balay   }
17114e2b4712SSatish Balay   /* backward solve the upper triangular */
17124e2b4712SSatish Balay   for (i=n-1; i>=0; i--){
17134e2b4712SSatish Balay     v    = aa + 16*diag[i] + 16;
17144e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
17154e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
17164e2b4712SSatish Balay     idt  = 4*i;
1717f1af5d2fSBarry Smith     s1 = t[idt];  s2 = t[1+idt];
1718f1af5d2fSBarry Smith     s3 = t[2+idt];s4 = t[3+idt];
17194e2b4712SSatish Balay     while (nz--) {
17204e2b4712SSatish Balay       idx   = 4*(*vi++);
1721f1af5d2fSBarry Smith       x1    = t[idx];   x2 = t[1+idx];
1722f1af5d2fSBarry Smith       x3    = t[2+idx]; x4 = t[3+idx];
1723f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1724f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1725f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1726f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
17274e2b4712SSatish Balay       v += 16;
17284e2b4712SSatish Balay     }
17294e2b4712SSatish Balay     idc      = 4*(*c--);
17304e2b4712SSatish Balay     v        = aa + 16*diag[i];
1731f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1+v[4]*s2+v[8]*s3+v[12]*s4;
1732f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1+v[5]*s2+v[9]*s3+v[13]*s4;
1733f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1+v[6]*s2+v[10]*s3+v[14]*s4;
1734f1af5d2fSBarry Smith     x[3+idc] = t[3+idt] = v[3]*s1+v[7]*s2+v[11]*s3+v[15]*s4;
17354e2b4712SSatish Balay   }
17364e2b4712SSatish Balay 
17374e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
17384e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1739e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1740e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1741*b0a32e0cSBarry Smith   PetscLogFlops(2*16*(a->nz) - 4*A->n);
17424e2b4712SSatish Balay   PetscFunctionReturn(0);
17434e2b4712SSatish Balay }
17440ef38995SBarry Smith 
17450ef38995SBarry Smith 
17464e2b4712SSatish Balay /*
17474e2b4712SSatish Balay       Special case where the matrix was ILU(0) factored in the natural
17484e2b4712SSatish Balay    ordering. This eliminates the need for the column and row permutation.
17494e2b4712SSatish Balay */
17504e2b4712SSatish Balay #undef __FUNC__
17519d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering"
17524e2b4712SSatish Balay int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
17534e2b4712SSatish Balay {
17544e2b4712SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
175530d4dcafSBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
175630d4dcafSBarry Smith   int             ierr,*diag = a->diag;
17573f1db9ecSBarry Smith   MatScalar       *aa=a->a;
175830d4dcafSBarry Smith   Scalar          *x,*b;
17594e2b4712SSatish Balay 
17604e2b4712SSatish Balay   PetscFunctionBegin;
1761e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1762e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
17634e2b4712SSatish Balay 
1764aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
17652853dc0eSBarry Smith   {
17662853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
17672853dc0eSBarry Smith     fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w);
17682853dc0eSBarry Smith   }
1769aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
17702853dc0eSBarry Smith   {
17712853dc0eSBarry Smith     static Scalar w[2000]; /* very BAD need to fix */
17722853dc0eSBarry Smith     fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w);
17732853dc0eSBarry Smith   }
1774aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
17752853dc0eSBarry Smith   fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b);
1776e1293385SBarry Smith #else
177730d4dcafSBarry Smith   {
1778f1af5d2fSBarry Smith     Scalar    s1,s2,s3,s4,x1,x2,x3,x4;
17793f1db9ecSBarry Smith     MatScalar *v;
17804e555682SBarry Smith     int       jdx,idt,idx,nz,*vi,i,ai16;
1781e1293385SBarry Smith 
17824e2b4712SSatish Balay   /* forward solve the lower triangular */
17834e2b4712SSatish Balay   idx    = 0;
1784e1293385SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3];
17854e2b4712SSatish Balay   for (i=1; i<n; i++) {
17864e2b4712SSatish Balay     v     =  aa      + 16*ai[i];
17874e2b4712SSatish Balay     vi    =  aj      + ai[i];
17884e2b4712SSatish Balay     nz    =  diag[i] - ai[i];
1789e1293385SBarry Smith     idx   +=  4;
1790f1af5d2fSBarry Smith     s1  =  b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
17914e2b4712SSatish Balay     while (nz--) {
17924e2b4712SSatish Balay       jdx   = 4*(*vi++);
17934e2b4712SSatish Balay       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
1794f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
1795f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
1796f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1797f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
17984e2b4712SSatish Balay       v    += 16;
17994e2b4712SSatish Balay     }
1800f1af5d2fSBarry Smith     x[idx]   = s1;
1801f1af5d2fSBarry Smith     x[1+idx] = s2;
1802f1af5d2fSBarry Smith     x[2+idx] = s3;
1803f1af5d2fSBarry Smith     x[3+idx] = s4;
18044e2b4712SSatish Balay   }
18054e2b4712SSatish Balay   /* backward solve the upper triangular */
18064e555682SBarry Smith   idt = 4*(n-1);
18074e2b4712SSatish Balay   for (i=n-1; i>=0; i--){
18084e555682SBarry Smith     ai16 = 16*diag[i];
18094e555682SBarry Smith     v    = aa + ai16 + 16;
18104e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
18114e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
1812f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt];
1813f1af5d2fSBarry Smith     s3 = x[2+idt];s4 = x[3+idt];
18144e2b4712SSatish Balay     while (nz--) {
18154e2b4712SSatish Balay       idx   = 4*(*vi++);
18164e2b4712SSatish Balay       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx];
1817f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1818f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1819f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1820f1af5d2fSBarry Smith       s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
18214e2b4712SSatish Balay       v    += 16;
18224e2b4712SSatish Balay     }
18234e555682SBarry Smith     v        = aa + ai16;
1824f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[4]*s2 + v[8]*s3  + v[12]*s4;
1825f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[5]*s2 + v[9]*s3  + v[13]*s4;
1826f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[6]*s2 + v[10]*s3 + v[14]*s4;
1827f1af5d2fSBarry Smith     x[3+idt] = v[3]*s1 + v[7]*s2 + v[11]*s3 + v[15]*s4;
1828329f5518SBarry Smith     idt -= 4;
18294e2b4712SSatish Balay   }
183030d4dcafSBarry Smith   }
1831e1293385SBarry Smith #endif
18324e2b4712SSatish Balay 
1833e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1834e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1835*b0a32e0cSBarry Smith   PetscLogFlops(2*16*(a->nz) - 4*A->n);
18364e2b4712SSatish Balay   PetscFunctionReturn(0);
18374e2b4712SSatish Balay }
18384e2b4712SSatish Balay 
1839667159a5SBarry Smith #undef __FUNC__
18409d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_3"
18414e2b4712SSatish Balay int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
18424e2b4712SSatish Balay {
18434e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
18444e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
18454e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
18464e2b4712SSatish Balay   int             *diag = a->diag;
18473f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1848f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,x1,x2,x3,*t;
18494e2b4712SSatish Balay 
18504e2b4712SSatish Balay   PetscFunctionBegin;
1851e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1852e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1853f1af5d2fSBarry Smith   t  = a->solve_work;
18544e2b4712SSatish Balay 
18554e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
18564e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
18574e2b4712SSatish Balay 
18584e2b4712SSatish Balay   /* forward solve the lower triangular */
18594e2b4712SSatish Balay   idx    = 3*(*r++);
1860f1af5d2fSBarry Smith   t[0] = b[idx]; t[1] = b[1+idx]; t[2] = b[2+idx];
18614e2b4712SSatish Balay   for (i=1; i<n; i++) {
18624e2b4712SSatish Balay     v     = aa + 9*ai[i];
18634e2b4712SSatish Balay     vi    = aj + ai[i];
18644e2b4712SSatish Balay     nz    = diag[i] - ai[i];
18654e2b4712SSatish Balay     idx   = 3*(*r++);
1866f1af5d2fSBarry Smith     s1  = b[idx]; s2 = b[1+idx]; s3 = b[2+idx];
18674e2b4712SSatish Balay     while (nz--) {
18684e2b4712SSatish Balay       idx   = 3*(*vi++);
1869f1af5d2fSBarry Smith       x1    = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
1870f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1871f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1872f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
18734e2b4712SSatish Balay       v += 9;
18744e2b4712SSatish Balay     }
18754e2b4712SSatish Balay     idx = 3*i;
1876f1af5d2fSBarry Smith     t[idx] = s1; t[1+idx] = s2; t[2+idx] = s3;
18774e2b4712SSatish Balay   }
18784e2b4712SSatish Balay   /* backward solve the upper triangular */
18794e2b4712SSatish Balay   for (i=n-1; i>=0; i--){
18804e2b4712SSatish Balay     v    = aa + 9*diag[i] + 9;
18814e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
18824e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
18834e2b4712SSatish Balay     idt  = 3*i;
1884f1af5d2fSBarry Smith     s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];
18854e2b4712SSatish Balay     while (nz--) {
18864e2b4712SSatish Balay       idx   = 3*(*vi++);
1887f1af5d2fSBarry Smith       x1    = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
1888f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1889f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1890f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
18914e2b4712SSatish Balay       v += 9;
18924e2b4712SSatish Balay     }
18934e2b4712SSatish Balay     idc = 3*(*c--);
18944e2b4712SSatish Balay     v   = aa + 9*diag[i];
1895f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
1896f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
1897f1af5d2fSBarry Smith     x[2+idc] = t[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
18984e2b4712SSatish Balay   }
18994e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
19004e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1901e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1902e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1903*b0a32e0cSBarry Smith   PetscLogFlops(2*9*(a->nz) - 3*A->n);
19044e2b4712SSatish Balay   PetscFunctionReturn(0);
19054e2b4712SSatish Balay }
19064e2b4712SSatish Balay 
190715091d37SBarry Smith /*
190815091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
190915091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
191015091d37SBarry Smith */
191115091d37SBarry Smith #undef __FUNC__
19129d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering"
191315091d37SBarry Smith int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
191415091d37SBarry Smith {
191515091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
191615091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
191715091d37SBarry Smith   int             ierr,*diag = a->diag;
191815091d37SBarry Smith   MatScalar       *aa=a->a,*v;
1919f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,s3,x1,x2,x3;
192015091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
192115091d37SBarry Smith 
192215091d37SBarry Smith   PetscFunctionBegin;
192315091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
192415091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
192515091d37SBarry Smith 
192615091d37SBarry Smith 
192715091d37SBarry Smith   /* forward solve the lower triangular */
192815091d37SBarry Smith   idx    = 0;
192915091d37SBarry Smith   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2];
193015091d37SBarry Smith   for (i=1; i<n; i++) {
193115091d37SBarry Smith     v     =  aa      + 9*ai[i];
193215091d37SBarry Smith     vi    =  aj      + ai[i];
193315091d37SBarry Smith     nz    =  diag[i] - ai[i];
193415091d37SBarry Smith     idx   +=  3;
1935f1af5d2fSBarry Smith     s1  =  b[idx];s2 = b[1+idx];s3 = b[2+idx];
193615091d37SBarry Smith     while (nz--) {
193715091d37SBarry Smith       jdx   = 3*(*vi++);
193815091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
1939f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1940f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1941f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
194215091d37SBarry Smith       v    += 9;
194315091d37SBarry Smith     }
1944f1af5d2fSBarry Smith     x[idx]   = s1;
1945f1af5d2fSBarry Smith     x[1+idx] = s2;
1946f1af5d2fSBarry Smith     x[2+idx] = s3;
194715091d37SBarry Smith   }
194815091d37SBarry Smith   /* backward solve the upper triangular */
194915091d37SBarry Smith   for (i=n-1; i>=0; i--){
195015091d37SBarry Smith     v    = aa + 9*diag[i] + 9;
195115091d37SBarry Smith     vi   = aj + diag[i] + 1;
195215091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
195315091d37SBarry Smith     idt  = 3*i;
1954f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt];
1955f1af5d2fSBarry Smith     s3 = x[2+idt];
195615091d37SBarry Smith     while (nz--) {
195715091d37SBarry Smith       idx   = 3*(*vi++);
195815091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx];
1959f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1960f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1961f1af5d2fSBarry Smith       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
196215091d37SBarry Smith       v    += 9;
196315091d37SBarry Smith     }
196415091d37SBarry Smith     v        = aa +  9*diag[i];
1965f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
1966f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
1967f1af5d2fSBarry Smith     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
196815091d37SBarry Smith   }
196915091d37SBarry Smith 
197015091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
197115091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1972*b0a32e0cSBarry Smith   PetscLogFlops(2*9*(a->nz) - 3*A->n);
197315091d37SBarry Smith   PetscFunctionReturn(0);
197415091d37SBarry Smith }
197515091d37SBarry Smith 
19764e2b4712SSatish Balay #undef __FUNC__
19779d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_2"
19784e2b4712SSatish Balay int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
19794e2b4712SSatish Balay {
19804e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
19814e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
19824e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
19834e2b4712SSatish Balay   int             *diag = a->diag;
19843f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
1985f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,x1,x2,*t;
19864e2b4712SSatish Balay 
19874e2b4712SSatish Balay   PetscFunctionBegin;
1988e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1989e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1990f1af5d2fSBarry Smith   t  = a->solve_work;
19914e2b4712SSatish Balay 
19924e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
19934e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
19944e2b4712SSatish Balay 
19954e2b4712SSatish Balay   /* forward solve the lower triangular */
19964e2b4712SSatish Balay   idx    = 2*(*r++);
1997f1af5d2fSBarry Smith   t[0] = b[idx]; t[1] = b[1+idx];
19984e2b4712SSatish Balay   for (i=1; i<n; i++) {
19994e2b4712SSatish Balay     v     = aa + 4*ai[i];
20004e2b4712SSatish Balay     vi    = aj + ai[i];
20014e2b4712SSatish Balay     nz    = diag[i] - ai[i];
20024e2b4712SSatish Balay     idx   = 2*(*r++);
2003f1af5d2fSBarry Smith     s1  = b[idx]; s2 = b[1+idx];
20044e2b4712SSatish Balay     while (nz--) {
20054e2b4712SSatish Balay       idx   = 2*(*vi++);
2006f1af5d2fSBarry Smith       x1    = t[idx]; x2 = t[1+idx];
2007f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
2008f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
20094e2b4712SSatish Balay       v += 4;
20104e2b4712SSatish Balay     }
20114e2b4712SSatish Balay     idx = 2*i;
2012f1af5d2fSBarry Smith     t[idx] = s1; t[1+idx] = s2;
20134e2b4712SSatish Balay   }
20144e2b4712SSatish Balay   /* backward solve the upper triangular */
20154e2b4712SSatish Balay   for (i=n-1; i>=0; i--){
20164e2b4712SSatish Balay     v    = aa + 4*diag[i] + 4;
20174e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
20184e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
20194e2b4712SSatish Balay     idt  = 2*i;
2020f1af5d2fSBarry Smith     s1 = t[idt]; s2 = t[1+idt];
20214e2b4712SSatish Balay     while (nz--) {
20224e2b4712SSatish Balay       idx   = 2*(*vi++);
2023f1af5d2fSBarry Smith       x1    = t[idx]; x2 = t[1+idx];
2024f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
2025f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
20264e2b4712SSatish Balay       v += 4;
20274e2b4712SSatish Balay     }
20284e2b4712SSatish Balay     idc = 2*(*c--);
20294e2b4712SSatish Balay     v   = aa + 4*diag[i];
2030f1af5d2fSBarry Smith     x[idc]   = t[idt]   = v[0]*s1 + v[2]*s2;
2031f1af5d2fSBarry Smith     x[1+idc] = t[1+idt] = v[1]*s1 + v[3]*s2;
20324e2b4712SSatish Balay   }
20334e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
20344e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
2035e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2036e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2037*b0a32e0cSBarry Smith   PetscLogFlops(2*4*(a->nz) - 2*A->n);
20384e2b4712SSatish Balay   PetscFunctionReturn(0);
20394e2b4712SSatish Balay }
20404e2b4712SSatish Balay 
204115091d37SBarry Smith /*
204215091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
204315091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
204415091d37SBarry Smith */
204515091d37SBarry Smith #undef __FUNC__
20469d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering"
204715091d37SBarry Smith int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
204815091d37SBarry Smith {
204915091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
205015091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
205115091d37SBarry Smith   int             ierr,*diag = a->diag;
205215091d37SBarry Smith   MatScalar       *aa=a->a,*v;
2053f1af5d2fSBarry Smith   Scalar          *x,*b,s1,s2,x1,x2;
205415091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
205515091d37SBarry Smith 
205615091d37SBarry Smith   PetscFunctionBegin;
205715091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
205815091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
205915091d37SBarry Smith 
206015091d37SBarry Smith   /* forward solve the lower triangular */
206115091d37SBarry Smith   idx    = 0;
206215091d37SBarry Smith   x[0]   = b[0]; x[1] = b[1];
206315091d37SBarry Smith   for (i=1; i<n; i++) {
206415091d37SBarry Smith     v     =  aa      + 4*ai[i];
206515091d37SBarry Smith     vi    =  aj      + ai[i];
206615091d37SBarry Smith     nz    =  diag[i] - ai[i];
206715091d37SBarry Smith     idx   +=  2;
2068f1af5d2fSBarry Smith     s1  =  b[idx];s2 = b[1+idx];
206915091d37SBarry Smith     while (nz--) {
207015091d37SBarry Smith       jdx   = 2*(*vi++);
207115091d37SBarry Smith       x1    = x[jdx];x2 = x[1+jdx];
2072f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
2073f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
207415091d37SBarry Smith       v    += 4;
207515091d37SBarry Smith     }
2076f1af5d2fSBarry Smith     x[idx]   = s1;
2077f1af5d2fSBarry Smith     x[1+idx] = s2;
207815091d37SBarry Smith   }
207915091d37SBarry Smith   /* backward solve the upper triangular */
208015091d37SBarry Smith   for (i=n-1; i>=0; i--){
208115091d37SBarry Smith     v    = aa + 4*diag[i] + 4;
208215091d37SBarry Smith     vi   = aj + diag[i] + 1;
208315091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
208415091d37SBarry Smith     idt  = 2*i;
2085f1af5d2fSBarry Smith     s1 = x[idt];  s2 = x[1+idt];
208615091d37SBarry Smith     while (nz--) {
208715091d37SBarry Smith       idx   = 2*(*vi++);
208815091d37SBarry Smith       x1    = x[idx];   x2 = x[1+idx];
2089f1af5d2fSBarry Smith       s1 -= v[0]*x1 + v[2]*x2;
2090f1af5d2fSBarry Smith       s2 -= v[1]*x1 + v[3]*x2;
209115091d37SBarry Smith       v    += 4;
209215091d37SBarry Smith     }
209315091d37SBarry Smith     v        = aa +  4*diag[i];
2094f1af5d2fSBarry Smith     x[idt]   = v[0]*s1 + v[2]*s2;
2095f1af5d2fSBarry Smith     x[1+idt] = v[1]*s1 + v[3]*s2;
209615091d37SBarry Smith   }
209715091d37SBarry Smith 
209815091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
209915091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2100*b0a32e0cSBarry Smith   PetscLogFlops(2*4*(a->nz) - 2*A->n);
210115091d37SBarry Smith   PetscFunctionReturn(0);
210215091d37SBarry Smith }
210315091d37SBarry Smith 
21044e2b4712SSatish Balay #undef __FUNC__
21059d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_1"
21064e2b4712SSatish Balay int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
21074e2b4712SSatish Balay {
21084e2b4712SSatish Balay   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
21094e2b4712SSatish Balay   IS              iscol=a->col,isrow=a->row;
21104e2b4712SSatish Balay   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
21114e2b4712SSatish Balay   int             *diag = a->diag;
21123f1db9ecSBarry Smith   MatScalar       *aa=a->a,*v;
2113f1af5d2fSBarry Smith   Scalar          *x,*b,s1,*t;
21144e2b4712SSatish Balay 
21154e2b4712SSatish Balay   PetscFunctionBegin;
21164e2b4712SSatish Balay   if (!n) PetscFunctionReturn(0);
21174e2b4712SSatish Balay 
2118e1311b90SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2119e1311b90SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2120f1af5d2fSBarry Smith   t  = a->solve_work;
21214e2b4712SSatish Balay 
21224e2b4712SSatish Balay   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
21234e2b4712SSatish Balay   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
21244e2b4712SSatish Balay 
21254e2b4712SSatish Balay   /* forward solve the lower triangular */
2126f1af5d2fSBarry Smith   t[0] = b[*r++];
21274e2b4712SSatish Balay   for (i=1; i<n; i++) {
21284e2b4712SSatish Balay     v     = aa + ai[i];
21294e2b4712SSatish Balay     vi    = aj + ai[i];
21304e2b4712SSatish Balay     nz    = diag[i] - ai[i];
2131f1af5d2fSBarry Smith     s1  = b[*r++];
21324e2b4712SSatish Balay     while (nz--) {
2133f1af5d2fSBarry Smith       s1 -= (*v++)*t[*vi++];
21344e2b4712SSatish Balay     }
2135f1af5d2fSBarry Smith     t[i] = s1;
21364e2b4712SSatish Balay   }
21374e2b4712SSatish Balay   /* backward solve the upper triangular */
21384e2b4712SSatish Balay   for (i=n-1; i>=0; i--){
21394e2b4712SSatish Balay     v    = aa + diag[i] + 1;
21404e2b4712SSatish Balay     vi   = aj + diag[i] + 1;
21414e2b4712SSatish Balay     nz   = ai[i+1] - diag[i] - 1;
2142f1af5d2fSBarry Smith     s1 = t[i];
21434e2b4712SSatish Balay     while (nz--) {
2144f1af5d2fSBarry Smith       s1 -= (*v++)*t[*vi++];
21454e2b4712SSatish Balay     }
2146f1af5d2fSBarry Smith     x[*c--] = t[i] = aa[diag[i]]*s1;
21474e2b4712SSatish Balay   }
21484e2b4712SSatish Balay 
21494e2b4712SSatish Balay   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
21504e2b4712SSatish Balay   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
2151e1311b90SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2152e1311b90SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2153*b0a32e0cSBarry Smith   PetscLogFlops(2*1*(a->nz) - A->n);
21544e2b4712SSatish Balay   PetscFunctionReturn(0);
21554e2b4712SSatish Balay }
215615091d37SBarry Smith /*
215715091d37SBarry Smith       Special case where the matrix was ILU(0) factored in the natural
215815091d37SBarry Smith    ordering. This eliminates the need for the column and row permutation.
215915091d37SBarry Smith */
216015091d37SBarry Smith #undef __FUNC__
21619d082aa4SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering"
216215091d37SBarry Smith int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
216315091d37SBarry Smith {
216415091d37SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
216515091d37SBarry Smith   int             n=a->mbs,*ai=a->i,*aj=a->j;
216615091d37SBarry Smith   int             ierr,*diag = a->diag;
216715091d37SBarry Smith   MatScalar       *aa=a->a;
216815091d37SBarry Smith   Scalar          *x,*b;
2169f1af5d2fSBarry Smith   Scalar          s1,x1;
217015091d37SBarry Smith   MatScalar       *v;
217115091d37SBarry Smith   int             jdx,idt,idx,nz,*vi,i;
217215091d37SBarry Smith 
217315091d37SBarry Smith   PetscFunctionBegin;
217415091d37SBarry Smith   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
217515091d37SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
217615091d37SBarry Smith 
217715091d37SBarry Smith   /* forward solve the lower triangular */
217815091d37SBarry Smith   idx    = 0;
217915091d37SBarry Smith   x[0]   = b[0];
218015091d37SBarry Smith   for (i=1; i<n; i++) {
218115091d37SBarry Smith     v     =  aa      + ai[i];
218215091d37SBarry Smith     vi    =  aj      + ai[i];
218315091d37SBarry Smith     nz    =  diag[i] - ai[i];
218415091d37SBarry Smith     idx   +=  1;
2185f1af5d2fSBarry Smith     s1  =  b[idx];
218615091d37SBarry Smith     while (nz--) {
218715091d37SBarry Smith       jdx   = *vi++;
218815091d37SBarry Smith       x1    = x[jdx];
2189f1af5d2fSBarry Smith       s1 -= v[0]*x1;
219015091d37SBarry Smith       v    += 1;
219115091d37SBarry Smith     }
2192f1af5d2fSBarry Smith     x[idx]   = s1;
219315091d37SBarry Smith   }
219415091d37SBarry Smith   /* backward solve the upper triangular */
219515091d37SBarry Smith   for (i=n-1; i>=0; i--){
219615091d37SBarry Smith     v    = aa + diag[i] + 1;
219715091d37SBarry Smith     vi   = aj + diag[i] + 1;
219815091d37SBarry Smith     nz   = ai[i+1] - diag[i] - 1;
219915091d37SBarry Smith     idt  = i;
2200f1af5d2fSBarry Smith     s1 = x[idt];
220115091d37SBarry Smith     while (nz--) {
220215091d37SBarry Smith       idx   = *vi++;
220315091d37SBarry Smith       x1    = x[idx];
2204f1af5d2fSBarry Smith       s1 -= v[0]*x1;
220515091d37SBarry Smith       v    += 1;
220615091d37SBarry Smith     }
220715091d37SBarry Smith     v        = aa +  diag[i];
2208f1af5d2fSBarry Smith     x[idt]   = v[0]*s1;
220915091d37SBarry Smith   }
221015091d37SBarry Smith   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
221115091d37SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2212*b0a32e0cSBarry Smith   PetscLogFlops(2*(a->nz) - A->n);
221315091d37SBarry Smith   PetscFunctionReturn(0);
221415091d37SBarry Smith }
22154e2b4712SSatish Balay 
22164e2b4712SSatish Balay /* ----------------------------------------------------------------*/
22174e2b4712SSatish Balay /*
22184e2b4712SSatish Balay      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
22194e2b4712SSatish Balay    except that the data structure of Mat_SeqAIJ is slightly different.
22204e2b4712SSatish Balay    Not a good example of code reuse.
22214e2b4712SSatish Balay */
2222ca44d042SBarry Smith EXTERN int MatMissingDiagonal_SeqBAIJ(Mat);
2223435faa5fSBarry Smith 
22244e2b4712SSatish Balay #undef __FUNC__
22259d082aa4SBarry Smith #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ"
2226435faa5fSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
22274e2b4712SSatish Balay {
22284e2b4712SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
22294e2b4712SSatish Balay   IS          isicol;
22304e2b4712SSatish Balay   int         *r,*ic,ierr,prow,n = a->mbs,*ai = a->i,*aj = a->j;
22314e2b4712SSatish Balay   int         *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
2232335d9088SBarry Smith   int         *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
2233435faa5fSBarry Smith   int         incrlev,nnz,i,bs = a->bs,bs2 = a->bs2,levels,diagonal_fill;
22344e2b4712SSatish Balay   PetscTruth  col_identity,row_identity;
2235329f5518SBarry Smith   PetscReal   f;
22364e2b4712SSatish Balay 
22374e2b4712SSatish Balay   PetscFunctionBegin;
2238309c388cSBarry Smith   PetscValidHeaderSpecific(isrow,IS_COOKIE);
2239309c388cSBarry Smith   PetscValidHeaderSpecific(iscol,IS_COOKIE);
2240309c388cSBarry Smith 
2241435faa5fSBarry Smith   if (info) {
2242435faa5fSBarry Smith     f             = info->fill;
2243335d9088SBarry Smith     levels        = (int)info->levels;
2244335d9088SBarry Smith     diagonal_fill = (int)info->diagonal_fill;
2245435faa5fSBarry Smith   } else {
2246435faa5fSBarry Smith     f             = 1.0;
2247435faa5fSBarry Smith     levels        = 0;
2248435faa5fSBarry Smith     diagonal_fill = 0;
2249435faa5fSBarry Smith   }
22503eda8832SBarry Smith   if (!isrow) {
22513eda8832SBarry Smith     ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr);
22523eda8832SBarry Smith   }
22533eda8832SBarry Smith   if (!iscol) {
22543eda8832SBarry Smith     ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr);
22553eda8832SBarry Smith   }
22564c49b128SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
2257667159a5SBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
2258667159a5SBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
2259309c388cSBarry Smith 
2260309c388cSBarry Smith   if (!levels && row_identity && col_identity) {  /* special case copy the nonzero structure */
2261bb3d539aSBarry Smith     ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
2262bb3d539aSBarry Smith     (*fact)->factor = FACTOR_LU;
2263bb3d539aSBarry Smith     b               = (Mat_SeqBAIJ*)(*fact)->data;
2264bb3d539aSBarry Smith     if (!b->diag) {
2265bb3d539aSBarry Smith       ierr = MatMarkDiagonal_SeqBAIJ(*fact);CHKERRQ(ierr);
2266bb3d539aSBarry Smith     }
2267bb3d539aSBarry Smith     ierr = MatMissingDiagonal_SeqBAIJ(*fact);CHKERRQ(ierr);
2268bb3d539aSBarry Smith     b->row        = isrow;
2269bb3d539aSBarry Smith     b->col        = iscol;
2270bb3d539aSBarry Smith     ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2271bb3d539aSBarry Smith     ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2272bb3d539aSBarry Smith     b->icol       = isicol;
2273*b0a32e0cSBarry Smith     ierr          = PetscMalloc(((*fact)->m+1+b->bs)*sizeof(Scalar),&b->solve_work);CHKERRQ(ierr);
22744e2b4712SSatish Balay     PetscFunctionReturn(0);
2275309c388cSBarry Smith   } else { /* general case perform the symbolic factorization */
22764e2b4712SSatish Balay     ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
22774e2b4712SSatish Balay     ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
22784e2b4712SSatish Balay 
22794e2b4712SSatish Balay     /* get new row pointers */
2280*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&    ainew );CHKERRQ(ierr);
22814e2b4712SSatish Balay     ainew[0] = 0;
22824e2b4712SSatish Balay     /* don't know how many column pointers are needed so estimate */
22834e2b4712SSatish Balay     jmax = (int)(f*ai[n] + 1);
2284*b0a32e0cSBarry Smith ierr = PetscMalloc((jmax)*sizeof(int),&(    ajnew ));CHKERRQ(ierr);
22854e2b4712SSatish Balay     /* ajfill is level of fill for each fill entry */
2286*b0a32e0cSBarry Smith ierr = PetscMalloc((jmax)*sizeof(int),&(    ajfill ));CHKERRQ(ierr);
22874e2b4712SSatish Balay     /* fill is a linked list of nonzeros in active row */
2288*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&    fill );CHKERRQ(ierr);
22894e2b4712SSatish Balay     /* im is level for each filled value */
2290*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&    im );CHKERRQ(ierr);
22914e2b4712SSatish Balay     /* dloc is location of diagonal in factor */
2292*b0a32e0cSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&    dloc );CHKERRQ(ierr);
22934e2b4712SSatish Balay     dloc[0]  = 0;
22944e2b4712SSatish Balay     for (prow=0; prow<n; prow++) {
2295435faa5fSBarry Smith 
2296435faa5fSBarry Smith       /* copy prow into linked list */
22974e2b4712SSatish Balay       nzf        = nz  = ai[r[prow]+1] - ai[r[prow]];
229829bbc08cSBarry Smith       if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
22994e2b4712SSatish Balay       xi         = aj + ai[r[prow]];
23004e2b4712SSatish Balay       fill[n]    = n;
2301435faa5fSBarry Smith       fill[prow] = -1; /* marker for diagonal entry */
23024e2b4712SSatish Balay       while (nz--) {
23034e2b4712SSatish Balay 	fm  = n;
23044e2b4712SSatish Balay 	idx = ic[*xi++];
23054e2b4712SSatish Balay 	do {
23064e2b4712SSatish Balay 	  m  = fm;
23074e2b4712SSatish Balay 	  fm = fill[m];
23084e2b4712SSatish Balay 	} while (fm < idx);
23094e2b4712SSatish Balay 	fill[m]   = idx;
23104e2b4712SSatish Balay 	fill[idx] = fm;
23114e2b4712SSatish Balay 	im[idx]   = 0;
23124e2b4712SSatish Balay       }
2313435faa5fSBarry Smith 
2314435faa5fSBarry Smith       /* make sure diagonal entry is included */
2315435faa5fSBarry Smith       if (diagonal_fill && fill[prow] == -1) {
2316435faa5fSBarry Smith 	fm = n;
2317435faa5fSBarry Smith 	while (fill[fm] < prow) fm = fill[fm];
2318435faa5fSBarry Smith 	fill[prow] = fill[fm];  /* insert diagonal into linked list */
2319435faa5fSBarry Smith 	fill[fm]   = prow;
2320435faa5fSBarry Smith 	im[prow]   = 0;
2321435faa5fSBarry Smith 	nzf++;
2322335d9088SBarry Smith 	dcount++;
2323435faa5fSBarry Smith       }
2324435faa5fSBarry Smith 
23254e2b4712SSatish Balay       nzi = 0;
23264e2b4712SSatish Balay       row = fill[n];
23274e2b4712SSatish Balay       while (row < prow) {
23284e2b4712SSatish Balay 	incrlev = im[row] + 1;
23294e2b4712SSatish Balay 	nz      = dloc[row];
2330435faa5fSBarry Smith 	xi      = ajnew  + ainew[row] + nz + 1;
23314e2b4712SSatish Balay 	flev    = ajfill + ainew[row] + nz + 1;
23324e2b4712SSatish Balay 	nnz     = ainew[row+1] - ainew[row] - nz - 1;
23334e2b4712SSatish Balay 	fm      = row;
23344e2b4712SSatish Balay 	while (nnz-- > 0) {
23354e2b4712SSatish Balay 	  idx = *xi++;
23364e2b4712SSatish Balay 	  if (*flev + incrlev > levels) {
23374e2b4712SSatish Balay 	    flev++;
23384e2b4712SSatish Balay 	    continue;
23394e2b4712SSatish Balay 	  }
23404e2b4712SSatish Balay 	  do {
23414e2b4712SSatish Balay 	    m  = fm;
23424e2b4712SSatish Balay 	    fm = fill[m];
23434e2b4712SSatish Balay 	  } while (fm < idx);
23444e2b4712SSatish Balay 	  if (fm != idx) {
23454e2b4712SSatish Balay 	    im[idx]   = *flev + incrlev;
23464e2b4712SSatish Balay 	    fill[m]   = idx;
23474e2b4712SSatish Balay 	    fill[idx] = fm;
23484e2b4712SSatish Balay 	    fm        = idx;
23494e2b4712SSatish Balay 	    nzf++;
2350ecf371e4SBarry Smith 	  } else {
23514e2b4712SSatish Balay 	    if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
23524e2b4712SSatish Balay 	  }
23534e2b4712SSatish Balay 	  flev++;
23544e2b4712SSatish Balay 	}
23554e2b4712SSatish Balay 	row = fill[row];
23564e2b4712SSatish Balay 	nzi++;
23574e2b4712SSatish Balay       }
23584e2b4712SSatish Balay       /* copy new filled row into permanent storage */
23594e2b4712SSatish Balay       ainew[prow+1] = ainew[prow] + nzf;
23604e2b4712SSatish Balay       if (ainew[prow+1] > jmax) {
2361ecf371e4SBarry Smith 
2362ecf371e4SBarry Smith 	/* estimate how much additional space we will need */
2363ecf371e4SBarry Smith 	/* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
2364ecf371e4SBarry Smith 	/* just double the memory each time */
2365ecf371e4SBarry Smith 	int maxadd = jmax;
2366ecf371e4SBarry Smith 	/* maxadd = (int)(((f*ai[n]+1)*(n-prow+5))/n); */
23674e2b4712SSatish Balay 	if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
23684e2b4712SSatish Balay 	jmax += maxadd;
2369ecf371e4SBarry Smith 
2370ecf371e4SBarry Smith 	/* allocate a longer ajnew and ajfill */
2371*b0a32e0cSBarry Smith 	ierr = PetscMalloc(jmax*sizeof(int),&(xi ));CHKERRQ(ierr);
2372549d3d68SSatish Balay 	ierr = PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));CHKERRQ(ierr);
2373606d414cSSatish Balay 	ierr = PetscFree(ajnew);CHKERRQ(ierr);
23744e2b4712SSatish Balay 	ajnew = xi;
2375*b0a32e0cSBarry Smith 	ierr = PetscMalloc(jmax*sizeof(int),&(xi ));CHKERRQ(ierr);
2376549d3d68SSatish Balay 	ierr = PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));CHKERRQ(ierr);
2377606d414cSSatish Balay 	ierr = PetscFree(ajfill);CHKERRQ(ierr);
23784e2b4712SSatish Balay 	ajfill = xi;
2379ecf371e4SBarry Smith 	realloc++; /* count how many reallocations are needed */
23804e2b4712SSatish Balay       }
23814e2b4712SSatish Balay       xi          = ajnew + ainew[prow];
23824e2b4712SSatish Balay       flev        = ajfill + ainew[prow];
23834e2b4712SSatish Balay       dloc[prow]  = nzi;
23844e2b4712SSatish Balay       fm          = fill[n];
23854e2b4712SSatish Balay       while (nzf--) {
23864e2b4712SSatish Balay 	*xi++   = fm;
23874e2b4712SSatish Balay 	*flev++ = im[fm];
23884e2b4712SSatish Balay 	fm      = fill[fm];
23894e2b4712SSatish Balay       }
2390435faa5fSBarry Smith       /* make sure row has diagonal entry */
2391435faa5fSBarry Smith       if (ajnew[ainew[prow]+dloc[prow]] != prow) {
239229bbc08cSBarry Smith 	SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\
2393435faa5fSBarry Smith     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
2394435faa5fSBarry Smith       }
23954e2b4712SSatish Balay     }
2396606d414cSSatish Balay     ierr = PetscFree(ajfill);CHKERRQ(ierr);
23974e2b4712SSatish Balay     ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
23984e2b4712SSatish Balay     ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
2399606d414cSSatish Balay     ierr = PetscFree(fill);CHKERRQ(ierr);
2400606d414cSSatish Balay     ierr = PetscFree(im);CHKERRQ(ierr);
24014e2b4712SSatish Balay 
24024e2b4712SSatish Balay     {
2403329f5518SBarry Smith       PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
2404*b0a32e0cSBarry Smith       PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
2405*b0a32e0cSBarry Smith       PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af);
2406*b0a32e0cSBarry Smith       PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af);
2407*b0a32e0cSBarry Smith       PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n");
2408335d9088SBarry Smith       if (diagonal_fill) {
2409*b0a32e0cSBarry Smith 	PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount);
2410335d9088SBarry Smith       }
24114e2b4712SSatish Balay     }
24124e2b4712SSatish Balay 
24134e2b4712SSatish Balay     /* put together the new matrix */
24144e2b4712SSatish Balay     ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
2415*b0a32e0cSBarry Smith     PetscLogObjectParent(*fact,isicol);
24164e2b4712SSatish Balay     b = (Mat_SeqBAIJ*)(*fact)->data;
2417606d414cSSatish Balay     ierr = PetscFree(b->imax);CHKERRQ(ierr);
24187c922b88SBarry Smith     b->singlemalloc = PETSC_FALSE;
24193f1db9ecSBarry Smith     len = bs2*ainew[n]*sizeof(MatScalar);
24204e2b4712SSatish Balay     /* the next line frees the default space generated by the Create() */
2421606d414cSSatish Balay     ierr = PetscFree(b->a);CHKERRQ(ierr);
2422606d414cSSatish Balay     ierr = PetscFree(b->ilen);CHKERRQ(ierr);
2423*b0a32e0cSBarry Smith ierr = PetscMalloc(len,&(    b->a          ));CHKERRQ(ierr);
24244e2b4712SSatish Balay     b->j          = ajnew;
24254e2b4712SSatish Balay     b->i          = ainew;
24264e2b4712SSatish Balay     for (i=0; i<n; i++) dloc[i] += ainew[i];
24274e2b4712SSatish Balay     b->diag       = dloc;
24284e2b4712SSatish Balay     b->ilen       = 0;
24294e2b4712SSatish Balay     b->imax       = 0;
24304e2b4712SSatish Balay     b->row        = isrow;
24314e2b4712SSatish Balay     b->col        = iscol;
2432c38d4ed2SBarry Smith     ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2433c38d4ed2SBarry Smith     ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2434e51c0b9cSSatish Balay     b->icol       = isicol;
2435*b0a32e0cSBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(Scalar),&    b->solve_work );CHKERRQ(ierr);
24364e2b4712SSatish Balay     /* In b structure:  Free imax, ilen, old a, old j.
24374e2b4712SSatish Balay        Allocate dloc, solve_work, new a, new j */
2438*b0a32e0cSBarry Smith     PetscLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar));
24394e2b4712SSatish Balay     b->maxnz          = b->nz = ainew[n];
24404e2b4712SSatish Balay     (*fact)->factor   = FACTOR_LU;
24414e2b4712SSatish Balay 
24424e2b4712SSatish Balay     (*fact)->info.factor_mallocs    = realloc;
24434e2b4712SSatish Balay     (*fact)->info.fill_ratio_given  = f;
2444329f5518SBarry Smith     (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
2445309c388cSBarry Smith   }
24464e2b4712SSatish Balay 
2447309c388cSBarry Smith   if (row_identity && col_identity) {
2448309c388cSBarry Smith     switch (b->bs) {
2449309c388cSBarry Smith       case 2:
2450309c388cSBarry Smith         (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
2451309c388cSBarry Smith         (*fact)->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
2452*b0a32e0cSBarry Smith         PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
2453309c388cSBarry Smith         break;
2454309c388cSBarry Smith       case 3:
2455309c388cSBarry Smith         (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
2456309c388cSBarry Smith         (*fact)->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
2457*b0a32e0cSBarry Smith         PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
2458309c388cSBarry Smith         break;
2459309c388cSBarry Smith       case 4:
2460309c388cSBarry Smith         (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
2461309c388cSBarry Smith         (*fact)->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
2462*b0a32e0cSBarry Smith         PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
2463309c388cSBarry Smith         break;
2464309c388cSBarry Smith       case 5:
2465309c388cSBarry Smith         (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
2466309c388cSBarry Smith         (*fact)->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
2467*b0a32e0cSBarry Smith         PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
2468309c388cSBarry Smith         break;
2469309c388cSBarry Smith       case 6:
2470309c388cSBarry Smith         (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
2471309c388cSBarry Smith         (*fact)->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
2472*b0a32e0cSBarry Smith         PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
2473309c388cSBarry Smith         break;
2474309c388cSBarry Smith       case 7:
2475309c388cSBarry Smith         (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
2476309c388cSBarry Smith         (*fact)->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
2477*b0a32e0cSBarry Smith         PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
2478309c388cSBarry Smith       break;
2479309c388cSBarry Smith     }
2480309c388cSBarry Smith   }
24814e2b4712SSatish Balay   PetscFunctionReturn(0);
24824e2b4712SSatish Balay }
24834e2b4712SSatish Balay 
24844e2b4712SSatish Balay 
24854e2b4712SSatish Balay 
24864e2b4712SSatish Balay 
2487