xref: /petsc/src/mat/impls/baij/seq/baijfact2.c (revision a3b388050d4e8a4c38128e174a7572fa24ee1ba5)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baijfact2.c,v 1.28 1999/05/12 03:29:44 bsmith Exp balay $";
3 #endif
4 /*
5     Factorization code for BAIJ format.
6 */
7 
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/ilu.h"
11 #include "src/inline/dot.h"
12 
13 /* ----------------------------------------------------------- */
14 #undef __FUNC__
15 #define __FUNC__ "MatSolve_SeqBAIJ_N"
16 int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
17 {
18   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
19   IS              iscol=a->col,isrow=a->row;
20   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
21   int             nz,bs=a->bs,bs2=a->bs2,*rout,*cout;
22   MatScalar       *aa=a->a,*v;
23   Scalar          *x,*b,*sum,*tmp,*lsum;
24 
25   PetscFunctionBegin;
26   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
28   tmp  = a->solve_work;
29 
30   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
31   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
32 
33   /* forward solve the lower triangular */
34   ierr = PetscMemcpy(tmp,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr);
35   for ( i=1; i<n; i++ ) {
36     v   = aa + bs2*ai[i];
37     vi  = aj + ai[i];
38     nz  = a->diag[i] - ai[i];
39     sum = tmp + bs*i;
40     ierr = PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr);
41     while (nz--) {
42       Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++));
43       v += bs2;
44     }
45   }
46   /* backward solve the upper triangular */
47   lsum = a->solve_work + a->n;
48   for ( i=n-1; i>=0; i-- ){
49     v   = aa + bs2*(a->diag[i] + 1);
50     vi  = aj + a->diag[i] + 1;
51     nz  = ai[i+1] - a->diag[i] - 1;
52     ierr = PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr);
53     while (nz--) {
54       Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++));
55       v += bs2;
56     }
57     Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs);
58     ierr = PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr);
59   }
60 
61   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
62   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
63   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
64   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
65   PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n);
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNC__
70 #define __FUNC__ "MatSolve_SeqBAIJ_7"
71 int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
72 {
73   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
74   IS              iscol=a->col,isrow=a->row;
75   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
76   int             *diag = a->diag;
77   MatScalar       *aa=a->a,*v;
78   Scalar          sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7;
79   Scalar          *x,*b,*tmp;
80 
81   PetscFunctionBegin;
82   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
83   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
84   tmp  = a->solve_work;
85 
86   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
87   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
88 
89   /* forward solve the lower triangular */
90   idx    = 7*(*r++);
91   tmp[0] = b[idx];   tmp[1] = b[1+idx];
92   tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
93   tmp[5] = b[5+idx]; tmp[6] = b[6+idx];
94 
95   for ( i=1; i<n; i++ ) {
96     v     = aa + 49*ai[i];
97     vi    = aj + ai[i];
98     nz    = diag[i] - ai[i];
99     idx   = 7*(*r++);
100     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
101     sum5  = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx];
102     while (nz--) {
103       idx   = 7*(*vi++);
104       x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
105       x4    = tmp[3+idx];x5 = tmp[4+idx];
106       x6    = tmp[5+idx];x7 = tmp[6+idx];
107       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
108       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
109       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
110       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
111       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
112       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
113       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
114       v += 49;
115     }
116     idx = 7*i;
117     tmp[idx]   = sum1;tmp[1+idx] = sum2;
118     tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
119     tmp[5+idx] = sum6;tmp[6+idx] = sum7;
120   }
121   /* backward solve the upper triangular */
122   for ( i=n-1; i>=0; i-- ){
123     v    = aa + 49*diag[i] + 49;
124     vi   = aj + diag[i] + 1;
125     nz   = ai[i+1] - diag[i] - 1;
126     idt  = 7*i;
127     sum1 = tmp[idt];  sum2 = tmp[1+idt];
128     sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
129     sum6 = tmp[5+idt];sum7 = tmp[6+idt];
130     while (nz--) {
131       idx   = 7*(*vi++);
132       x1    = tmp[idx];   x2 = tmp[1+idx];
133       x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
134       x6    = tmp[5+idx]; x7 = tmp[6+idx];
135       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
136       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
137       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
138       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
139       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
140       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
141       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
142       v += 49;
143     }
144     idc = 7*(*c--);
145     v   = aa + 49*diag[i];
146     x[idc]   = tmp[idt]   = v[0]*sum1+v[7]*sum2+v[14]*sum3+
147                                  v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7;
148     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+
149                                  v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7;
150     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+
151                                  v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7;
152     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+
153                                  v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7;
154     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+
155                                  v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7;
156     x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+
157                                  v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7;
158     x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+
159                                  v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7;
160   }
161 
162   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
163   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
164   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
165   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
166   PLogFlops(2*49*(a->nz) - 7*a->n);
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNC__
171 #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering"
172 int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
173 {
174   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
175   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
176   int             ierr,*diag = a->diag,jdx;
177   MatScalar       *aa=a->a,*v;
178   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7;
179 
180   PetscFunctionBegin;
181   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
182   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
183   /* forward solve the lower triangular */
184   idx    = 0;
185   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
186   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
187   x[6] = b[6+idx];
188   for ( i=1; i<n; i++ ) {
189     v     =  aa + 49*ai[i];
190     vi    =  aj + ai[i];
191     nz    =  diag[i] - ai[i];
192     idx   =  7*i;
193     sum1  =  b[idx];   sum2 = b[1+idx]; sum3 = b[2+idx];
194     sum4  =  b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx];
195     sum7  =  b[6+idx];
196     while (nz--) {
197       jdx   = 7*(*vi++);
198       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
199       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
200       x7    = x[6+jdx];
201       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
202       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
203       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
204       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
205       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
206       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
207       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
208       v += 49;
209      }
210     x[idx]   = sum1;
211     x[1+idx] = sum2;
212     x[2+idx] = sum3;
213     x[3+idx] = sum4;
214     x[4+idx] = sum5;
215     x[5+idx] = sum6;
216     x[6+idx] = sum7;
217   }
218   /* backward solve the upper triangular */
219   for ( i=n-1; i>=0; i-- ){
220     v    = aa + 49*diag[i] + 49;
221     vi   = aj + diag[i] + 1;
222     nz   = ai[i+1] - diag[i] - 1;
223     idt  = 7*i;
224     sum1 = x[idt];   sum2 = x[1+idt];
225     sum3 = x[2+idt]; sum4 = x[3+idt];
226     sum5 = x[4+idt]; sum6 = x[5+idt];
227     sum7 = x[6+idt];
228     while (nz--) {
229       idx   = 7*(*vi++);
230       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
231       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
232       x7    = x[6+idx];
233       sum1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
234       sum2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
235       sum3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
236       sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
237       sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
238       sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
239       sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
240       v += 49;
241     }
242     v        = aa + 49*diag[i];
243     x[idt]   = v[0]*sum1 + v[7]*sum2  + v[14]*sum3 + v[21]*sum4
244                          + v[28]*sum5 + v[35]*sum6 + v[42]*sum7;
245     x[1+idt] = v[1]*sum1 + v[8]*sum2  + v[15]*sum3 + v[22]*sum4
246                          + v[29]*sum5 + v[36]*sum6 + v[43]*sum7;
247     x[2+idt] = v[2]*sum1 + v[9]*sum2  + v[16]*sum3 + v[23]*sum4
248                          + v[30]*sum5 + v[37]*sum6 + v[44]*sum7;
249     x[3+idt] = v[3]*sum1 + v[10]*sum2  + v[17]*sum3 + v[24]*sum4
250                          + v[31]*sum5 + v[38]*sum6 + v[45]*sum7;
251     x[4+idt] = v[4]*sum1 + v[11]*sum2  + v[18]*sum3 + v[25]*sum4
252                          + v[32]*sum5 + v[39]*sum6 + v[46]*sum7;
253     x[5+idt] = v[5]*sum1 + v[12]*sum2  + v[19]*sum3 + v[26]*sum4
254                          + v[33]*sum5 + v[40]*sum6 + v[47]*sum7;
255     x[6+idt] = v[6]*sum1 + v[13]*sum2  + v[20]*sum3 + v[27]*sum4
256                          + v[34]*sum5 + v[41]*sum6 + v[48]*sum7;
257   }
258 
259   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
260   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
261   PLogFlops(2*36*(a->nz) - 6*a->n);
262   PetscFunctionReturn(0);
263 }
264 
265 #undef __FUNC__
266 #define __FUNC__ "MatSolve_SeqBAIJ_6"
267 int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
268 {
269   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
270   IS              iscol=a->col,isrow=a->row;
271   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
272   int             *diag = a->diag;
273   MatScalar       *aa=a->a,*v;
274   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6,*tmp;
275 
276   PetscFunctionBegin;
277   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
278   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
279   tmp  = a->solve_work;
280 
281   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
282   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
283 
284   /* forward solve the lower triangular */
285   idx    = 6*(*r++);
286   tmp[0] = b[idx];   tmp[1] = b[1+idx];
287   tmp[2] = b[2+idx]; tmp[3] = b[3+idx];
288   tmp[4] = b[4+idx]; tmp[5] = b[5+idx];
289   for ( i=1; i<n; i++ ) {
290     v     = aa + 36*ai[i];
291     vi    = aj + ai[i];
292     nz    = diag[i] - ai[i];
293     idx   = 6*(*r++);
294     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
295     sum5  = b[4+idx]; sum6 = b[5+idx];
296     while (nz--) {
297       idx   = 6*(*vi++);
298       x1    = tmp[idx];   x2 = tmp[1+idx]; x3 = tmp[2+idx];
299       x4    = tmp[3+idx]; x5 = tmp[4+idx]; x6 = tmp[5+idx];
300       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
301       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
302       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
303       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
304       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
305       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
306       v += 36;
307     }
308     idx = 6*i;
309     tmp[idx]   = sum1;tmp[1+idx] = sum2;
310     tmp[2+idx] = sum3;tmp[3+idx] = sum4;
311     tmp[4+idx] = sum5;tmp[5+idx] = sum6;
312   }
313   /* backward solve the upper triangular */
314   for ( i=n-1; i>=0; i-- ){
315     v    = aa + 36*diag[i] + 36;
316     vi   = aj + diag[i] + 1;
317     nz   = ai[i+1] - diag[i] - 1;
318     idt  = 6*i;
319     sum1 = tmp[idt];  sum2 = tmp[1+idt];
320     sum3 = tmp[2+idt];sum4 = tmp[3+idt];
321     sum5 = tmp[4+idt];sum6 = tmp[5+idt];
322     while (nz--) {
323       idx   = 6*(*vi++);
324       x1    = tmp[idx];   x2 = tmp[1+idx];
325       x3    = tmp[2+idx]; x4 = tmp[3+idx];
326       x5    = tmp[4+idx]; x6 = tmp[5+idx];
327       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
328       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
329       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
330       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
331       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
332       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
333       v += 36;
334     }
335     idc = 6*(*c--);
336     v   = aa + 36*diag[i];
337     x[idc]   = tmp[idt]   = v[0]*sum1+v[6]*sum2+v[12]*sum3+
338                                  v[18]*sum4+v[24]*sum5+v[30]*sum6;
339     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[7]*sum2+v[13]*sum3+
340                                  v[19]*sum4+v[25]*sum5+v[31]*sum6;
341     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[8]*sum2+v[14]*sum3+
342                                  v[20]*sum4+v[26]*sum5+v[32]*sum6;
343     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[9]*sum2+v[15]*sum3+
344                                  v[21]*sum4+v[27]*sum5+v[33]*sum6;
345     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[10]*sum2+v[16]*sum3+
346                                  v[22]*sum4+v[28]*sum5+v[34]*sum6;
347     x[5+idc] = tmp[5+idt] = v[5]*sum1+v[11]*sum2+v[17]*sum3+
348                                  v[23]*sum4+v[29]*sum5+v[35]*sum6;
349   }
350 
351   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
352   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
353   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
354   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
355   PLogFlops(2*36*(a->nz) - 6*a->n);
356   PetscFunctionReturn(0);
357 }
358 
359 #undef __FUNC__
360 #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering"
361 int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
362 {
363   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
364   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
365   int             ierr,*diag = a->diag,jdx;
366   MatScalar       *aa=a->a,*v;
367   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6;
368 
369   PetscFunctionBegin;
370   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
371   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
372   /* forward solve the lower triangular */
373   idx    = 0;
374   x[0] = b[idx];   x[1] = b[1+idx]; x[2] = b[2+idx];
375   x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
376   for ( i=1; i<n; i++ ) {
377     v     =  aa + 36*ai[i];
378     vi    =  aj + ai[i];
379     nz    =  diag[i] - ai[i];
380     idx   =  6*i;
381     sum1  =  b[idx];   sum2 = b[1+idx]; sum3 = b[2+idx];
382     sum4  =  b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx];
383     while (nz--) {
384       jdx   = 6*(*vi++);
385       x1    = x[jdx];   x2 = x[1+jdx]; x3 = x[2+jdx];
386       x4    = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
387       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
388       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
389       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
390       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
391       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
392       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
393       v += 36;
394      }
395     x[idx]   = sum1;
396     x[1+idx] = sum2;
397     x[2+idx] = sum3;
398     x[3+idx] = sum4;
399     x[4+idx] = sum5;
400     x[5+idx] = sum6;
401   }
402   /* backward solve the upper triangular */
403   for ( i=n-1; i>=0; i-- ){
404     v    = aa + 36*diag[i] + 36;
405     vi   = aj + diag[i] + 1;
406     nz   = ai[i+1] - diag[i] - 1;
407     idt  = 6*i;
408     sum1 = x[idt];   sum2 = x[1+idt];
409     sum3 = x[2+idt]; sum4 = x[3+idt];
410     sum5 = x[4+idt]; sum6 = x[5+idt];
411     while (nz--) {
412       idx   = 6*(*vi++);
413       x1    = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
414       x4    = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
415       sum1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
416       sum2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
417       sum3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
418       sum4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
419       sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
420       sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
421       v += 36;
422     }
423     v        = aa + 36*diag[i];
424     x[idt]   = v[0]*sum1 + v[6]*sum2  + v[12]*sum3 + v[18]*sum4 + v[24]*sum5 + v[30]*sum6;
425     x[1+idt] = v[1]*sum1 + v[7]*sum2  + v[13]*sum3 + v[19]*sum4 + v[25]*sum5 + v[31]*sum6;
426     x[2+idt] = v[2]*sum1 + v[8]*sum2  + v[14]*sum3 + v[20]*sum4 + v[26]*sum5 + v[32]*sum6;
427     x[3+idt] = v[3]*sum1 + v[9]*sum2  + v[15]*sum3 + v[21]*sum4 + v[27]*sum5 + v[33]*sum6;
428     x[4+idt] = v[4]*sum1 + v[10]*sum2 + v[16]*sum3 + v[22]*sum4 + v[28]*sum5 + v[34]*sum6;
429     x[5+idt] = v[5]*sum1 + v[11]*sum2 + v[17]*sum3 + v[23]*sum4 + v[29]*sum5 + v[35]*sum6;
430   }
431 
432   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
433   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
434   PLogFlops(2*36*(a->nz) - 6*a->n);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNC__
439 #define __FUNC__ "MatSolve_SeqBAIJ_5"
440 int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
441 {
442   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
443   IS              iscol=a->col,isrow=a->row;
444   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
445   int             *diag = a->diag;
446   MatScalar       *aa=a->a,*v;
447   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp;
448 
449   PetscFunctionBegin;
450   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
451   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
452   tmp  = a->solve_work;
453 
454   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
455   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
456 
457   /* forward solve the lower triangular */
458   idx    = 5*(*r++);
459   tmp[0] = b[idx];   tmp[1] = b[1+idx];
460   tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx];
461   for ( i=1; i<n; i++ ) {
462     v     = aa + 25*ai[i];
463     vi    = aj + ai[i];
464     nz    = diag[i] - ai[i];
465     idx   = 5*(*r++);
466     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
467     sum5  = b[4+idx];
468     while (nz--) {
469       idx   = 5*(*vi++);
470       x1    = tmp[idx];  x2 = tmp[1+idx];x3 = tmp[2+idx];
471       x4    = tmp[3+idx];x5 = tmp[4+idx];
472       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
473       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
474       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
475       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
476       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
477       v += 25;
478     }
479     idx = 5*i;
480     tmp[idx]   = sum1;tmp[1+idx] = sum2;
481     tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5;
482   }
483   /* backward solve the upper triangular */
484   for ( i=n-1; i>=0; i-- ){
485     v    = aa + 25*diag[i] + 25;
486     vi   = aj + diag[i] + 1;
487     nz   = ai[i+1] - diag[i] - 1;
488     idt  = 5*i;
489     sum1 = tmp[idt];  sum2 = tmp[1+idt];
490     sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt];
491     while (nz--) {
492       idx   = 5*(*vi++);
493       x1    = tmp[idx];   x2 = tmp[1+idx];
494       x3    = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx];
495       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
496       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
497       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
498       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
499       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
500       v += 25;
501     }
502     idc = 5*(*c--);
503     v   = aa + 25*diag[i];
504     x[idc]   = tmp[idt]   = v[0]*sum1+v[5]*sum2+v[10]*sum3+
505                                  v[15]*sum4+v[20]*sum5;
506     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+
507                                  v[16]*sum4+v[21]*sum5;
508     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+
509                                  v[17]*sum4+v[22]*sum5;
510     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+
511                                  v[18]*sum4+v[23]*sum5;
512     x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+
513                                  v[19]*sum4+v[24]*sum5;
514   }
515 
516   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
517   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
518   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
519   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
520   PLogFlops(2*25*(a->nz) - 5*a->n);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNC__
525 #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering"
526 int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
527 {
528   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
529   int             i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
530   int             ierr,*diag = a->diag,jdx;
531   MatScalar       *aa=a->a,*v;
532   Scalar          *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;;
533 
534   PetscFunctionBegin;
535   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
536   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
537   /* forward solve the lower triangular */
538   idx    = 0;
539   x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
540   for ( i=1; i<n; i++ ) {
541     v     =  aa + 25*ai[i];
542     vi    =  aj + ai[i];
543     nz    =  diag[i] - ai[i];
544     idx   =  5*i;
545     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx];
546     while (nz--) {
547       jdx   = 5*(*vi++);
548       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
549       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
550       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
551       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
552       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
553       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
554       v    += 25;
555     }
556     x[idx]   = sum1;
557     x[1+idx] = sum2;
558     x[2+idx] = sum3;
559     x[3+idx] = sum4;
560     x[4+idx] = sum5;
561   }
562   /* backward solve the upper triangular */
563   for ( i=n-1; i>=0; i-- ){
564     v    = aa + 25*diag[i] + 25;
565     vi   = aj + diag[i] + 1;
566     nz   = ai[i+1] - diag[i] - 1;
567     idt  = 5*i;
568     sum1 = x[idt];  sum2 = x[1+idt];
569     sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt];
570     while (nz--) {
571       idx   = 5*(*vi++);
572       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
573       sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
574       sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
575       sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
576       sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
577       sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
578       v    += 25;
579     }
580     v        = aa + 25*diag[i];
581     x[idt]   = v[0]*sum1 + v[5]*sum2 + v[10]*sum3  + v[15]*sum4 + v[20]*sum5;
582     x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3  + v[16]*sum4 + v[21]*sum5;
583     x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3  + v[17]*sum4 + v[22]*sum5;
584     x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3  + v[18]*sum4 + v[23]*sum5;
585     x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3  + v[19]*sum4 + v[24]*sum5;
586   }
587 
588   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
589   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
590   PLogFlops(2*25*(a->nz) - 5*a->n);
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNC__
595 #define __FUNC__ "MatSolve_SeqBAIJ_4"
596 int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
597 {
598   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
599   IS              iscol=a->col,isrow=a->row;
600   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
601   int             *diag = a->diag;
602   MatScalar       *aa=a->a,*v;
603   Scalar          *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp;
604 
605   PetscFunctionBegin;
606   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
607   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
608   tmp  = a->solve_work;
609 
610   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
611   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
612 
613   /* forward solve the lower triangular */
614   idx    = 4*(*r++);
615   tmp[0] = b[idx];   tmp[1] = b[1+idx];
616   tmp[2] = b[2+idx]; tmp[3] = b[3+idx];
617   for ( i=1; i<n; i++ ) {
618     v     = aa + 16*ai[i];
619     vi    = aj + ai[i];
620     nz    = diag[i] - ai[i];
621     idx   = 4*(*r++);
622     sum1  = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
623     while (nz--) {
624       idx   = 4*(*vi++);
625       x1    = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx];
626       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
627       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
628       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
629       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
630       v    += 16;
631     }
632     idx        = 4*i;
633     tmp[idx]   = sum1;tmp[1+idx] = sum2;
634     tmp[2+idx] = sum3;tmp[3+idx] = sum4;
635   }
636   /* backward solve the upper triangular */
637   for ( i=n-1; i>=0; i-- ){
638     v    = aa + 16*diag[i] + 16;
639     vi   = aj + diag[i] + 1;
640     nz   = ai[i+1] - diag[i] - 1;
641     idt  = 4*i;
642     sum1 = tmp[idt];  sum2 = tmp[1+idt];
643     sum3 = tmp[2+idt];sum4 = tmp[3+idt];
644     while (nz--) {
645       idx   = 4*(*vi++);
646       x1    = tmp[idx];   x2 = tmp[1+idx];
647       x3    = tmp[2+idx]; x4 = tmp[3+idx];
648       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
649       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
650       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
651       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
652       v += 16;
653     }
654     idc      = 4*(*c--);
655     v        = aa + 16*diag[i];
656     x[idc]   = tmp[idt]   = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4;
657     x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4;
658     x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4;
659     x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4;
660   }
661 
662   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
663   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
664   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
665   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
666   PLogFlops(2*16*(a->nz) - 4*a->n);
667   PetscFunctionReturn(0);
668 }
669 
670 
671 /*
672       Special case where the matrix was ILU(0) factored in the natural
673    ordering. This eliminates the need for the column and row permutation.
674 */
675 #undef __FUNC__
676 #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering"
677 int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
678 {
679   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
680   int             n=a->mbs,*ai=a->i,*aj=a->j;
681   int             ierr,*diag = a->diag;
682   MatScalar       *aa=a->a;
683   Scalar          *x,*b;
684 
685   PetscFunctionBegin;
686   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
687   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
688 
689 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
690   {
691     static Scalar w[2000]; /* very BAD need to fix */
692     fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w);
693   }
694 #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
695   {
696     static Scalar w[2000]; /* very BAD need to fix */
697     fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w);
698   }
699 #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
700   fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b);
701 #else
702   {
703     Scalar    sum1,sum2,sum3,sum4,x1,x2,x3,x4;
704     MatScalar *v;
705     int       jdx,idt,idx,nz,*vi,i;
706 
707   /* forward solve the lower triangular */
708   idx    = 0;
709   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3];
710   for ( i=1; i<n; i++ ) {
711     v     =  aa      + 16*ai[i];
712     vi    =  aj      + ai[i];
713     nz    =  diag[i] - ai[i];
714     idx   +=  4;
715     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];
716     while (nz--) {
717       jdx   = 4*(*vi++);
718       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
719       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
720       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
721       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
722       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
723       v    += 16;
724     }
725     x[idx]   = sum1;
726     x[1+idx] = sum2;
727     x[2+idx] = sum3;
728     x[3+idx] = sum4;
729   }
730   /* backward solve the upper triangular */
731   for ( i=n-1; i>=0; i-- ){
732     v    = aa + 16*diag[i] + 16;
733     vi   = aj + diag[i] + 1;
734     nz   = ai[i+1] - diag[i] - 1;
735     idt  = 4*i;
736     sum1 = x[idt];  sum2 = x[1+idt];
737     sum3 = x[2+idt];sum4 = x[3+idt];
738     while (nz--) {
739       idx   = 4*(*vi++);
740       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx]; x4 = x[3+idx];
741       sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
742       sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
743       sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
744       sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
745       v    += 16;
746     }
747     v        = aa + 16*diag[i];
748     x[idt]   = v[0]*sum1 + v[4]*sum2 + v[8]*sum3  + v[12]*sum4;
749     x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3  + v[13]*sum4;
750     x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4;
751     x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4;
752   }
753   }
754 #endif
755 
756   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
757   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
758   PLogFlops(2*16*(a->nz) - 4*a->n);
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNC__
763 #define __FUNC__ "MatSolve_SeqBAIJ_3"
764 int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
765 {
766   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
767   IS              iscol=a->col,isrow=a->row;
768   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
769   int             *diag = a->diag;
770   MatScalar       *aa=a->a,*v;
771   Scalar          *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp;
772 
773   PetscFunctionBegin;
774   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
775   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
776   tmp  = a->solve_work;
777 
778   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
779   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
780 
781   /* forward solve the lower triangular */
782   idx    = 3*(*r++);
783   tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx];
784   for ( i=1; i<n; i++ ) {
785     v     = aa + 9*ai[i];
786     vi    = aj + ai[i];
787     nz    = diag[i] - ai[i];
788     idx   = 3*(*r++);
789     sum1  = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx];
790     while (nz--) {
791       idx   = 3*(*vi++);
792       x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
793       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
794       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
795       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
796       v += 9;
797     }
798     idx = 3*i;
799     tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3;
800   }
801   /* backward solve the upper triangular */
802   for ( i=n-1; i>=0; i-- ){
803     v    = aa + 9*diag[i] + 9;
804     vi   = aj + diag[i] + 1;
805     nz   = ai[i+1] - diag[i] - 1;
806     idt  = 3*i;
807     sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt];
808     while (nz--) {
809       idx   = 3*(*vi++);
810       x1    = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx];
811       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
812       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
813       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
814       v += 9;
815     }
816     idc = 3*(*c--);
817     v   = aa + 9*diag[i];
818     x[idc]   = tmp[idt]   = v[0]*sum1 + v[3]*sum2 + v[6]*sum3;
819     x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3;
820     x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3;
821   }
822   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
823   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
824   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
825   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
826   PLogFlops(2*9*(a->nz) - 3*a->n);
827   PetscFunctionReturn(0);
828 }
829 
830 /*
831       Special case where the matrix was ILU(0) factored in the natural
832    ordering. This eliminates the need for the column and row permutation.
833 */
834 #undef __FUNC__
835 #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering"
836 int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
837 {
838   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
839   int             n=a->mbs,*ai=a->i,*aj=a->j;
840   int             ierr,*diag = a->diag;
841   MatScalar       *aa=a->a, *v;
842   Scalar          *x,*b,sum1,sum2,sum3,x1,x2,x3;
843   int             jdx,idt,idx,nz,*vi,i;
844 
845   PetscFunctionBegin;
846   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
847   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
848 
849 
850   /* forward solve the lower triangular */
851   idx    = 0;
852   x[0]   = b[0]; x[1] = b[1]; x[2] = b[2];
853   for ( i=1; i<n; i++ ) {
854     v     =  aa      + 9*ai[i];
855     vi    =  aj      + ai[i];
856     nz    =  diag[i] - ai[i];
857     idx   +=  3;
858     sum1  =  b[idx];sum2 = b[1+idx];sum3 = b[2+idx];
859     while (nz--) {
860       jdx   = 3*(*vi++);
861       x1    = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
862       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
863       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
864       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
865       v    += 9;
866     }
867     x[idx]   = sum1;
868     x[1+idx] = sum2;
869     x[2+idx] = sum3;
870   }
871   /* backward solve the upper triangular */
872   for ( i=n-1; i>=0; i-- ){
873     v    = aa + 9*diag[i] + 9;
874     vi   = aj + diag[i] + 1;
875     nz   = ai[i+1] - diag[i] - 1;
876     idt  = 3*i;
877     sum1 = x[idt];  sum2 = x[1+idt];
878     sum3 = x[2+idt];
879     while (nz--) {
880       idx   = 3*(*vi++);
881       x1    = x[idx];   x2 = x[1+idx];x3    = x[2+idx];
882       sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
883       sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
884       sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
885       v    += 9;
886     }
887     v        = aa +  9*diag[i];
888     x[idt]   = v[0]*sum1 + v[3]*sum2 + v[6]*sum3;
889     x[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3;
890     x[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3;
891   }
892 
893   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
894   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
895   PLogFlops(2*9*(a->nz) - 3*a->n);
896   PetscFunctionReturn(0);
897 }
898 
899 #undef __FUNC__
900 #define __FUNC__ "MatSolve_SeqBAIJ_2"
901 int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
902 {
903   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
904   IS              iscol=a->col,isrow=a->row;
905   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
906   int             *diag = a->diag;
907   MatScalar       *aa=a->a,*v;
908   Scalar          *x,*b,sum1,sum2,x1,x2,*tmp;
909 
910   PetscFunctionBegin;
911   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
912   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
913   tmp  = a->solve_work;
914 
915   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
916   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
917 
918   /* forward solve the lower triangular */
919   idx    = 2*(*r++);
920   tmp[0] = b[idx]; tmp[1] = b[1+idx];
921   for ( i=1; i<n; i++ ) {
922     v     = aa + 4*ai[i];
923     vi    = aj + ai[i];
924     nz    = diag[i] - ai[i];
925     idx   = 2*(*r++);
926     sum1  = b[idx]; sum2 = b[1+idx];
927     while (nz--) {
928       idx   = 2*(*vi++);
929       x1    = tmp[idx]; x2 = tmp[1+idx];
930       sum1 -= v[0]*x1 + v[2]*x2;
931       sum2 -= v[1]*x1 + v[3]*x2;
932       v += 4;
933     }
934     idx = 2*i;
935     tmp[idx] = sum1; tmp[1+idx] = sum2;
936   }
937   /* backward solve the upper triangular */
938   for ( i=n-1; i>=0; i-- ){
939     v    = aa + 4*diag[i] + 4;
940     vi   = aj + diag[i] + 1;
941     nz   = ai[i+1] - diag[i] - 1;
942     idt  = 2*i;
943     sum1 = tmp[idt]; sum2 = tmp[1+idt];
944     while (nz--) {
945       idx   = 2*(*vi++);
946       x1    = tmp[idx]; x2 = tmp[1+idx];
947       sum1 -= v[0]*x1 + v[2]*x2;
948       sum2 -= v[1]*x1 + v[3]*x2;
949       v += 4;
950     }
951     idc = 2*(*c--);
952     v   = aa + 4*diag[i];
953     x[idc]   = tmp[idt]   = v[0]*sum1 + v[2]*sum2;
954     x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2;
955   }
956   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
957   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
958   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
959   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
960   PLogFlops(2*4*(a->nz) - 2*a->n);
961   PetscFunctionReturn(0);
962 }
963 
964 /*
965       Special case where the matrix was ILU(0) factored in the natural
966    ordering. This eliminates the need for the column and row permutation.
967 */
968 #undef __FUNC__
969 #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering"
970 int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
971 {
972   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
973   int             n=a->mbs,*ai=a->i,*aj=a->j;
974   int             ierr,*diag = a->diag;
975   MatScalar       *aa=a->a,*v;
976   Scalar          *x,*b,sum1,sum2,x1,x2;
977   int             jdx,idt,idx,nz,*vi,i;
978 
979   PetscFunctionBegin;
980   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
981   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
982 
983   /* forward solve the lower triangular */
984   idx    = 0;
985   x[0]   = b[0]; x[1] = b[1];
986   for ( i=1; i<n; i++ ) {
987     v     =  aa      + 4*ai[i];
988     vi    =  aj      + ai[i];
989     nz    =  diag[i] - ai[i];
990     idx   +=  2;
991     sum1  =  b[idx];sum2 = b[1+idx];
992     while (nz--) {
993       jdx   = 2*(*vi++);
994       x1    = x[jdx];x2 = x[1+jdx];
995       sum1 -= v[0]*x1 + v[2]*x2;
996       sum2 -= v[1]*x1 + v[3]*x2;
997       v    += 4;
998     }
999     x[idx]   = sum1;
1000     x[1+idx] = sum2;
1001   }
1002   /* backward solve the upper triangular */
1003   for ( i=n-1; i>=0; i-- ){
1004     v    = aa + 4*diag[i] + 4;
1005     vi   = aj + diag[i] + 1;
1006     nz   = ai[i+1] - diag[i] - 1;
1007     idt  = 2*i;
1008     sum1 = x[idt];  sum2 = x[1+idt];
1009     while (nz--) {
1010       idx   = 2*(*vi++);
1011       x1    = x[idx];   x2 = x[1+idx];
1012       sum1 -= v[0]*x1 + v[2]*x2;
1013       sum2 -= v[1]*x1 + v[3]*x2;
1014       v    += 4;
1015     }
1016     v        = aa +  4*diag[i];
1017     x[idt]   = v[0]*sum1 + v[2]*sum2;
1018     x[1+idt] = v[1]*sum1 + v[3]*sum2;
1019   }
1020 
1021   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1022   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1023   PLogFlops(2*4*(a->nz) - 2*a->n);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNC__
1028 #define __FUNC__ "MatSolve_SeqBAIJ_1"
1029 int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
1030 {
1031   Mat_SeqBAIJ     *a=(Mat_SeqBAIJ *)A->data;
1032   IS              iscol=a->col,isrow=a->row;
1033   int             *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
1034   int             *diag = a->diag;
1035   MatScalar       *aa=a->a,*v;
1036   Scalar          *x,*b,sum1,*tmp;
1037 
1038   PetscFunctionBegin;
1039   if (!n) PetscFunctionReturn(0);
1040 
1041   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1042   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1043   tmp  = a->solve_work;
1044 
1045   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1046   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1047 
1048   /* forward solve the lower triangular */
1049   tmp[0] = b[*r++];
1050   for ( i=1; i<n; i++ ) {
1051     v     = aa + ai[i];
1052     vi    = aj + ai[i];
1053     nz    = diag[i] - ai[i];
1054     sum1  = b[*r++];
1055     while (nz--) {
1056       sum1 -= (*v++)*tmp[*vi++];
1057     }
1058     tmp[i] = sum1;
1059   }
1060   /* backward solve the upper triangular */
1061   for ( i=n-1; i>=0; i-- ){
1062     v    = aa + diag[i] + 1;
1063     vi   = aj + diag[i] + 1;
1064     nz   = ai[i+1] - diag[i] - 1;
1065     sum1 = tmp[i];
1066     while (nz--) {
1067       sum1 -= (*v++)*tmp[*vi++];
1068     }
1069     x[*c--] = tmp[i] = aa[diag[i]]*sum1;
1070   }
1071 
1072   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1073   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1074   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1075   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1076   PLogFlops(2*1*(a->nz) - a->n);
1077   PetscFunctionReturn(0);
1078 }
1079 /*
1080       Special case where the matrix was ILU(0) factored in the natural
1081    ordering. This eliminates the need for the column and row permutation.
1082 */
1083 #undef __FUNC__
1084 #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering"
1085 int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1086 {
1087   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data;
1088   int             n=a->mbs,*ai=a->i,*aj=a->j;
1089   int             ierr,*diag = a->diag;
1090   MatScalar       *aa=a->a;
1091   Scalar          *x,*b;
1092   Scalar          sum1,x1;
1093   MatScalar       *v;
1094   int             jdx,idt,idx,nz,*vi,i;
1095 
1096   PetscFunctionBegin;
1097   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1098   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1099 
1100   /* forward solve the lower triangular */
1101   idx    = 0;
1102   x[0]   = b[0];
1103   for ( i=1; i<n; i++ ) {
1104     v     =  aa      + ai[i];
1105     vi    =  aj      + ai[i];
1106     nz    =  diag[i] - ai[i];
1107     idx   +=  1;
1108     sum1  =  b[idx];
1109     while (nz--) {
1110       jdx   = *vi++;
1111       x1    = x[jdx];
1112       sum1 -= v[0]*x1;
1113       v    += 1;
1114     }
1115     x[idx]   = sum1;
1116   }
1117   /* backward solve the upper triangular */
1118   for ( i=n-1; i>=0; i-- ){
1119     v    = aa + diag[i] + 1;
1120     vi   = aj + diag[i] + 1;
1121     nz   = ai[i+1] - diag[i] - 1;
1122     idt  = i;
1123     sum1 = x[idt];
1124     while (nz--) {
1125       idx   = *vi++;
1126       x1    = x[idx];
1127       sum1 -= v[0]*x1;
1128       v    += 1;
1129     }
1130     v        = aa +  diag[i];
1131     x[idt]   = v[0]*sum1;
1132   }
1133   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1134   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1135   PLogFlops(2*(a->nz) - a->n);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 /* ----------------------------------------------------------------*/
1140 /*
1141      This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
1142    except that the data structure of Mat_SeqAIJ is slightly different.
1143    Not a good example of code reuse.
1144 */
1145 extern int MatMissingDiag_SeqBAIJ(Mat);
1146 
1147 #undef __FUNC__
1148 #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ"
1149 int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
1150 {
1151   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b;
1152   IS          isicol;
1153   int         *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j;
1154   int         *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev;
1155   int         *dloc, idx, row,m,fm, nzf, nzi,len,  realloc = 0, dcount = 0;
1156   int         incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill;
1157   PetscTruth  col_identity, row_identity;
1158   double      f;
1159 
1160   PetscFunctionBegin;
1161   if (info) {
1162     f             = info->fill;
1163     levels        = (int) info->levels;
1164     diagonal_fill = (int) info->diagonal_fill;
1165   } else {
1166     f             = 1.0;
1167     levels        = 0;
1168     diagonal_fill = 0;
1169   }
1170   ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr);
1171 
1172   /* special case that simply copies fill pattern */
1173   PetscValidHeaderSpecific(isrow,IS_COOKIE);
1174   PetscValidHeaderSpecific(iscol,IS_COOKIE);
1175   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1176   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1177   if (levels == 0 && row_identity && col_identity) {
1178     ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
1179     (*fact)->factor = FACTOR_LU;
1180     b               = (Mat_SeqBAIJ *) (*fact)->data;
1181     if (!b->diag) {
1182       ierr = MatMarkDiag_SeqBAIJ(*fact);CHKERRQ(ierr);
1183     }
1184     ierr = MatMissingDiag_SeqBAIJ(*fact);CHKERRQ(ierr);
1185     b->row        = isrow;
1186     b->col        = iscol;
1187     b->icol       = isicol;
1188     b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
1189    /*
1190         Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1191         for ILU(0) factorization with natural ordering
1192    */
1193     switch (b->bs) {
1194     case 2:
1195       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1196       (*fact)->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1197       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1198       break;
1199     case 3:
1200       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1201       (*fact)->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1202       PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1203       break;
1204     case 4:
1205       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1206       (*fact)->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1207       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1208       break;
1209     case 5:
1210       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1211       (*fact)->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1212       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1213       break;
1214     case 6:
1215       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1216       (*fact)->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1217       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1218       break;
1219     case 7:
1220       (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1221       (*fact)->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1222       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1223     break;
1224   }
1225     PetscFunctionReturn(0);
1226   }
1227 
1228   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1229   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1230 
1231   /* get new row pointers */
1232   ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew);
1233   ainew[0] = 0;
1234   /* don't know how many column pointers are needed so estimate */
1235   jmax = (int) (f*ai[n] + 1);
1236   ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew);
1237   /* ajfill is level of fill for each fill entry */
1238   ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill);
1239   /* fill is a linked list of nonzeros in active row */
1240   fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill);
1241   /* im is level for each filled value */
1242   im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im);
1243   /* dloc is location of diagonal in factor */
1244   dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc);
1245   dloc[0]  = 0;
1246   for ( prow=0; prow<n; prow++ ) {
1247 
1248     /* copy prow into linked list */
1249     nzf        = nz  = ai[r[prow]+1] - ai[r[prow]];
1250     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix");
1251     xi         = aj + ai[r[prow]];
1252     fill[n]    = n;
1253     fill[prow] = -1; /* marker for diagonal entry */
1254     while (nz--) {
1255       fm  = n;
1256       idx = ic[*xi++];
1257       do {
1258         m  = fm;
1259         fm = fill[m];
1260       } while (fm < idx);
1261       fill[m]   = idx;
1262       fill[idx] = fm;
1263       im[idx]   = 0;
1264     }
1265 
1266     /* make sure diagonal entry is included */
1267     if (diagonal_fill && fill[prow] == -1) {
1268       fm = n;
1269       while (fill[fm] < prow) fm = fill[fm];
1270       fill[prow] = fill[fm];  /* insert diagonal into linked list */
1271       fill[fm]   = prow;
1272       im[prow]   = 0;
1273       nzf++;
1274       dcount++;
1275     }
1276 
1277     nzi = 0;
1278     row = fill[n];
1279     while ( row < prow ) {
1280       incrlev = im[row] + 1;
1281       nz      = dloc[row];
1282       xi      = ajnew  + ainew[row] + nz + 1;
1283       flev    = ajfill + ainew[row] + nz + 1;
1284       nnz     = ainew[row+1] - ainew[row] - nz - 1;
1285       fm      = row;
1286       while (nnz-- > 0) {
1287         idx = *xi++;
1288         if (*flev + incrlev > levels) {
1289           flev++;
1290           continue;
1291         }
1292         do {
1293           m  = fm;
1294           fm = fill[m];
1295         } while (fm < idx);
1296         if (fm != idx) {
1297           im[idx]   = *flev + incrlev;
1298           fill[m]   = idx;
1299           fill[idx] = fm;
1300           fm        = idx;
1301           nzf++;
1302         } else {
1303           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
1304         }
1305         flev++;
1306       }
1307       row = fill[row];
1308       nzi++;
1309     }
1310     /* copy new filled row into permanent storage */
1311     ainew[prow+1] = ainew[prow] + nzf;
1312     if (ainew[prow+1] > jmax) {
1313 
1314       /* estimate how much additional space we will need */
1315       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1316       /* just double the memory each time */
1317       int maxadd = jmax;
1318       /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */
1319       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1320       jmax += maxadd;
1321 
1322       /* allocate a longer ajnew and ajfill */
1323       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1324       ierr = PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));CHKERRQ(ierr);
1325       ierr = PetscFree(ajnew);CHKERRQ(ierr);
1326       ajnew = xi;
1327       xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi);
1328       ierr = PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));CHKERRQ(ierr);
1329       ierr = PetscFree(ajfill);CHKERRQ(ierr);
1330       ajfill = xi;
1331       realloc++; /* count how many reallocations are needed */
1332     }
1333     xi          = ajnew + ainew[prow];
1334     flev        = ajfill + ainew[prow];
1335     dloc[prow]  = nzi;
1336     fm          = fill[n];
1337     while (nzf--) {
1338       *xi++   = fm;
1339       *flev++ = im[fm];
1340       fm      = fill[fm];
1341     }
1342     /* make sure row has diagonal entry */
1343     if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1344       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\
1345     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1346     }
1347   }
1348   ierr = PetscFree(ajfill);CHKERRQ(ierr);
1349   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1350   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1351   ierr = PetscFree(fill);CHKERRQ(ierr);
1352   ierr = PetscFree(im);CHKERRQ(ierr);
1353 
1354   {
1355     double af = ((double)ainew[n])/((double)ai[n]);
1356     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",
1357              realloc,f,af);
1358     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af);
1359     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af);
1360     PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n");
1361     if (diagonal_fill) {
1362       PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount);
1363     }
1364   }
1365 
1366   /* put together the new matrix */
1367   ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr);
1368   PLogObjectParent(*fact,isicol);
1369   b = (Mat_SeqBAIJ *) (*fact)->data;
1370   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1371   b->singlemalloc = 0;
1372   len = bs2*ainew[n]*sizeof(MatScalar);
1373   /* the next line frees the default space generated by the Create() */
1374   ierr = PetscFree(b->a);CHKERRQ(ierr);
1375   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1376   b->a          = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1377   b->j          = ajnew;
1378   b->i          = ainew;
1379   for ( i=0; i<n; i++ ) dloc[i] += ainew[i];
1380   b->diag       = dloc;
1381   b->ilen       = 0;
1382   b->imax       = 0;
1383   b->row        = isrow;
1384   b->col        = iscol;
1385   b->icol       = isicol;
1386   b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work);
1387   /* In b structure:  Free imax, ilen, old a, old j.
1388      Allocate dloc, solve_work, new a, new j */
1389   PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar));
1390   b->maxnz          = b->nz = ainew[n];
1391   (*fact)->factor   = FACTOR_LU;
1392 
1393   (*fact)->info.factor_mallocs    = realloc;
1394   (*fact)->info.fill_ratio_given  = f;
1395   (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]);
1396 
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 
1401 
1402 
1403