xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision d44834fb5aa6ab55eaa2e18f09dfd00e40aa4639)
1 /*
2     Factorization code for SBAIJ format.
3 */
4 
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 #include "src/mat/impls/baij/seq/baij.h"
7 #include "src/inline/ilu.h"
8 #include "src/inline/dot.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
12 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
13 {
14   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
15   IS              isrow=a->row;
16   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
17   PetscErrorCode  ierr;
18   PetscInt        nz,*vj,k,*r,idx,k1;
19   PetscInt        bs=A->bs,bs2 = a->bs2;
20   MatScalar       *aa=a->a,*v,*diag;
21   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
22 
23   PetscFunctionBegin;
24   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
25   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
26   t  = a->solve_work;
27   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
28   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
29 
30   /* solve U^T * D * y = b by forward substitution */
31   xk = t;
32   for (k=0; k<mbs; k++) { /* t <- perm(b) */
33     idx   = bs*r[k];
34     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
35   }
36   for (k=0; k<mbs; k++){
37     v  = aa + bs2*ai[k];
38     xk = t + k*bs;      /* Dk*xk = k-th block of x */
39     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
40     nz = ai[k+1] - ai[k];
41     vj = aj + ai[k];
42     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
43     while (nz--) {
44       /* x(:) += U(k,:)^T*(Dk*xk) */
45       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
46       vj++; xj = t + (*vj)*bs;
47       v += bs2;
48     }
49     /* xk = inv(Dk)*(Dk*xk) */
50     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
51     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
52   }
53 
54   /* solve U*x = y by back substitution */
55   for (k=mbs-1; k>=0; k--){
56     v  = aa + bs2*ai[k];
57     xk = t + k*bs;        /* xk */
58     nz = ai[k+1] - ai[k];
59     vj = aj + ai[k];
60     xj = t + (*vj)*bs;
61     while (nz--) {
62       /* xk += U(k,:)*x(:) */
63       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
64       vj++;
65       v += bs2; xj = t + (*vj)*bs;
66     }
67     idx   = bs*r[k];
68     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
69   }
70 
71   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
72   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
73   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
74   PetscLogFlops(bs2*(2*a->nz + mbs));
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
80 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
81 {
82   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
83   PetscErrorCode ierr;
84   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
85   PetscInt       nz,*vj,k;
86   PetscInt       bs=A->bs,bs2 = a->bs2;
87   MatScalar      *aa=a->a,*v,*diag;
88   PetscScalar    *x,*xk,*xj,*b,*xk_tmp;
89 
90   PetscFunctionBegin;
91 
92   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
93   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
94 
95   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
96 
97   /* solve U^T * D * y = b by forward substitution */
98   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
99   for (k=0; k<mbs; k++){
100     v  = aa + bs2*ai[k];
101     xk = x + k*bs;      /* Dk*xk = k-th block of x */
102     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
103     nz = ai[k+1] - ai[k];
104     vj = aj + ai[k];
105     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
106     while (nz--) {
107       /* x(:) += U(k,:)^T*(Dk*xk) */
108       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
109       vj++; xj = x + (*vj)*bs;
110       v += bs2;
111     }
112     /* xk = inv(Dk)*(Dk*xk) */
113     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
114     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
115   }
116 
117   /* solve U*x = y by back substitution */
118   for (k=mbs-1; k>=0; k--){
119     v  = aa + bs2*ai[k];
120     xk = x + k*bs;        /* xk */
121     nz = ai[k+1] - ai[k];
122     vj = aj + ai[k];
123     xj = x + (*vj)*bs;
124     while (nz--) {
125       /* xk += U(k,:)*x(:) */
126       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
127       vj++;
128       v += bs2; xj = x + (*vj)*bs;
129     }
130   }
131 
132   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
133   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
134   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
135   PetscLogFlops(bs2*(2*a->nz + mbs));
136   PetscFunctionReturn(0);
137 }
138 
139 #undef __FUNCT__
140 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
141 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
142 {
143   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
144   IS             isrow=a->row;
145   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
146   PetscErrorCode ierr;
147   PetscInt       nz,*vj,k,*r,idx;
148   MatScalar      *aa=a->a,*v,*d;
149   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
150 
151   PetscFunctionBegin;
152   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
153   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
154   t  = a->solve_work;
155   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
156 
157   /* solve U^T * D * y = b by forward substitution */
158   tp = t;
159   for (k=0; k<mbs; k++) { /* t <- perm(b) */
160     idx   = 7*r[k];
161     tp[0] = b[idx];
162     tp[1] = b[idx+1];
163     tp[2] = b[idx+2];
164     tp[3] = b[idx+3];
165     tp[4] = b[idx+4];
166     tp[5] = b[idx+5];
167     tp[6] = b[idx+6];
168     tp += 7;
169   }
170 
171   for (k=0; k<mbs; k++){
172     v  = aa + 49*ai[k];
173     vj = aj + ai[k];
174     tp = t + k*7;
175     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
176     nz = ai[k+1] - ai[k];
177     tp = t + (*vj)*7;
178     while (nz--) {
179       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
180       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
181       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
182       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
183       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
184       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
185       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
186       vj++; tp = t + (*vj)*7;
187       v += 49;
188     }
189 
190     /* xk = inv(Dk)*(Dk*xk) */
191     d  = aa+k*49;          /* ptr to inv(Dk) */
192     tp    = t + k*7;
193     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
194     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
195     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
196     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
197     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
198     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
199     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
200   }
201 
202   /* solve U*x = y by back substitution */
203   for (k=mbs-1; k>=0; k--){
204     v  = aa + 49*ai[k];
205     vj = aj + ai[k];
206     tp    = t + k*7;
207     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
208     nz = ai[k+1] - ai[k];
209 
210     tp = t + (*vj)*7;
211     while (nz--) {
212       /* xk += U(k,:)*x(:) */
213       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
214       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
215       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
216       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
217       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
218       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
219       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
220       vj++; tp = t + (*vj)*7;
221       v += 49;
222     }
223     tp    = t + k*7;
224     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
225     idx   = 7*r[k];
226     x[idx]     = x0;
227     x[idx+1]   = x1;
228     x[idx+2]   = x2;
229     x[idx+3]   = x3;
230     x[idx+4]   = x4;
231     x[idx+5]   = x5;
232     x[idx+6]   = x6;
233   }
234 
235   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
236   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
237   PetscLogFlops(49*(2*a->nz + mbs));
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
243 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
244 {
245   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
246   PetscErrorCode ierr;
247   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
248   MatScalar      *aa=a->a,*v,*d;
249   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
250   PetscInt       nz,*vj,k;
251 
252   PetscFunctionBegin;
253   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
254   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
255 
256   /* solve U^T * D * y = b by forward substitution */
257   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
258   for (k=0; k<mbs; k++){
259     v  = aa + 49*ai[k];
260     xp = x + k*7;
261     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
262     nz = ai[k+1] - ai[k];
263     vj = aj + ai[k];
264     xp = x + (*vj)*7;
265     while (nz--) {
266       /* x(:) += U(k,:)^T*(Dk*xk) */
267       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
268       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
269       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
270       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
271       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
272       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
273       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
274       vj++; xp = x + (*vj)*7;
275       v += 49;
276     }
277     /* xk = inv(Dk)*(Dk*xk) */
278     d  = aa+k*49;          /* ptr to inv(Dk) */
279     xp = x + k*7;
280     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
281     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
282     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
283     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
284     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
285     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
286     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
287   }
288 
289   /* solve U*x = y by back substitution */
290   for (k=mbs-1; k>=0; k--){
291     v  = aa + 49*ai[k];
292     xp = x + k*7;
293     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
294     nz = ai[k+1] - ai[k];
295     vj = aj + ai[k];
296     xp = x + (*vj)*7;
297     while (nz--) {
298       /* xk += U(k,:)*x(:) */
299       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
300       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
301       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
302       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
303       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
304       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
305       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
306       vj++;
307       v += 49; xp = x + (*vj)*7;
308     }
309     xp = x + k*7;
310     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
311   }
312 
313   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
314   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
315   PetscLogFlops(49*(2*a->nz + mbs));
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNCT__
320 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
321 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
322 {
323   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
324   IS             isrow=a->row;
325   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
326   PetscErrorCode ierr;
327   PetscInt       nz,*vj,k,*r,idx;
328   MatScalar      *aa=a->a,*v,*d;
329   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
330 
331   PetscFunctionBegin;
332   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
333   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
334   t  = a->solve_work;
335   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
336 
337   /* solve U^T * D * y = b by forward substitution */
338   tp = t;
339   for (k=0; k<mbs; k++) { /* t <- perm(b) */
340     idx   = 6*r[k];
341     tp[0] = b[idx];
342     tp[1] = b[idx+1];
343     tp[2] = b[idx+2];
344     tp[3] = b[idx+3];
345     tp[4] = b[idx+4];
346     tp[5] = b[idx+5];
347     tp += 6;
348   }
349 
350   for (k=0; k<mbs; k++){
351     v  = aa + 36*ai[k];
352     vj = aj + ai[k];
353     tp = t + k*6;
354     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
355     nz = ai[k+1] - ai[k];
356     tp = t + (*vj)*6;
357     while (nz--) {
358       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
359       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
360       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
361       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
362       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
363       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
364       vj++; tp = t + (*vj)*6;
365       v += 36;
366     }
367 
368     /* xk = inv(Dk)*(Dk*xk) */
369     d  = aa+k*36;          /* ptr to inv(Dk) */
370     tp    = t + k*6;
371     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
372     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
373     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
374     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
375     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
376     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
377   }
378 
379   /* solve U*x = y by back substitution */
380   for (k=mbs-1; k>=0; k--){
381     v  = aa + 36*ai[k];
382     vj = aj + ai[k];
383     tp    = t + k*6;
384     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
385     nz = ai[k+1] - ai[k];
386 
387     tp = t + (*vj)*6;
388     while (nz--) {
389       /* xk += U(k,:)*x(:) */
390       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
391       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
392       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
393       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
394       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
395       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
396       vj++; tp = t + (*vj)*6;
397       v += 36;
398     }
399     tp    = t + k*6;
400     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
401     idx   = 6*r[k];
402     x[idx]     = x0;
403     x[idx+1]   = x1;
404     x[idx+2]   = x2;
405     x[idx+3]   = x3;
406     x[idx+4]   = x4;
407     x[idx+5]   = x5;
408   }
409 
410   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
411   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
412   PetscLogFlops(36*(2*a->nz + mbs));
413   PetscFunctionReturn(0);
414 }
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
418 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
419 {
420   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
421   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
422   MatScalar      *aa=a->a,*v,*d;
423   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4,x5;
424   PetscErrorCode ierr;
425   PetscInt       nz,*vj,k;
426 
427   PetscFunctionBegin;
428 
429   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
430   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
431 
432   /* solve U^T * D * y = b by forward substitution */
433   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
434   for (k=0; k<mbs; k++){
435     v  = aa + 36*ai[k];
436     xp = x + k*6;
437     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
438     nz = ai[k+1] - ai[k];
439     vj = aj + ai[k];
440     xp = x + (*vj)*6;
441     while (nz--) {
442       /* x(:) += U(k,:)^T*(Dk*xk) */
443       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
444       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
445       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
446       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
447       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
448       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
449       vj++; xp = x + (*vj)*6;
450       v += 36;
451     }
452     /* xk = inv(Dk)*(Dk*xk) */
453     d  = aa+k*36;          /* ptr to inv(Dk) */
454     xp = x + k*6;
455     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
456     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
457     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
458     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
459     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
460     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
461   }
462 
463   /* solve U*x = y by back substitution */
464   for (k=mbs-1; k>=0; k--){
465     v  = aa + 36*ai[k];
466     xp = x + k*6;
467     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
468     nz = ai[k+1] - ai[k];
469     vj = aj + ai[k];
470     xp = x + (*vj)*6;
471     while (nz--) {
472       /* xk += U(k,:)*x(:) */
473       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
474       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
475       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
476       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
477       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
478       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
479       vj++;
480       v += 36; xp = x + (*vj)*6;
481     }
482     xp = x + k*6;
483     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
484   }
485 
486   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
487   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
488   PetscLogFlops(36*(2*a->nz + mbs));
489   PetscFunctionReturn(0);
490 }
491 
492 #undef __FUNCT__
493 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
494 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
495 {
496   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
497   IS             isrow=a->row;
498   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
499   PetscErrorCode ierr;
500   PetscInt       nz,*vj,k,*r,idx;
501   MatScalar      *aa=a->a,*v,*diag;
502   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;
503 
504   PetscFunctionBegin;
505   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
506   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
507   t  = a->solve_work;
508   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
509 
510   /* solve U^T * D * y = b by forward substitution */
511   tp = t;
512   for (k=0; k<mbs; k++) { /* t <- perm(b) */
513     idx   = 5*r[k];
514     tp[0] = b[idx];
515     tp[1] = b[idx+1];
516     tp[2] = b[idx+2];
517     tp[3] = b[idx+3];
518     tp[4] = b[idx+4];
519     tp += 5;
520   }
521 
522   for (k=0; k<mbs; k++){
523     v  = aa + 25*ai[k];
524     vj = aj + ai[k];
525     tp = t + k*5;
526     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
527     nz = ai[k+1] - ai[k];
528 
529     tp = t + (*vj)*5;
530     while (nz--) {
531       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
532       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
533       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
534       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
535       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
536       vj++; tp = t + (*vj)*5;
537       v += 25;
538     }
539 
540     /* xk = inv(Dk)*(Dk*xk) */
541     diag  = aa+k*25;          /* ptr to inv(Dk) */
542     tp    = t + k*5;
543       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
544       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
545       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
546       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
547       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
548   }
549 
550   /* solve U*x = y by back substitution */
551   for (k=mbs-1; k>=0; k--){
552     v  = aa + 25*ai[k];
553     vj = aj + ai[k];
554     tp    = t + k*5;
555     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
556     nz = ai[k+1] - ai[k];
557 
558     tp = t + (*vj)*5;
559     while (nz--) {
560       /* xk += U(k,:)*x(:) */
561       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
562       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
563       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
564       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
565       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
566       vj++; tp = t + (*vj)*5;
567       v += 25;
568     }
569     tp    = t + k*5;
570     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
571     idx   = 5*r[k];
572     x[idx]     = x0;
573     x[idx+1]   = x1;
574     x[idx+2]   = x2;
575     x[idx+3]   = x3;
576     x[idx+4]   = x4;
577   }
578 
579   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
580   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
581   PetscLogFlops(25*(2*a->nz + mbs));
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
587 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
588 {
589   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
590   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
591   MatScalar      *aa=a->a,*v,*diag;
592   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4;
593   PetscErrorCode ierr;
594   PetscInt       nz,*vj,k;
595 
596   PetscFunctionBegin;
597 
598   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
599   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
600 
601   /* solve U^T * D * y = b by forward substitution */
602   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
603   for (k=0; k<mbs; k++){
604     v  = aa + 25*ai[k];
605     xp = x + k*5;
606     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
607     nz = ai[k+1] - ai[k];
608     vj = aj + ai[k];
609     xp = x + (*vj)*5;
610     while (nz--) {
611       /* x(:) += U(k,:)^T*(Dk*xk) */
612       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
613       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
614       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
615       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
616       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
617       vj++; xp = x + (*vj)*5;
618       v += 25;
619     }
620     /* xk = inv(Dk)*(Dk*xk) */
621     diag = aa+k*25;          /* ptr to inv(Dk) */
622     xp   = x + k*5;
623     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
624     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
625     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
626     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
627     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
628   }
629 
630   /* solve U*x = y by back substitution */
631   for (k=mbs-1; k>=0; k--){
632     v  = aa + 25*ai[k];
633     xp = x + k*5;
634     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
635     nz = ai[k+1] - ai[k];
636     vj = aj + ai[k];
637     xp = x + (*vj)*5;
638     while (nz--) {
639       /* xk += U(k,:)*x(:) */
640       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
641       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
642       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
643       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
644       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
645       vj++;
646       v += 25; xp = x + (*vj)*5;
647     }
648     xp = x + k*5;
649     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
650   }
651 
652   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
653   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
654   PetscLogFlops(25*(2*a->nz + mbs));
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
660 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
661 {
662   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
663   IS             isrow=a->row;
664   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
665   PetscErrorCode ierr;
666   PetscInt       nz,*vj,k,*r,idx;
667   MatScalar      *aa=a->a,*v,*diag;
668   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;
669 
670   PetscFunctionBegin;
671   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
672   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
673   t  = a->solve_work;
674   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
675 
676   /* solve U^T * D * y = b by forward substitution */
677   tp = t;
678   for (k=0; k<mbs; k++) { /* t <- perm(b) */
679     idx   = 4*r[k];
680     tp[0] = b[idx];
681     tp[1] = b[idx+1];
682     tp[2] = b[idx+2];
683     tp[3] = b[idx+3];
684     tp += 4;
685   }
686 
687   for (k=0; k<mbs; k++){
688     v  = aa + 16*ai[k];
689     vj = aj + ai[k];
690     tp = t + k*4;
691     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
692     nz = ai[k+1] - ai[k];
693 
694     tp = t + (*vj)*4;
695     while (nz--) {
696       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
697       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
698       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
699       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
700       vj++; tp = t + (*vj)*4;
701       v += 16;
702     }
703 
704     /* xk = inv(Dk)*(Dk*xk) */
705     diag  = aa+k*16;          /* ptr to inv(Dk) */
706     tp    = t + k*4;
707     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
708     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
709     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
710     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
711   }
712 
713   /* solve U*x = y by back substitution */
714   for (k=mbs-1; k>=0; k--){
715     v  = aa + 16*ai[k];
716     vj = aj + ai[k];
717     tp    = t + k*4;
718     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
719     nz = ai[k+1] - ai[k];
720 
721     tp = t + (*vj)*4;
722     while (nz--) {
723       /* xk += U(k,:)*x(:) */
724       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
725       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
726       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
727       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
728       vj++; tp = t + (*vj)*4;
729       v += 16;
730     }
731     tp    = t + k*4;
732     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
733     idx        = 4*r[k];
734     x[idx]     = x0;
735     x[idx+1]   = x1;
736     x[idx+2]   = x2;
737     x[idx+3]   = x3;
738   }
739 
740   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
741   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
742   PetscLogFlops(16*(2*a->nz + mbs));
743   PetscFunctionReturn(0);
744 }
745 
746 /*
747    Special case where the matrix was factored in the natural ordering.
748    This eliminates the need for the column and row permutation.
749 */
750 #undef __FUNCT__
751 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
752 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
753 {
754   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
755   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
756   MatScalar      *aa=a->a,*v,*diag;
757   PetscScalar    *x,*xp,*b,x0,x1,x2,x3;
758   PetscErrorCode ierr;
759   PetscInt       nz,*vj,k;
760 
761   PetscFunctionBegin;
762 
763   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
764   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
765 
766   /* solve U^T * D * y = b by forward substitution */
767   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
768   for (k=0; k<mbs; k++){
769     v  = aa + 16*ai[k];
770     xp = x + k*4;
771     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
772     nz = ai[k+1] - ai[k];
773     vj = aj + ai[k];
774     xp = x + (*vj)*4;
775     while (nz--) {
776       /* x(:) += U(k,:)^T*(Dk*xk) */
777       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
778       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
779       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
780       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
781       vj++; xp = x + (*vj)*4;
782       v += 16;
783     }
784     /* xk = inv(Dk)*(Dk*xk) */
785     diag = aa+k*16;          /* ptr to inv(Dk) */
786     xp   = x + k*4;
787     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
788     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
789     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
790     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
791   }
792 
793   /* solve U*x = y by back substitution */
794   for (k=mbs-1; k>=0; k--){
795     v  = aa + 16*ai[k];
796     xp = x + k*4;
797     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
798     nz = ai[k+1] - ai[k];
799     vj = aj + ai[k];
800     xp = x + (*vj)*4;
801     while (nz--) {
802       /* xk += U(k,:)*x(:) */
803       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
804       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
805       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
806       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
807       vj++;
808       v += 16; xp = x + (*vj)*4;
809     }
810     xp = x + k*4;
811     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
812   }
813 
814   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
815   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
816   PetscLogFlops(16*(2*a->nz + mbs));
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
822 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
823 {
824   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
825   IS             isrow=a->row;
826   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
827   PetscErrorCode ierr;
828   PetscInt       nz,*vj,k,*r,idx;
829   MatScalar      *aa=a->a,*v,*diag;
830   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;
831 
832   PetscFunctionBegin;
833   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
834   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
835   t  = a->solve_work;
836   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
837 
838   /* solve U^T * D * y = b by forward substitution */
839   tp = t;
840   for (k=0; k<mbs; k++) { /* t <- perm(b) */
841     idx   = 3*r[k];
842     tp[0] = b[idx];
843     tp[1] = b[idx+1];
844     tp[2] = b[idx+2];
845     tp += 3;
846   }
847 
848   for (k=0; k<mbs; k++){
849     v  = aa + 9*ai[k];
850     vj = aj + ai[k];
851     tp = t + k*3;
852     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
853     nz = ai[k+1] - ai[k];
854 
855     tp = t + (*vj)*3;
856     while (nz--) {
857       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
858       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
859       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
860       vj++; tp = t + (*vj)*3;
861       v += 9;
862     }
863 
864     /* xk = inv(Dk)*(Dk*xk) */
865     diag  = aa+k*9;          /* ptr to inv(Dk) */
866     tp    = t + k*3;
867     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
868     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
869     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
870   }
871 
872   /* solve U*x = y by back substitution */
873   for (k=mbs-1; k>=0; k--){
874     v  = aa + 9*ai[k];
875     vj = aj + ai[k];
876     tp    = t + k*3;
877     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
878     nz = ai[k+1] - ai[k];
879 
880     tp = t + (*vj)*3;
881     while (nz--) {
882       /* xk += U(k,:)*x(:) */
883       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
884       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
885       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
886       vj++; tp = t + (*vj)*3;
887       v += 9;
888     }
889     tp    = t + k*3;
890     tp[0] = x0; tp[1] = x1; tp[2] = x2;
891     idx      = 3*r[k];
892     x[idx]   = x0;
893     x[idx+1] = x1;
894     x[idx+2] = x2;
895   }
896 
897   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
898   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
899   PetscLogFlops(9*(2*a->nz + mbs));
900   PetscFunctionReturn(0);
901 }
902 
903 /*
904    Special case where the matrix was factored in the natural ordering.
905    This eliminates the need for the column and row permutation.
906 */
907 #undef __FUNCT__
908 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
909 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
910 {
911   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
912   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
913   MatScalar      *aa=a->a,*v,*diag;
914   PetscScalar    *x,*xp,*b,x0,x1,x2;
915   PetscErrorCode ierr;
916   PetscInt       nz,*vj,k;
917 
918   PetscFunctionBegin;
919 
920   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
921   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
922 
923   /* solve U^T * D * y = b by forward substitution */
924   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
925   for (k=0; k<mbs; k++){
926     v  = aa + 9*ai[k];
927     xp = x + k*3;
928     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
929     nz = ai[k+1] - ai[k];
930     vj = aj + ai[k];
931     xp = x + (*vj)*3;
932     while (nz--) {
933       /* x(:) += U(k,:)^T*(Dk*xk) */
934       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
935       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
936       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
937       vj++; xp = x + (*vj)*3;
938       v += 9;
939     }
940     /* xk = inv(Dk)*(Dk*xk) */
941     diag = aa+k*9;          /* ptr to inv(Dk) */
942     xp   = x + k*3;
943     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
944     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
945     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
946   }
947 
948   /* solve U*x = y by back substitution */
949   for (k=mbs-1; k>=0; k--){
950     v  = aa + 9*ai[k];
951     xp = x + k*3;
952     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
953     nz = ai[k+1] - ai[k];
954     vj = aj + ai[k];
955     xp = x + (*vj)*3;
956     while (nz--) {
957       /* xk += U(k,:)*x(:) */
958       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
959       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
960       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
961       vj++;
962       v += 9; xp = x + (*vj)*3;
963     }
964     xp = x + k*3;
965     xp[0] = x0; xp[1] = x1; xp[2] = x2;
966   }
967 
968   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
969   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
970   PetscLogFlops(9*(2*a->nz + mbs));
971   PetscFunctionReturn(0);
972 }
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
976 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
977 {
978   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
979   IS             isrow=a->row;
980   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
981   PetscErrorCode ierr;
982   PetscInt       nz,*vj,k,k2,*r,idx;
983   MatScalar      *aa=a->a,*v,*diag;
984   PetscScalar    *x,*b,x0,x1,*t;
985 
986   PetscFunctionBegin;
987   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
988   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
989   t  = a->solve_work;
990   /* printf("called MatSolve_SeqSBAIJ_2\n"); */
991   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
992 
993   /* solve U^T * D * y = perm(b) by forward substitution */
994   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
995     idx = 2*r[k];
996     t[k*2]   = b[idx];
997     t[k*2+1] = b[idx+1];
998   }
999   for (k=0; k<mbs; k++){
1000     v  = aa + 4*ai[k];
1001     vj = aj + ai[k];
1002     k2 = k*2;
1003     x0 = t[k2]; x1 = t[k2+1];
1004     nz = ai[k+1] - ai[k];
1005     while (nz--) {
1006       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1007       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1008       vj++; v += 4;
1009     }
1010     diag = aa+k*4;  /* ptr to inv(Dk) */
1011     t[k2]   = diag[0]*x0 + diag[2]*x1;
1012     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1013   }
1014 
1015   /* solve U*x = y by back substitution */
1016   for (k=mbs-1; k>=0; k--){
1017     v  = aa + 4*ai[k];
1018     vj = aj + ai[k];
1019     k2 = k*2;
1020     x0 = t[k2]; x1 = t[k2+1];
1021     nz = ai[k+1] - ai[k];
1022     while (nz--) {
1023       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1024       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1025       vj++; v += 4;
1026     }
1027     t[k2]    = x0;
1028     t[k2+1]  = x1;
1029     idx      = 2*r[k];
1030     x[idx]   = x0;
1031     x[idx+1] = x1;
1032   }
1033 
1034   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1035   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1036   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1037   PetscLogFlops(4*(2*a->nz + mbs));
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 /*
1042    Special case where the matrix was factored in the natural ordering.
1043    This eliminates the need for the column and row permutation.
1044 */
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1047 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1048 {
1049   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1050   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1051   MatScalar      *aa=a->a,*v,*diag;
1052   PetscScalar    *x,*b,x0,x1;
1053   PetscErrorCode ierr;
1054   PetscInt       nz,*vj,k,k2;
1055 
1056   PetscFunctionBegin;
1057 
1058   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1059   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1060 
1061   /* solve U^T * D * y = b by forward substitution */
1062   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1063   for (k=0; k<mbs; k++){
1064     v  = aa + 4*ai[k];
1065     vj = aj + ai[k];
1066     k2 = k*2;
1067     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1068     nz = ai[k+1] - ai[k];
1069 
1070     while (nz--) {
1071       /* x(:) += U(k,:)^T*(Dk*xk) */
1072       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1073       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1074       vj++; v += 4;
1075     }
1076     /* xk = inv(Dk)*(Dk*xk) */
1077     diag = aa+k*4;          /* ptr to inv(Dk) */
1078     x[k2]   = diag[0]*x0 + diag[2]*x1;
1079     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1080   }
1081 
1082   /* solve U*x = y by back substitution */
1083   for (k=mbs-1; k>=0; k--){
1084     v  = aa + 4*ai[k];
1085     vj = aj + ai[k];
1086     k2 = k*2;
1087     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1088     nz = ai[k+1] - ai[k];
1089     while (nz--) {
1090       /* xk += U(k,:)*x(:) */
1091       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1092       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1093       vj++; v += 4;
1094     }
1095     x[k2]     = x0;
1096     x[k2+1]   = x1;
1097   }
1098 
1099   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1100   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1101   PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1107 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1108 {
1109   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1110   IS             isrow=a->row;
1111   PetscErrorCode ierr;
1112   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rip;
1113   MatScalar      *aa=a->a,*v;
1114   PetscScalar    *x,*b,xk,*t;
1115   PetscInt       nz,*vj,k;
1116 
1117   PetscFunctionBegin;
1118   if (!mbs) PetscFunctionReturn(0);
1119 
1120   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1121   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1122   t    = a->solve_work;
1123 
1124   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1125 
1126   /* solve U^T*D*y = perm(b) by forward substitution */
1127   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1128   for (k=0; k<mbs; k++){
1129     v  = aa + ai[k];
1130     vj = aj + ai[k];
1131     xk = t[k];
1132     nz = ai[k+1] - ai[k];
1133     while (nz--) t[*vj++] += (*v++) * xk;
1134     t[k] = xk*aa[k];  /* note: aa[k] = 1/D(k) */
1135   }
1136 
1137   /* solve U*x = y by back substitution */
1138   for (k=mbs-1; k>=0; k--){
1139     v  = aa + ai[k];
1140     vj = aj + ai[k];
1141     xk = t[k];
1142     nz = ai[k+1] - ai[k];
1143     while (nz--) xk += (*v++) * t[*vj++];
1144     t[k]      = xk;
1145     x[rip[k]] = xk;
1146   }
1147 
1148   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1149   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1150   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1151   PetscLogFlops(4*a->nz + A->m);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1157 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1158 {
1159   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   if (A->bs == 1) {
1164     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1165   } else {
1166     IS              isrow=a->row;
1167     PetscInt             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1168     MatScalar       *aa=a->a,*v;
1169     PetscScalar     *x,*b,*t;
1170     PetscInt             nz,*vj,k,n;
1171     if (bb->n > a->solves_work_n) {
1172       if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
1173       ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1174       a->solves_work_n = bb->n;
1175     }
1176     n    = bb->n;
1177     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1178     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1179     t    = a->solves_work;
1180 
1181     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1182 
1183     /* solve U^T*D*y = perm(b) by forward substitution */
1184     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1185     for (k=0; k<mbs; k++){
1186       v  = aa + ai[k];
1187       vj = aj + ai[k];
1188       nz = ai[k+1] - ai[k];
1189       while (nz--) {
1190         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1191         v++;vj++;
1192       }
1193       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1194     }
1195 
1196     /* solve U*x = y by back substitution */
1197     for (k=mbs-1; k>=0; k--){
1198       v  = aa + ai[k];
1199       vj = aj + ai[k];
1200       nz = ai[k+1] - ai[k];
1201       while (nz--) {
1202         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1203         v++;vj++;
1204       }
1205       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1206     }
1207 
1208     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1209     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1210     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1211     PetscLogFlops(bb->n*(4*a->nz + A->m));
1212   }
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /*
1217       Special case where the matrix was ILU(0) factored in the natural
1218    ordering. This eliminates the need for the column and row permutation.
1219 */
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1222 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1223 {
1224   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1225   PetscErrorCode ierr;
1226   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1227   MatScalar      *aa=a->a,*v;
1228   PetscScalar    *x,*b,xk;
1229   PetscInt       nz,*vj,k;
1230 
1231   PetscFunctionBegin;
1232   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1233   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1234 
1235   /* solve U^T*D*y = b by forward substitution */
1236   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1237   for (k=0; k<mbs; k++){
1238     v  = aa + ai[k] + 1;
1239     vj = aj + ai[k] + 1;
1240     xk = x[k];
1241     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1242     while (nz--) x[*vj++] += (*v++) * xk;
1243     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1244   }
1245 
1246   /* solve U*x = y by back substitution */
1247   for (k=mbs-2; k>=0; k--){
1248     v  = aa + ai[k] + 1;
1249     vj = aj + ai[k] + 1;
1250     xk = x[k];
1251     nz = ai[k+1] - ai[k] - 1;
1252     while (nz--) xk += (*v++) * x[*vj++];
1253     x[k] = xk;
1254   }
1255 
1256   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1257   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1258   PetscLogFlops(4*a->nz + A->m);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
1265 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1266 {
1267   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1268   PetscErrorCode ierr;
1269   PetscInt       *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1270   PetscInt       *jutmp,bs = A->bs,bs2=a->bs2;
1271   PetscInt       m,reallocs = 0,*levtmp;
1272   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl;
1273   PetscInt       incrlev,*lev,shift,prow,nz;
1274   PetscInt       *il,ili,nextprow;
1275   PetscReal      f = info->fill,levels = info->levels;
1276   PetscTruth     perm_identity;
1277 
1278   PetscFunctionBegin;
1279   /* check whether perm is the identity mapping */
1280   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1281 
1282   /* special case that simply copies fill pattern */
1283   if (!levels && perm_identity && bs==1) {
1284     ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr);
1285     (*B)->factor    = FACTOR_CHOLESKY;
1286     b               = (Mat_SeqSBAIJ*)(*B)->data;
1287     b->row          = perm;
1288     b->icol         = perm;
1289     b->factor_damping   = info->damping;
1290     b->factor_shift     = info->shift;
1291     b->factor_zeropivot = info->zeropivot;
1292     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1293     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1294     ierr         = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1295     (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1296     (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1297     PetscFunctionReturn(0);
1298   }
1299 
1300   /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */
1301   if (levels > 0 && perm_identity && bs==1 ){
1302     if (!perm_identity) a->permute = PETSC_TRUE;
1303 
1304   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1305 
1306   if (perm_identity){ /* without permutation */
1307     ai = a->i; aj = a->j;
1308   } else {            /* non-trivial permutation */
1309     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
1310     ai = a->inew; aj = a->jnew;
1311   }
1312 
1313   /* initialization */
1314   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1315   umax  = (PetscInt)(f*ai[mbs] + 1);
1316   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1317   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1318   iu[0] = 0;
1319   juidx = 0; /* index for ju */
1320   ierr  = PetscMalloc((4*mbs+1)*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */
1321   q      = jl + mbs;   /* linked list for col index of active row */
1322   levtmp = q + mbs;
1323   il     = levtmp + mbs;
1324   for (i=0; i<mbs; i++){
1325     jl[i] = mbs;
1326     q[i]  = 0;
1327     il[i] = 0;
1328   }
1329 
1330   /* for each row k */
1331   for (k=0; k<mbs; k++){
1332     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
1333     q[k] = mbs;
1334     /* initialize nonzero structure of k-th row to row rip[k] of A */
1335     jmin = ai[rip[k]] +1; /* exclude diag[k] */
1336     jmax = ai[rip[k]+1];
1337     for (j=jmin; j<jmax; j++){
1338       vj = rip[aj[j]]; /* col. value */
1339       if(vj > k){
1340         qm = k;
1341         do {
1342           m  = qm; qm = q[m];
1343         } while(qm < vj);
1344         if (qm == vj) {
1345           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1346         }
1347         nzk++;
1348         q[m]  = vj;
1349         q[vj] = qm;
1350         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1351       } /* if(vj > k) */
1352     } /* for (j=jmin; j<jmax; j++) */
1353 
1354     /* modify nonzero structure of k-th row by computing fill-in
1355        for each row i to be merged in */
1356     prow = k;
1357     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
1358 
1359     while (prow < k){
1360       nextprow = jl[prow];
1361       /* merge row prow into k-th row */
1362       ili  = il[prow];
1363       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
1364       jmax = iu[prow+1];
1365       qm   = k;
1366       for (j=jmin; j<jmax; j++){
1367         vj = ju[j];
1368         incrlev = lev[j] + 1;
1369         if (incrlev > levels) continue;
1370         do {
1371           m = qm; qm = q[m];
1372         } while (qm < vj);
1373         if (qm != vj){  /* a fill */
1374           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1375           levtmp[vj]  = incrlev;
1376         } else {
1377           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1378         }
1379       }
1380       if (jmin < jmax){ /* update il and jl */
1381         il[prow] = jmin;
1382         j = ju[jmin];
1383         jl[prow] = jl[j]; jl[j] = prow;
1384       }
1385       prow = nextprow;
1386     }
1387 
1388     /* add the first nonzero element in U(k, k+1:mbs-1) to jl */
1389     if (nzk > 0){
1390       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1391       jl[k] = jl[i]; jl[i] = k;
1392       il[k] = iu[k] + 1;
1393     }
1394     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */
1395 
1396     /* allocate more space to ju if needed */
1397     if (iu[k+1] > umax) {
1398       /* estimate how much additional space we will need */
1399       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1400       /* just double the memory each time */
1401       maxadd = umax;
1402       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1403       umax += maxadd;
1404 
1405       /* allocate a longer ju */
1406       ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1407       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
1408       ierr = PetscFree(ju);CHKERRQ(ierr);
1409       ju   = jutmp;
1410 
1411       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1412       ierr     = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(PetscInt));CHKERRQ(ierr);
1413       ierr     = PetscFree(lev);CHKERRQ(ierr);
1414       lev      = jutmp;
1415       reallocs += 2; /* count how many times we realloc */
1416     }
1417 
1418     /* save nonzero structure of k-th row in ju */
1419     ju[juidx]  = k; /* diag[k] */
1420     lev[juidx] = 0;
1421     juidx++;
1422     i = k;
1423     while (nzk --) {
1424       i           = q[i];
1425       ju[juidx] = i;
1426       lev[juidx] = levtmp[i];
1427       juidx++;
1428     }
1429   } /* end of for (k=0; k<mbs; k++) */
1430 
1431   if (ai[mbs] != 0) {
1432     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1433     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1434     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1435     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
1436     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
1437   } else {
1438      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1439   }
1440 
1441   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1442   ierr = PetscFree(jl);CHKERRQ(ierr);
1443   ierr = PetscFree(lev);CHKERRQ(ierr);
1444 
1445   /* put together the new matrix */
1446   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1447   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1448   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1449 
1450   /* PetscLogObjectParent(*B,iperm); */
1451   b = (Mat_SeqSBAIJ*)(*B)->data;
1452   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1453   b->singlemalloc = PETSC_FALSE;
1454   /* the next line frees the default space generated by the Create() */
1455   ierr = PetscFree(b->a);CHKERRQ(ierr);
1456   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1457   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1458   b->j    = ju;
1459   b->i    = iu;
1460   b->diag = 0;
1461   b->ilen = 0;
1462   b->imax = 0;
1463   b->row  = perm;
1464   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1465   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1466   b->icol = perm;
1467   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1468   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1469   /* In b structure:  Free imax, ilen, old a, old j.
1470      Allocate idnew, solve_work, new a, new j */
1471   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1472   b->maxnz          = b->nz = iu[mbs];
1473   b->factor_damping   = info->damping;
1474   b->factor_shift     = info->shift;
1475   b->factor_zeropivot = info->zeropivot;
1476 
1477   (*B)->factor                 = FACTOR_CHOLESKY;
1478   (*B)->info.factor_mallocs    = reallocs;
1479   (*B)->info.fill_ratio_given  = f;
1480   if (ai[mbs] != 0) {
1481     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1482   } else {
1483     (*B)->info.fill_ratio_needed = 0.0;
1484   }
1485 
1486 
1487   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1488   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1489   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1490 
1491   PetscFunctionReturn(0);
1492   } /* end of if (levels > 0 && perm_identity && bs==1 ) */
1493 
1494   if (!perm_identity) a->permute = PETSC_TRUE;
1495   if (perm_identity){
1496     ai = a->i; aj = a->j;
1497   } else { /*  non-trivial permutation */
1498     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1499     ai = a->inew; aj = a->jnew;
1500   }
1501 
1502   /* initialization */
1503   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1504   umax  = (PetscInt)(f*ai[mbs] + 1);
1505   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1506   umax += mbs + 1;
1507   shift = mbs + 1;
1508   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1509   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1510   iu[0] = mbs + 1;
1511   juidx = mbs + 1;
1512   /* prowl: linked list for pivot row */
1513   ierr    = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr);
1514   /* q: linked list for col index */
1515   q       = prowl + mbs;
1516   levtmp  = q     + mbs;
1517 
1518   for (i=0; i<mbs; i++){
1519     prowl[i] = mbs;
1520     q[i] = 0;
1521   }
1522 
1523   /* for each row k */
1524   for (k=0; k<mbs; k++){
1525     nzk  = 0;
1526     q[k] = mbs;
1527     /* copy current row into linked list */
1528     nz = ai[rip[k]+1] - ai[rip[k]];
1529     j = ai[rip[k]];
1530     while (nz--){
1531       vj = rip[aj[j++]];
1532       if (vj > k){
1533         qm = k;
1534         do {
1535           m  = qm; qm = q[m];
1536         } while(qm < vj);
1537         if (qm == vj) {
1538           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1539         }
1540         nzk++;
1541         q[m]       = vj;
1542         q[vj]      = qm;
1543         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1544       }
1545     }
1546 
1547     /* modify nonzero structure of k-th row by computing fill-in
1548        for each row prow to be merged in */
1549     prow = k;
1550     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1551 
1552     while (prow < k){
1553       /* merge row prow into k-th row */
1554       jmin = iu[prow] + 1;
1555       jmax = iu[prow+1];
1556       qm = k;
1557       for (j=jmin; j<jmax; j++){
1558         incrlev = lev[j-shift] + 1;
1559 	if (incrlev > levels) continue;
1560 
1561         vj      = ju[j];
1562         do {
1563           m = qm; qm = q[m];
1564         } while (qm < vj);
1565         if (qm != vj){      /* a fill */
1566           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1567           levtmp[vj] = incrlev;
1568         } else {
1569           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1570         }
1571       }
1572       prow = prowl[prow]; /* next pivot row */
1573     }
1574 
1575     /* add k to row list for first nonzero element in k-th row */
1576     if (nzk > 1){
1577       i = q[k]; /* col value of first nonzero element in k_th row of U */
1578       prowl[k] = prowl[i]; prowl[i] = k;
1579     }
1580     iu[k+1] = iu[k] + nzk;
1581 
1582     /* allocate more space to ju and lev if needed */
1583     if (iu[k+1] > umax) {
1584       /* estimate how much additional space we will need */
1585       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1586       /* just double the memory each time */
1587       maxadd = umax;
1588       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1589       umax += maxadd;
1590 
1591       /* allocate a longer ju */
1592       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1593       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
1594       ierr     = PetscFree(ju);CHKERRQ(ierr);
1595       ju       = jutmp;
1596 
1597       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1598       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr);
1599       ierr     = PetscFree(lev);CHKERRQ(ierr);
1600       lev      = jutmp;
1601       reallocs += 2; /* count how many times we realloc */
1602     }
1603 
1604     /* save nonzero structure of k-th row in ju */
1605     i=k;
1606     while (nzk --) {
1607       i                = q[i];
1608       ju[juidx]        = i;
1609       lev[juidx-shift] = levtmp[i];
1610       juidx++;
1611     }
1612   }
1613 
1614   if (ai[mbs] != 0) {
1615     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1616     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1617     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af);
1618     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af);
1619     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n");
1620   } else {
1621     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1622   }
1623 
1624   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1625   ierr = PetscFree(prowl);CHKERRQ(ierr);
1626   ierr = PetscFree(lev);CHKERRQ(ierr);
1627 
1628   /* put together the new matrix */
1629   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1630   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1631   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1632 
1633   /* PetscLogObjectParent(*B,iperm); */
1634   b    = (Mat_SeqSBAIJ*)(*B)->data;
1635   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1636   b->singlemalloc = PETSC_FALSE;
1637   /* the next line frees the default space generated by the Create() */
1638   ierr    = PetscFree(b->a);CHKERRQ(ierr);
1639   ierr    = PetscFree(b->ilen);CHKERRQ(ierr);
1640   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1641   b->j    = ju;
1642   b->i    = iu;
1643   b->diag = 0;
1644   b->ilen = 0;
1645   b->imax = 0;
1646 
1647   if (b->row) {
1648     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1649   }
1650   if (b->icol) {
1651     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1652   }
1653   b->row  = perm;
1654   b->icol = perm;
1655   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1656   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1657   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1658   /* In b structure:  Free imax, ilen, old a, old j.
1659      Allocate idnew, solve_work, new a, new j */
1660   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
1661   b->maxnz = b->nz = iu[mbs];
1662 
1663   (*B)->factor                 = FACTOR_CHOLESKY;
1664   (*B)->info.factor_mallocs    = reallocs;
1665   (*B)->info.fill_ratio_given  = f;
1666   if (ai[mbs] != 0) {
1667     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1668   } else {
1669     (*B)->info.fill_ratio_needed = 0.0;
1670   }
1671 
1672   if (perm_identity){
1673     switch (bs) {
1674       case 1:
1675         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1676         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1677         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1678         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1679         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n");
1680         break;
1681       case 2:
1682         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1683         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1684         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1685         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1686         break;
1687       case 3:
1688         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1689         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1690         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1691         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1692         break;
1693       case 4:
1694         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1695         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1696         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1697         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1698         break;
1699       case 5:
1700         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1701         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1702         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1703         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1704         break;
1705       case 6:
1706         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1707         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1708         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1709         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1710         break;
1711       case 7:
1712         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1713         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1714         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1715         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1716       break;
1717       default:
1718         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1719         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1720         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1721         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
1722       break;
1723     }
1724   }
1725 
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 
1730 
1731