xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 4482741e5b2e2bbc854fb1f8dba65221386520f2)
1 /*$Id: sbaijfact2.c,v 1.2.1.41 2001/08/07 03:03:01 balay Exp $*/
2 /*
3     Factorization code for SBAIJ format.
4 */
5 
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/ilu.h"
9 #include "src/inline/dot.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering"
13 int MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
14 {
15   PetscFunctionBegin;
16   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
17 }
18 
19 #undef __FUNCT__
20 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering"
21 int MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
22 {
23   PetscFunctionBegin;
24   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
25 }
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering"
29 int MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
30 {
31   PetscFunctionBegin;
32   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
33 }
34 
35 #undef __FUNCT__
36 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering"
37 int MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
38 {
39   PetscFunctionBegin;
40   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering"
45 int MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
46 {
47   PetscFunctionBegin;
48   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
49 }
50 
51 #undef __FUNCT__
52 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering"
53 int MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
54 {
55   PetscFunctionBegin;
56   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering"
61 int MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
62 {
63   PetscFunctionBegin;
64   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1"
69 int MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
70 {
71   PetscFunctionBegin;
72   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2"
77 int MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
78 {
79   PetscFunctionBegin;
80   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
81 }
82 
83 #undef __FUNCT__
84 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3"
85 int MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
86 {
87   PetscFunctionBegin;
88   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4"
93 int MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
94 {
95   PetscFunctionBegin;
96   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5"
101 int MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
102 {
103   PetscFunctionBegin;
104   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6"
109 int MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
110 {
111   PetscFunctionBegin;
112   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7"
117 int MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
118 {
119   PetscFunctionBegin;
120   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
125 int MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
126 {
127   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
128   IS              isrow=a->row;
129   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
130   int             nz,*vj,k,ierr,*r,idx,k1;
131   int             bs=a->bs,bs2 = a->bs2;
132   MatScalar       *aa=a->a,*v,*diag;
133   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
134 
135   PetscFunctionBegin;
136   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
137   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
138   t  = a->solve_work;
139   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
140   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
141 
142   /* solve U^T * D * y = b by forward substitution */
143   xk = t;
144   for (k=0; k<mbs; k++) { /* t <- perm(b) */
145     idx   = bs*r[k];
146     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
147   }
148   for (k=0; k<mbs; k++){
149     v  = aa + bs2*ai[k];
150     xk = t + k*bs;      /* Dk*xk = k-th block of x */
151     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
152     nz = ai[k+1] - ai[k];
153     vj = aj + ai[k];
154     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
155     while (nz--) {
156       /* x(:) += U(k,:)^T*(Dk*xk) */
157       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
158       vj++; xj = t + (*vj)*bs;
159       v += bs2;
160     }
161     /* xk = inv(Dk)*(Dk*xk) */
162     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
163     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
164   }
165 
166   /* solve U*x = y by back substitution */
167   for (k=mbs-1; k>=0; k--){
168     v  = aa + bs2*ai[k];
169     xk = t + k*bs;        /* xk */
170     nz = ai[k+1] - ai[k];
171     vj = aj + ai[k];
172     xj = t + (*vj)*bs;
173     while (nz--) {
174       /* xk += U(k,:)*x(:) */
175       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
176       vj++;
177       v += bs2; xj = t + (*vj)*bs;
178     }
179     idx   = bs*r[k];
180     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
181   }
182 
183   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
184   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
185   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
186   PetscLogFlops(bs2*(2*a->nz + mbs));
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
192 int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
193 {
194   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
195   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
196   int             nz,*vj,k,ierr;
197   int             bs=a->bs,bs2 = a->bs2;
198   MatScalar       *aa=a->a,*v,*diag;
199   PetscScalar     *x,*xk,*xj,*b,*xk_tmp;
200 
201   PetscFunctionBegin;
202 
203   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
204   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
205 
206   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
207 
208   /* solve U^T * D * y = b by forward substitution */
209   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
210   for (k=0; k<mbs; k++){
211     v  = aa + bs2*ai[k];
212     xk = x + k*bs;      /* Dk*xk = k-th block of x */
213     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
214     nz = ai[k+1] - ai[k];
215     vj = aj + ai[k];
216     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
217     while (nz--) {
218       /* x(:) += U(k,:)^T*(Dk*xk) */
219       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
220       vj++; xj = x + (*vj)*bs;
221       v += bs2;
222     }
223     /* xk = inv(Dk)*(Dk*xk) */
224     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
225     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
226   }
227 
228   /* solve U*x = y by back substitution */
229   for (k=mbs-1; k>=0; k--){
230     v  = aa + bs2*ai[k];
231     xk = x + k*bs;        /* xk */
232     nz = ai[k+1] - ai[k];
233     vj = aj + ai[k];
234     xj = x + (*vj)*bs;
235     while (nz--) {
236       /* xk += U(k,:)*x(:) */
237       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
238       vj++;
239       v += bs2; xj = x + (*vj)*bs;
240     }
241   }
242 
243   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
244   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
245   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
246   PetscLogFlops(bs2*(2*a->nz + mbs));
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
252 int MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
253 {
254   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
255   IS              isrow=a->row;
256   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
257   int             nz,*vj,k,ierr,*r,idx;
258   MatScalar       *aa=a->a,*v,*d;
259   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
260 
261   PetscFunctionBegin;
262   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
263   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
264   t  = a->solve_work;
265   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
266 
267   /* solve U^T * D * y = b by forward substitution */
268   tp = t;
269   for (k=0; k<mbs; k++) { /* t <- perm(b) */
270     idx   = 7*r[k];
271     tp[0] = b[idx];
272     tp[1] = b[idx+1];
273     tp[2] = b[idx+2];
274     tp[3] = b[idx+3];
275     tp[4] = b[idx+4];
276     tp[5] = b[idx+5];
277     tp[6] = b[idx+6];
278     tp += 7;
279   }
280 
281   for (k=0; k<mbs; k++){
282     v  = aa + 49*ai[k];
283     vj = aj + ai[k];
284     tp = t + k*7;
285     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
286     nz = ai[k+1] - ai[k];
287     tp = t + (*vj)*7;
288     while (nz--) {
289       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
290       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
291       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
292       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
293       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
294       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
295       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
296       vj++; tp = t + (*vj)*7;
297       v += 49;
298     }
299 
300     /* xk = inv(Dk)*(Dk*xk) */
301     d  = aa+k*49;          /* ptr to inv(Dk) */
302     tp    = t + k*7;
303     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
304     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
305     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
306     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
307     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
308     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
309     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
310   }
311 
312   /* solve U*x = y by back substitution */
313   for (k=mbs-1; k>=0; k--){
314     v  = aa + 49*ai[k];
315     vj = aj + ai[k];
316     tp    = t + k*7;
317     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
318     nz = ai[k+1] - ai[k];
319 
320     tp = t + (*vj)*7;
321     while (nz--) {
322       /* xk += U(k,:)*x(:) */
323       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
324       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
325       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
326       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
327       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
328       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
329       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
330       vj++; tp = t + (*vj)*7;
331       v += 49;
332     }
333     tp    = t + k*7;
334     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
335     idx   = 7*r[k];
336     x[idx]     = x0;
337     x[idx+1]   = x1;
338     x[idx+2]   = x2;
339     x[idx+3]   = x3;
340     x[idx+4]   = x4;
341     x[idx+5]   = x5;
342     x[idx+6]   = x6;
343   }
344 
345   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
346   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
347   PetscLogFlops(49*(2*a->nz + mbs));
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
353 int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
354 {
355   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
356   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
357   MatScalar       *aa=a->a,*v,*d;
358   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
359   int             nz,*vj,k,ierr;
360 
361   PetscFunctionBegin;
362 
363   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
364   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
365 
366   /* solve U^T * D * y = b by forward substitution */
367   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
368   for (k=0; k<mbs; k++){
369     v  = aa + 49*ai[k];
370     xp = x + k*7;
371     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
372     nz = ai[k+1] - ai[k];
373     vj = aj + ai[k];
374     xp = x + (*vj)*7;
375     while (nz--) {
376       /* x(:) += U(k,:)^T*(Dk*xk) */
377       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
378       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
379       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
380       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
381       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
382       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
383       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
384       vj++; xp = x + (*vj)*7;
385       v += 49;
386     }
387     /* xk = inv(Dk)*(Dk*xk) */
388     d  = aa+k*49;          /* ptr to inv(Dk) */
389     xp = x + k*7;
390     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
391     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
392     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
393     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
394     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
395     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
396     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
397   }
398 
399   /* solve U*x = y by back substitution */
400   for (k=mbs-1; k>=0; k--){
401     v  = aa + 49*ai[k];
402     xp = x + k*7;
403     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
404     nz = ai[k+1] - ai[k];
405     vj = aj + ai[k];
406     xp = x + (*vj)*7;
407     while (nz--) {
408       /* xk += U(k,:)*x(:) */
409       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
410       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
411       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
412       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
413       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
414       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
415       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
416       vj++;
417       v += 49; xp = x + (*vj)*7;
418     }
419     xp = x + k*7;
420     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
421   }
422 
423   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   PetscLogFlops(49*(2*a->nz + mbs));
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
431 int MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
432 {
433   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
434   IS              isrow=a->row;
435   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
436   int             nz,*vj,k,ierr,*r,idx;
437   MatScalar       *aa=a->a,*v,*d;
438   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
439 
440   PetscFunctionBegin;
441   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
442   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
443   t  = a->solve_work;
444   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
445 
446   /* solve U^T * D * y = b by forward substitution */
447   tp = t;
448   for (k=0; k<mbs; k++) { /* t <- perm(b) */
449     idx   = 6*r[k];
450     tp[0] = b[idx];
451     tp[1] = b[idx+1];
452     tp[2] = b[idx+2];
453     tp[3] = b[idx+3];
454     tp[4] = b[idx+4];
455     tp[5] = b[idx+5];
456     tp += 6;
457   }
458 
459   for (k=0; k<mbs; k++){
460     v  = aa + 36*ai[k];
461     vj = aj + ai[k];
462     tp = t + k*6;
463     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
464     nz = ai[k+1] - ai[k];
465     tp = t + (*vj)*6;
466     while (nz--) {
467       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
468       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
469       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
470       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
471       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
472       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
473       vj++; tp = t + (*vj)*6;
474       v += 36;
475     }
476 
477     /* xk = inv(Dk)*(Dk*xk) */
478     d  = aa+k*36;          /* ptr to inv(Dk) */
479     tp    = t + k*6;
480     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
481     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
482     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
483     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
484     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
485     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
486   }
487 
488   /* solve U*x = y by back substitution */
489   for (k=mbs-1; k>=0; k--){
490     v  = aa + 36*ai[k];
491     vj = aj + ai[k];
492     tp    = t + k*6;
493     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
494     nz = ai[k+1] - ai[k];
495 
496     tp = t + (*vj)*6;
497     while (nz--) {
498       /* xk += U(k,:)*x(:) */
499       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
500       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
501       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
502       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
503       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
504       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
505       vj++; tp = t + (*vj)*6;
506       v += 36;
507     }
508     tp    = t + k*6;
509     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
510     idx   = 6*r[k];
511     x[idx]     = x0;
512     x[idx+1]   = x1;
513     x[idx+2]   = x2;
514     x[idx+3]   = x3;
515     x[idx+4]   = x4;
516     x[idx+5]   = x5;
517   }
518 
519   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
520   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
521   PetscLogFlops(36*(2*a->nz + mbs));
522   PetscFunctionReturn(0);
523 }
524 
525 #undef __FUNCT__
526 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
527 int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
528 {
529   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
530   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
531   MatScalar       *aa=a->a,*v,*d;
532   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5;
533   int             nz,*vj,k,ierr;
534 
535   PetscFunctionBegin;
536 
537   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
538   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
539 
540   /* solve U^T * D * y = b by forward substitution */
541   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
542   for (k=0; k<mbs; k++){
543     v  = aa + 36*ai[k];
544     xp = x + k*6;
545     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
546     nz = ai[k+1] - ai[k];
547     vj = aj + ai[k];
548     xp = x + (*vj)*6;
549     while (nz--) {
550       /* x(:) += U(k,:)^T*(Dk*xk) */
551       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
552       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
553       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
554       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
555       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
556       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
557       vj++; xp = x + (*vj)*6;
558       v += 36;
559     }
560     /* xk = inv(Dk)*(Dk*xk) */
561     d  = aa+k*36;          /* ptr to inv(Dk) */
562     xp = x + k*6;
563     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
564     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
565     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
566     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
567     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
568     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
569   }
570 
571   /* solve U*x = y by back substitution */
572   for (k=mbs-1; k>=0; k--){
573     v  = aa + 36*ai[k];
574     xp = x + k*6;
575     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
576     nz = ai[k+1] - ai[k];
577     vj = aj + ai[k];
578     xp = x + (*vj)*6;
579     while (nz--) {
580       /* xk += U(k,:)*x(:) */
581       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
582       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
583       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
584       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
585       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
586       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
587       vj++;
588       v += 36; xp = x + (*vj)*6;
589     }
590     xp = x + k*6;
591     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
592   }
593 
594   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
595   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
596   PetscLogFlops(36*(2*a->nz + mbs));
597   PetscFunctionReturn(0);
598 }
599 
600 #undef __FUNCT__
601 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
602 int MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
603 {
604   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
605   IS              isrow=a->row;
606   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
607   int             nz,*vj,k,ierr,*r,idx;
608   MatScalar       *aa=a->a,*v,*diag;
609   PetscScalar     *x,*b,x0,x1,x2,x3,x4,*t,*tp;
610 
611   PetscFunctionBegin;
612   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
613   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
614   t  = a->solve_work;
615   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
616 
617   /* solve U^T * D * y = b by forward substitution */
618   tp = t;
619   for (k=0; k<mbs; k++) { /* t <- perm(b) */
620     idx   = 5*r[k];
621     tp[0] = b[idx];
622     tp[1] = b[idx+1];
623     tp[2] = b[idx+2];
624     tp[3] = b[idx+3];
625     tp[4] = b[idx+4];
626     tp += 5;
627   }
628 
629   for (k=0; k<mbs; k++){
630     v  = aa + 25*ai[k];
631     vj = aj + ai[k];
632     tp = t + k*5;
633     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
634     nz = ai[k+1] - ai[k];
635 
636     tp = t + (*vj)*5;
637     while (nz--) {
638       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
639       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
640       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
641       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
642       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
643       vj++; tp = t + (*vj)*5;
644       v += 25;
645     }
646 
647     /* xk = inv(Dk)*(Dk*xk) */
648     diag  = aa+k*25;          /* ptr to inv(Dk) */
649     tp    = t + k*5;
650       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
651       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
652       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
653       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
654       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
655   }
656 
657   /* solve U*x = y by back substitution */
658   for (k=mbs-1; k>=0; k--){
659     v  = aa + 25*ai[k];
660     vj = aj + ai[k];
661     tp    = t + k*5;
662     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
663     nz = ai[k+1] - ai[k];
664 
665     tp = t + (*vj)*5;
666     while (nz--) {
667       /* xk += U(k,:)*x(:) */
668       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
669       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
670       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
671       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
672       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
673       vj++; tp = t + (*vj)*5;
674       v += 25;
675     }
676     tp    = t + k*5;
677     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
678     idx   = 5*r[k];
679     x[idx]     = x0;
680     x[idx+1]   = x1;
681     x[idx+2]   = x2;
682     x[idx+3]   = x3;
683     x[idx+4]   = x4;
684   }
685 
686   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
687   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
688   PetscLogFlops(25*(2*a->nz + mbs));
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
694 int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
695 {
696   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
697   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
698   MatScalar       *aa=a->a,*v,*diag;
699   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4;
700   int             nz,*vj,k,ierr;
701 
702   PetscFunctionBegin;
703 
704   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
705   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
706 
707   /* solve U^T * D * y = b by forward substitution */
708   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
709   for (k=0; k<mbs; k++){
710     v  = aa + 25*ai[k];
711     xp = x + k*5;
712     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
713     nz = ai[k+1] - ai[k];
714     vj = aj + ai[k];
715     xp = x + (*vj)*5;
716     while (nz--) {
717       /* x(:) += U(k,:)^T*(Dk*xk) */
718       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
719       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
720       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
721       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
722       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
723       vj++; xp = x + (*vj)*5;
724       v += 25;
725     }
726     /* xk = inv(Dk)*(Dk*xk) */
727     diag = aa+k*25;          /* ptr to inv(Dk) */
728     xp   = x + k*5;
729     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
730     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
731     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
732     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
733     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
734   }
735 
736   /* solve U*x = y by back substitution */
737   for (k=mbs-1; k>=0; k--){
738     v  = aa + 25*ai[k];
739     xp = x + k*5;
740     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
741     nz = ai[k+1] - ai[k];
742     vj = aj + ai[k];
743     xp = x + (*vj)*5;
744     while (nz--) {
745       /* xk += U(k,:)*x(:) */
746       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
747       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
748       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
749       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
750       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
751       vj++;
752       v += 25; xp = x + (*vj)*5;
753     }
754     xp = x + k*5;
755     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
756   }
757 
758   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
759   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
760   PetscLogFlops(25*(2*a->nz + mbs));
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
766 int MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
767 {
768   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
769   IS              isrow=a->row;
770   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
771   int             nz,*vj,k,ierr,*r,idx;
772   MatScalar       *aa=a->a,*v,*diag;
773   PetscScalar     *x,*b,x0,x1,x2,x3,*t,*tp;
774 
775   PetscFunctionBegin;
776   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
777   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
778   t  = a->solve_work;
779   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
780 
781   /* solve U^T * D * y = b by forward substitution */
782   tp = t;
783   for (k=0; k<mbs; k++) { /* t <- perm(b) */
784     idx   = 4*r[k];
785     tp[0] = b[idx];
786     tp[1] = b[idx+1];
787     tp[2] = b[idx+2];
788     tp[3] = b[idx+3];
789     tp += 4;
790   }
791 
792   for (k=0; k<mbs; k++){
793     v  = aa + 16*ai[k];
794     vj = aj + ai[k];
795     tp = t + k*4;
796     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
797     nz = ai[k+1] - ai[k];
798 
799     tp = t + (*vj)*4;
800     while (nz--) {
801       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
802       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
803       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
804       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
805       vj++; tp = t + (*vj)*4;
806       v += 16;
807     }
808 
809     /* xk = inv(Dk)*(Dk*xk) */
810     diag  = aa+k*16;          /* ptr to inv(Dk) */
811     tp    = t + k*4;
812     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
813     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
814     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
815     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
816   }
817 
818   /* solve U*x = y by back substitution */
819   for (k=mbs-1; k>=0; k--){
820     v  = aa + 16*ai[k];
821     vj = aj + ai[k];
822     tp    = t + k*4;
823     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
824     nz = ai[k+1] - ai[k];
825 
826     tp = t + (*vj)*4;
827     while (nz--) {
828       /* xk += U(k,:)*x(:) */
829       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
830       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
831       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
832       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
833       vj++; tp = t + (*vj)*4;
834       v += 16;
835     }
836     tp    = t + k*4;
837     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
838     idx        = 4*r[k];
839     x[idx]     = x0;
840     x[idx+1]   = x1;
841     x[idx+2]   = x2;
842     x[idx+3]   = x3;
843   }
844 
845   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
846   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
847   PetscLogFlops(16*(2*a->nz + mbs));
848   PetscFunctionReturn(0);
849 }
850 
851 /*
852    Special case where the matrix was factored in the natural ordering.
853    This eliminates the need for the column and row permutation.
854 */
855 #undef __FUNCT__
856 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
857 int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
858 {
859   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
860   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
861   MatScalar       *aa=a->a,*v,*diag;
862   PetscScalar     *x,*xp,*b,x0,x1,x2,x3;
863   int             nz,*vj,k,ierr;
864 
865   PetscFunctionBegin;
866 
867   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
868   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
869 
870   /* solve U^T * D * y = b by forward substitution */
871   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
872   for (k=0; k<mbs; k++){
873     v  = aa + 16*ai[k];
874     xp = x + k*4;
875     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
876     nz = ai[k+1] - ai[k];
877     vj = aj + ai[k];
878     xp = x + (*vj)*4;
879     while (nz--) {
880       /* x(:) += U(k,:)^T*(Dk*xk) */
881       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
882       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
883       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
884       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
885       vj++; xp = x + (*vj)*4;
886       v += 16;
887     }
888     /* xk = inv(Dk)*(Dk*xk) */
889     diag = aa+k*16;          /* ptr to inv(Dk) */
890     xp   = x + k*4;
891     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
892     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
893     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
894     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
895   }
896 
897   /* solve U*x = y by back substitution */
898   for (k=mbs-1; k>=0; k--){
899     v  = aa + 16*ai[k];
900     xp = x + k*4;
901     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
902     nz = ai[k+1] - ai[k];
903     vj = aj + ai[k];
904     xp = x + (*vj)*4;
905     while (nz--) {
906       /* xk += U(k,:)*x(:) */
907       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
908       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
909       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
910       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
911       vj++;
912       v += 16; xp = x + (*vj)*4;
913     }
914     xp = x + k*4;
915     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
916   }
917 
918   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
919   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
920   PetscLogFlops(16*(2*a->nz + mbs));
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNCT__
925 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
926 int MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
927 {
928   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
929   IS              isrow=a->row;
930   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
931   int             nz,*vj,k,ierr,*r,idx;
932   MatScalar       *aa=a->a,*v,*diag;
933   PetscScalar     *x,*b,x0,x1,x2,*t,*tp;
934 
935   PetscFunctionBegin;
936   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
937   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
938   t  = a->solve_work;
939   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
940 
941   /* solve U^T * D * y = b by forward substitution */
942   tp = t;
943   for (k=0; k<mbs; k++) { /* t <- perm(b) */
944     idx   = 3*r[k];
945     tp[0] = b[idx];
946     tp[1] = b[idx+1];
947     tp[2] = b[idx+2];
948     tp += 3;
949   }
950 
951   for (k=0; k<mbs; k++){
952     v  = aa + 9*ai[k];
953     vj = aj + ai[k];
954     tp = t + k*3;
955     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
956     nz = ai[k+1] - ai[k];
957 
958     tp = t + (*vj)*3;
959     while (nz--) {
960       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
961       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
962       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
963       vj++; tp = t + (*vj)*3;
964       v += 9;
965     }
966 
967     /* xk = inv(Dk)*(Dk*xk) */
968     diag  = aa+k*9;          /* ptr to inv(Dk) */
969     tp    = t + k*3;
970     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
971     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
972     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
973   }
974 
975   /* solve U*x = y by back substitution */
976   for (k=mbs-1; k>=0; k--){
977     v  = aa + 9*ai[k];
978     vj = aj + ai[k];
979     tp    = t + k*3;
980     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
981     nz = ai[k+1] - ai[k];
982 
983     tp = t + (*vj)*3;
984     while (nz--) {
985       /* xk += U(k,:)*x(:) */
986       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
987       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
988       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
989       vj++; tp = t + (*vj)*3;
990       v += 9;
991     }
992     tp    = t + k*3;
993     tp[0] = x0; tp[1] = x1; tp[2] = x2;
994     idx      = 3*r[k];
995     x[idx]   = x0;
996     x[idx+1] = x1;
997     x[idx+2] = x2;
998   }
999 
1000   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1001   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1002   PetscLogFlops(9*(2*a->nz + mbs));
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 /*
1007    Special case where the matrix was factored in the natural ordering.
1008    This eliminates the need for the column and row permutation.
1009 */
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1012 int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1013 {
1014   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1015   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1016   MatScalar       *aa=a->a,*v,*diag;
1017   PetscScalar     *x,*xp,*b,x0,x1,x2;
1018   int             nz,*vj,k,ierr;
1019 
1020   PetscFunctionBegin;
1021 
1022   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1023   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1024 
1025   /* solve U^T * D * y = b by forward substitution */
1026   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1027   for (k=0; k<mbs; k++){
1028     v  = aa + 9*ai[k];
1029     xp = x + k*3;
1030     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1031     nz = ai[k+1] - ai[k];
1032     vj = aj + ai[k];
1033     xp = x + (*vj)*3;
1034     while (nz--) {
1035       /* x(:) += U(k,:)^T*(Dk*xk) */
1036       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1037       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1038       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1039       vj++; xp = x + (*vj)*3;
1040       v += 9;
1041     }
1042     /* xk = inv(Dk)*(Dk*xk) */
1043     diag = aa+k*9;          /* ptr to inv(Dk) */
1044     xp   = x + k*3;
1045     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1046     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1047     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1048   }
1049 
1050   /* solve U*x = y by back substitution */
1051   for (k=mbs-1; k>=0; k--){
1052     v  = aa + 9*ai[k];
1053     xp = x + k*3;
1054     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1055     nz = ai[k+1] - ai[k];
1056     vj = aj + ai[k];
1057     xp = x + (*vj)*3;
1058     while (nz--) {
1059       /* xk += U(k,:)*x(:) */
1060       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1061       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1062       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1063       vj++;
1064       v += 9; xp = x + (*vj)*3;
1065     }
1066     xp = x + k*3;
1067     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1068   }
1069 
1070   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1071   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1072   PetscLogFlops(9*(2*a->nz + mbs));
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1078 int MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1079 {
1080   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ *)A->data;
1081   IS              isrow=a->row;
1082   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1083   int             nz,*vj,k,k2,ierr,*r,idx;
1084   MatScalar       *aa=a->a,*v,*diag;
1085   PetscScalar     *x,*b,x0,x1,*t;
1086 
1087   PetscFunctionBegin;
1088   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1089   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1090   t  = a->solve_work;
1091   /* printf("called MatSolve_SeqSBAIJ_2\n"); */
1092   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1093 
1094   /* solve U^T * D * y = perm(b) by forward substitution */
1095   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1096     idx = 2*r[k];
1097     t[k*2]   = b[idx];
1098     t[k*2+1] = b[idx+1];
1099   }
1100   for (k=0; k<mbs; k++){
1101     v  = aa + 4*ai[k];
1102     vj = aj + ai[k];
1103     k2 = k*2;
1104     x0 = t[k2]; x1 = t[k2+1];
1105     nz = ai[k+1] - ai[k];
1106     while (nz--) {
1107       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1108       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1109       vj++; v += 4;
1110     }
1111     diag = aa+k*4;  /* ptr to inv(Dk) */
1112     t[k2]   = diag[0]*x0 + diag[2]*x1;
1113     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1114   }
1115 
1116   /* solve U*x = y by back substitution */
1117   for (k=mbs-1; k>=0; k--){
1118     v  = aa + 4*ai[k];
1119     vj = aj + ai[k];
1120     k2 = k*2;
1121     x0 = t[k2]; x1 = t[k2+1];
1122     nz = ai[k+1] - ai[k];
1123     while (nz--) {
1124       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1125       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1126       vj++; v += 4;
1127     }
1128     t[k2]    = x0;
1129     t[k2+1]  = x1;
1130     idx      = 2*r[k];
1131     x[idx]   = x0;
1132     x[idx+1] = x1;
1133   }
1134 
1135   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1136   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1137   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1138   PetscLogFlops(4*(2*a->nz + mbs));
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 /*
1143    Special case where the matrix was factored in the natural ordering.
1144    This eliminates the need for the column and row permutation.
1145 */
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1148 int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1149 {
1150   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1151   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1152   MatScalar       *aa=a->a,*v,*diag;
1153   PetscScalar     *x,*b,x0,x1;
1154   int             nz,*vj,k,k2,ierr;
1155 
1156   PetscFunctionBegin;
1157 
1158   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1159   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1160 
1161   /* solve U^T * D * y = b by forward substitution */
1162   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1163   for (k=0; k<mbs; k++){
1164     v  = aa + 4*ai[k];
1165     vj = aj + ai[k];
1166     k2 = k*2;
1167     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1168     nz = ai[k+1] - ai[k];
1169 
1170     while (nz--) {
1171       /* x(:) += U(k,:)^T*(Dk*xk) */
1172       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1173       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1174       vj++; v += 4;
1175     }
1176     /* xk = inv(Dk)*(Dk*xk) */
1177     diag = aa+k*4;          /* ptr to inv(Dk) */
1178     x[k2]   = diag[0]*x0 + diag[2]*x1;
1179     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1180   }
1181 
1182   /* solve U*x = y by back substitution */
1183   for (k=mbs-1; k>=0; k--){
1184     v  = aa + 4*ai[k];
1185     vj = aj + ai[k];
1186     k2 = k*2;
1187     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1188     nz = ai[k+1] - ai[k];
1189     while (nz--) {
1190       /* xk += U(k,:)*x(:) */
1191       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1192       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1193       vj++; v += 4;
1194     }
1195     x[k2]     = x0;
1196     x[k2+1]   = x1;
1197   }
1198 
1199   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1200   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1201   PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1207 int MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1208 {
1209   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ *)A->data;
1210   IS              isrow=a->row;
1211   int             mbs=a->mbs,*ai=a->i,*aj=a->j,ierr,*rip;
1212   MatScalar       *aa=a->a,*v;
1213   PetscScalar     *x,*b,xk,*t;
1214   int             nz,*vj,k;
1215 
1216   PetscFunctionBegin;
1217   if (!mbs) PetscFunctionReturn(0);
1218 
1219   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1220   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1221   t    = a->solve_work;
1222 
1223   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1224 
1225   /* solve U^T*D*y = perm(b) by forward substitution */
1226   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1227   for (k=0; k<mbs; k++){
1228     v  = aa + ai[k];
1229     vj = aj + ai[k];
1230     xk = t[k];
1231     nz = ai[k+1] - ai[k];
1232     while (nz--) t[*vj++] += (*v++) * xk;
1233     t[k] = xk*aa[k];  /* note: aa[k] = 1/D(k) */
1234   }
1235 
1236   /* solve U*x = y by back substitution */
1237   for (k=mbs-1; k>=0; k--){
1238     v  = aa + ai[k];
1239     vj = aj + ai[k];
1240     xk = t[k];
1241     nz = ai[k+1] - ai[k];
1242     while (nz--) xk += (*v++) * t[*vj++];
1243     t[k]      = xk;
1244     x[rip[k]] = xk;
1245   }
1246 
1247   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1248   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1249   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1250   PetscLogFlops(4*a->nz + A->m);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1256 int MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1257 {
1258   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1259   int          ierr;
1260 
1261   PetscFunctionBegin;
1262   if (a->bs == 1) {
1263     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1264   } else {
1265     IS              isrow=a->row;
1266     int             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1267     MatScalar       *aa=a->a,*v;
1268     PetscScalar     *x,*b,*t;
1269     int             nz,*vj,k,n;
1270     if (bb->n > a->solves_work_n) {
1271       if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
1272       ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1273       a->solves_work_n = bb->n;
1274     }
1275     n    = bb->n;
1276     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1277     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1278     t    = a->solves_work;
1279 
1280     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1281 
1282     /* solve U^T*D*y = perm(b) by forward substitution */
1283     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1284     for (k=0; k<mbs; k++){
1285       v  = aa + ai[k];
1286       vj = aj + ai[k];
1287       nz = ai[k+1] - ai[k];
1288       while (nz--) {
1289         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1290         v++;vj++;
1291       }
1292       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1293     }
1294 
1295     /* solve U*x = y by back substitution */
1296     for (k=mbs-1; k>=0; k--){
1297       v  = aa + ai[k];
1298       vj = aj + ai[k];
1299       nz = ai[k+1] - ai[k];
1300       while (nz--) {
1301         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1302         v++;vj++;
1303       }
1304       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1305     }
1306 
1307     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1308     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1309     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1310     PetscLogFlops(bb->n*(4*a->nz + A->m));
1311   }
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 /*
1316       Special case where the matrix was ILU(0) factored in the natural
1317    ordering. This eliminates the need for the column and row permutation.
1318 */
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1321 int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1322 {
1323   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ *)A->data;
1324   int             mbs=a->mbs,*ai=a->i,*aj=a->j,ierr;
1325   MatScalar       *aa=a->a,*v;
1326   PetscScalar     *x,*b,xk;
1327   int             nz,*vj,k;
1328 
1329   PetscFunctionBegin;
1330   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1331   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1332 
1333   /* solve U^T*D*y = b by forward substitution */
1334   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1335   for (k=0; k<mbs; k++){
1336     v  = aa + ai[k] + 1;
1337     vj = aj + ai[k] + 1;
1338     xk = x[k];
1339     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1340     while (nz--) x[*vj++] += (*v++) * xk;
1341     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1342   }
1343 
1344   /* solve U*x = y by back substitution */
1345   for (k=mbs-2; k>=0; k--){
1346     v  = aa + ai[k] + 1;
1347     vj = aj + ai[k] + 1;
1348     xk = x[k];
1349     nz = ai[k+1] - ai[k] - 1;
1350     while (nz--) xk += (*v++) * x[*vj++];
1351     x[k] = xk;
1352   }
1353 
1354   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1355   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1356   PetscLogFlops(4*a->nz + A->m);
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
1363 int MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1364 {
1365   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
1366   int         *rip,ierr,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1367   int         *jutmp,bs = a->bs,bs2=a->bs2;
1368   int         m,realloc = 0,*levtmp;
1369   int         *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl;
1370   int         incrlev,*lev,shift,prow,nz;
1371   int         *il,ili,nextprow;
1372   PetscReal   f = info->fill,levels = info->levels;
1373   PetscTruth  perm_identity;
1374 
1375   PetscFunctionBegin;
1376   /* check whether perm is the identity mapping */
1377   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1378 
1379   /* special case that simply copies fill pattern */
1380   if (!levels && perm_identity && bs==1) {
1381     ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr);
1382     (*B)->factor    = FACTOR_CHOLESKY;
1383     b               = (Mat_SeqSBAIJ*)(*B)->data;
1384     b->row          = perm;
1385     b->icol         = perm;
1386     b->factor_damping   = info->damping;
1387     b->factor_shift     = info->shift;
1388     b->factor_zeropivot = info->zeropivot;
1389     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1390     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1391     ierr         = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1392     (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1393     (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1394     PetscFunctionReturn(0);
1395   }
1396 
1397   /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */
1398   if (levels > 0 && perm_identity && bs==1 ){
1399     if (!perm_identity) a->permute = PETSC_TRUE;
1400 
1401   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1402 
1403   if (perm_identity){ /* without permutation */
1404     ai = a->i; aj = a->j;
1405   } else {            /* non-trivial permutation */
1406     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
1407     ai = a->inew; aj = a->jnew;
1408   }
1409 
1410   /* initialization */
1411   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1412   umax  = (int)(f*ai[mbs] + 1);
1413   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1414   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1415   iu[0] = 0;
1416   juidx = 0; /* index for ju */
1417   ierr  = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */
1418   q      = jl + mbs;   /* linked list for col index of active row */
1419   levtmp = q + mbs;
1420   il     = levtmp + mbs;
1421   for (i=0; i<mbs; i++){
1422     jl[i] = mbs;
1423     q[i]  = 0;
1424     il[i] = 0;
1425   }
1426 
1427   /* for each row k */
1428   for (k=0; k<mbs; k++){
1429     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
1430     q[k] = mbs;
1431     /* initialize nonzero structure of k-th row to row rip[k] of A */
1432     jmin = ai[rip[k]] +1; /* exclude diag[k] */
1433     jmax = ai[rip[k]+1];
1434     for (j=jmin; j<jmax; j++){
1435       vj = rip[aj[j]]; /* col. value */
1436       if(vj > k){
1437         qm = k;
1438         do {
1439           m  = qm; qm = q[m];
1440         } while(qm < vj);
1441         if (qm == vj) {
1442           SETERRQ(1," error: duplicate entry in A\n");
1443         }
1444         nzk++;
1445         q[m]  = vj;
1446         q[vj] = qm;
1447         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1448       } /* if(vj > k) */
1449     } /* for (j=jmin; j<jmax; j++) */
1450 
1451     /* modify nonzero structure of k-th row by computing fill-in
1452        for each row i to be merged in */
1453     prow = k;
1454     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
1455 
1456     while (prow < k){
1457       nextprow = jl[prow];
1458       /* merge row prow into k-th row */
1459       ili  = il[prow];
1460       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
1461       jmax = iu[prow+1];
1462       qm   = k;
1463       for (j=jmin; j<jmax; j++){
1464         vj = ju[j];
1465         incrlev = lev[j] + 1;
1466         if (incrlev > levels) continue;
1467         do {
1468           m = qm; qm = q[m];
1469         } while (qm < vj);
1470         if (qm != vj){  /* a fill */
1471           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1472           levtmp[vj]  = incrlev;
1473         } else {
1474           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1475         }
1476       }
1477       if (jmin < jmax){ /* update il and jl */
1478         il[prow] = jmin;
1479         j = ju[jmin];
1480         jl[prow] = jl[j]; jl[j] = prow;
1481       }
1482       prow = nextprow;
1483     }
1484 
1485     /* add the first nonzero element in U(k, k+1:mbs-1) to jl */
1486     if (nzk > 0){
1487       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1488       jl[k] = jl[i]; jl[i] = k;
1489       il[k] = iu[k] + 1;
1490     }
1491     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */
1492 
1493     /* allocate more space to ju if needed */
1494     if (iu[k+1] > umax) {
1495       /* estimate how much additional space we will need */
1496       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1497       /* just double the memory each time */
1498       maxadd = umax;
1499       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1500       umax += maxadd;
1501 
1502       /* allocate a longer ju */
1503       ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1504       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1505       ierr = PetscFree(ju);CHKERRQ(ierr);
1506       ju   = jutmp;
1507 
1508       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1509       ierr     = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr);
1510       ierr     = PetscFree(lev);CHKERRQ(ierr);
1511       lev      = jutmp;
1512       realloc += 2; /* count how many times we realloc */
1513     }
1514 
1515     /* save nonzero structure of k-th row in ju */
1516     ju[juidx]  = k; /* diag[k] */
1517     lev[juidx] = 0;
1518     juidx++;
1519     i = k;
1520     while (nzk --) {
1521       i           = q[i];
1522       ju[juidx] = i;
1523       lev[juidx] = levtmp[i];
1524       juidx++;
1525     }
1526   } /* end of for (k=0; k<mbs; k++) */
1527 
1528   if (ai[mbs] != 0) {
1529     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1530     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1531     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1532     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
1533     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
1534   } else {
1535      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1536   }
1537 
1538   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1539   ierr = PetscFree(jl);CHKERRQ(ierr);
1540   ierr = PetscFree(lev);CHKERRQ(ierr);
1541 
1542   /* put together the new matrix */
1543   ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr);
1544   /* PetscLogObjectParent(*B,iperm); */
1545   b = (Mat_SeqSBAIJ*)(*B)->data;
1546   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1547   b->singlemalloc = PETSC_FALSE;
1548   /* the next line frees the default space generated by the Create() */
1549   ierr = PetscFree(b->a);CHKERRQ(ierr);
1550   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1551   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1552   b->j    = ju;
1553   b->i    = iu;
1554   b->diag = 0;
1555   b->ilen = 0;
1556   b->imax = 0;
1557   b->row  = perm;
1558   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1559   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1560   b->icol = perm;
1561   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1562   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1563   /* In b structure:  Free imax, ilen, old a, old j.
1564      Allocate idnew, solve_work, new a, new j */
1565   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1566   b->maxnz          = b->nz = iu[mbs];
1567   b->factor_damping   = info->damping;
1568   b->factor_shift     = info->shift;
1569   b->factor_zeropivot = info->zeropivot;
1570 
1571   (*B)->factor                 = FACTOR_CHOLESKY;
1572   (*B)->info.factor_mallocs    = realloc;
1573   (*B)->info.fill_ratio_given  = f;
1574   if (ai[mbs] != 0) {
1575     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1576   } else {
1577     (*B)->info.fill_ratio_needed = 0.0;
1578   }
1579 
1580 
1581   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1582   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1583   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1584 
1585   PetscFunctionReturn(0);
1586   } /* end of if (levels > 0 && perm_identity && bs==1 ) */
1587 
1588   if (!perm_identity) a->permute = PETSC_TRUE;
1589   if (perm_identity){
1590     ai = a->i; aj = a->j;
1591   } else { /*  non-trivial permutation */
1592     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1593     ai = a->inew; aj = a->jnew;
1594   }
1595 
1596   /* initialization */
1597   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1598   umax  = (int)(f*ai[mbs] + 1);
1599   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1600   umax += mbs + 1;
1601   shift = mbs + 1;
1602   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1603   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1604   iu[0] = mbs + 1;
1605   juidx = mbs + 1;
1606   /* prowl: linked list for pivot row */
1607   ierr    = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr);
1608   /* q: linked list for col index */
1609   q       = prowl + mbs;
1610   levtmp  = q     + mbs;
1611 
1612   for (i=0; i<mbs; i++){
1613     prowl[i] = mbs;
1614     q[i] = 0;
1615   }
1616 
1617   /* for each row k */
1618   for (k=0; k<mbs; k++){
1619     nzk  = 0;
1620     q[k] = mbs;
1621     /* copy current row into linked list */
1622     nz = ai[rip[k]+1] - ai[rip[k]];
1623     j = ai[rip[k]];
1624     while (nz--){
1625       vj = rip[aj[j++]];
1626       if (vj > k){
1627         qm = k;
1628         do {
1629           m  = qm; qm = q[m];
1630         } while(qm < vj);
1631         if (qm == vj) {
1632           SETERRQ(1," error: duplicate entry in A\n");
1633         }
1634         nzk++;
1635         q[m]       = vj;
1636         q[vj]      = qm;
1637         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1638       }
1639     }
1640 
1641     /* modify nonzero structure of k-th row by computing fill-in
1642        for each row prow to be merged in */
1643     prow = k;
1644     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1645 
1646     while (prow < k){
1647       /* merge row prow into k-th row */
1648       jmin = iu[prow] + 1;
1649       jmax = iu[prow+1];
1650       qm = k;
1651       for (j=jmin; j<jmax; j++){
1652         incrlev = lev[j-shift] + 1;
1653 	if (incrlev > levels) continue;
1654 
1655         vj      = ju[j];
1656         do {
1657           m = qm; qm = q[m];
1658         } while (qm < vj);
1659         if (qm != vj){      /* a fill */
1660           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1661           levtmp[vj] = incrlev;
1662         } else {
1663           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1664         }
1665       }
1666       prow = prowl[prow]; /* next pivot row */
1667     }
1668 
1669     /* add k to row list for first nonzero element in k-th row */
1670     if (nzk > 1){
1671       i = q[k]; /* col value of first nonzero element in k_th row of U */
1672       prowl[k] = prowl[i]; prowl[i] = k;
1673     }
1674     iu[k+1] = iu[k] + nzk;
1675 
1676     /* allocate more space to ju and lev if needed */
1677     if (iu[k+1] > umax) {
1678       /* estimate how much additional space we will need */
1679       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1680       /* just double the memory each time */
1681       maxadd = umax;
1682       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1683       umax += maxadd;
1684 
1685       /* allocate a longer ju */
1686       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1687       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1688       ierr     = PetscFree(ju);CHKERRQ(ierr);
1689       ju       = jutmp;
1690 
1691       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1692       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr);
1693       ierr     = PetscFree(lev);CHKERRQ(ierr);
1694       lev      = jutmp;
1695       realloc += 2; /* count how many times we realloc */
1696     }
1697 
1698     /* save nonzero structure of k-th row in ju */
1699     i=k;
1700     while (nzk --) {
1701       i                = q[i];
1702       ju[juidx]        = i;
1703       lev[juidx-shift] = levtmp[i];
1704       juidx++;
1705     }
1706   }
1707 
1708   if (ai[mbs] != 0) {
1709     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1710     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1711     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af);
1712     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af);
1713     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n");
1714   } else {
1715     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1716   }
1717 
1718   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1719   ierr = PetscFree(prowl);CHKERRQ(ierr);
1720   ierr = PetscFree(lev);CHKERRQ(ierr);
1721 
1722   /* put together the new matrix */
1723   ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr);
1724   /* PetscLogObjectParent(*B,iperm); */
1725   b    = (Mat_SeqSBAIJ*)(*B)->data;
1726   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1727   b->singlemalloc = PETSC_FALSE;
1728   /* the next line frees the default space generated by the Create() */
1729   ierr    = PetscFree(b->a);CHKERRQ(ierr);
1730   ierr    = PetscFree(b->ilen);CHKERRQ(ierr);
1731   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1732   b->j    = ju;
1733   b->i    = iu;
1734   b->diag = 0;
1735   b->ilen = 0;
1736   b->imax = 0;
1737 
1738   if (b->row) {
1739     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1740   }
1741   if (b->icol) {
1742     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1743   }
1744   b->row  = perm;
1745   b->icol = perm;
1746   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1747   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1748   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1749   /* In b structure:  Free imax, ilen, old a, old j.
1750      Allocate idnew, solve_work, new a, new j */
1751   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1752   b->maxnz = b->nz = iu[mbs];
1753 
1754   (*B)->factor                 = FACTOR_CHOLESKY;
1755   (*B)->info.factor_mallocs    = realloc;
1756   (*B)->info.fill_ratio_given  = f;
1757   if (ai[mbs] != 0) {
1758     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1759   } else {
1760     (*B)->info.fill_ratio_needed = 0.0;
1761   }
1762 
1763   if (perm_identity){
1764     switch (bs) {
1765       case 1:
1766         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1767         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1768         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1769         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n");
1770         break;
1771       case 2:
1772         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1773         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1774         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1775         break;
1776       case 3:
1777         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1778         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1779         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1780         break;
1781       case 4:
1782         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1783         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1784         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1785         break;
1786       case 5:
1787         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1788         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1789         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1790         break;
1791       case 6:
1792         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1793         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1794         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1795         break;
1796       case 7:
1797         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1798         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1799         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1800       break;
1801       default:
1802         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1803         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1804         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
1805       break;
1806     }
1807   }
1808 
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 
1813 
1814