xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision d4e9f3b6443ec535fe5e409f785fa3cf019ceb6c)
1 /*$Id: sbaijfact2.c,v 1.2.1.41 2001/08/07 03:03:01 balay Exp $*/
2 /*
3     Factorization code for SBAIJ format.
4 */
5 
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/ilu.h"
10 #include "src/inline/dot.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering"
14 int MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
15 {
16   PetscFunctionBegin;
17   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering"
22 int MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
23 {
24   PetscFunctionBegin;
25   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
26 }
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering"
30 int MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
31 {
32   PetscFunctionBegin;
33   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering"
38 int MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
39 {
40   PetscFunctionBegin;
41   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering"
46 int MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
47 {
48   PetscFunctionBegin;
49   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering"
54 int MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
55 {
56   PetscFunctionBegin;
57   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering"
62 int MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
63 {
64   PetscFunctionBegin;
65   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
66 }
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1"
70 int MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
71 {
72   PetscFunctionBegin;
73   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2"
78 int MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
79 {
80   PetscFunctionBegin;
81   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
82 }
83 
84 #undef __FUNCT__
85 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3"
86 int MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
87 {
88   PetscFunctionBegin;
89   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
90 }
91 
92 #undef __FUNCT__
93 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4"
94 int MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
95 {
96   PetscFunctionBegin;
97   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
98 }
99 
100 #undef __FUNCT__
101 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5"
102 int MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
103 {
104   PetscFunctionBegin;
105   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6"
110 int MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
111 {
112   PetscFunctionBegin;
113   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7"
118 int MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
119 {
120   PetscFunctionBegin;
121   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
126 int MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
127 {
128   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
129   IS              isrow=a->row;
130   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
131   int             nz,*vj,k,ierr,*r,idx,k1;
132   int             bs=a->bs,bs2 = a->bs2;
133   MatScalar       *aa=a->a,*v,*diag;
134   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
135 
136   PetscFunctionBegin;
137   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
138   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
139   t  = a->solve_work;
140   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
141   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
142 
143   /* solve U^T * D * y = b by forward substitution */
144   xk = t;
145   for (k=0; k<mbs; k++) { /* t <- perm(b) */
146     idx   = bs*r[k];
147     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
148   }
149   for (k=0; k<mbs; k++){
150     v  = aa + bs2*ai[k];
151     xk = t + k*bs;      /* Dk*xk = k-th block of x */
152     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
153     nz = ai[k+1] - ai[k];
154     vj = aj + ai[k];
155     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
156     while (nz--) {
157       /* x(:) += U(k,:)^T*(Dk*xk) */
158       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
159       vj++; xj = t + (*vj)*bs;
160       v += bs2;
161     }
162     /* xk = inv(Dk)*(Dk*xk) */
163     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
164     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
165   }
166 
167   /* solve U*x = y by back substitution */
168   for (k=mbs-1; k>=0; k--){
169     v  = aa + bs2*ai[k];
170     xk = t + k*bs;        /* xk */
171     nz = ai[k+1] - ai[k];
172     vj = aj + ai[k];
173     xj = t + (*vj)*bs;
174     while (nz--) {
175       /* xk += U(k,:)*x(:) */
176       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
177       vj++;
178       v += bs2; xj = t + (*vj)*bs;
179     }
180     idx   = bs*r[k];
181     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
182   }
183 
184   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
185   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
186   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
187   PetscLogFlops(bs2*(2*a->s_nz + mbs));
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
193 int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
194 {
195   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
196   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
197   int             nz,*vj,k,ierr;
198   int             bs=a->bs,bs2 = a->bs2;
199   MatScalar       *aa=a->a,*v,*diag;
200   PetscScalar     *x,*xk,*xj,*b,*xk_tmp;
201 
202   PetscFunctionBegin;
203 
204   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
205   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
206 
207   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
208 
209   /* solve U^T * D * y = b by forward substitution */
210   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
211   for (k=0; k<mbs; k++){
212     v  = aa + bs2*ai[k];
213     xk = x + k*bs;      /* Dk*xk = k-th block of x */
214     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
215     nz = ai[k+1] - ai[k];
216     vj = aj + ai[k];
217     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
218     while (nz--) {
219       /* x(:) += U(k,:)^T*(Dk*xk) */
220       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
221       vj++; xj = x + (*vj)*bs;
222       v += bs2;
223     }
224     /* xk = inv(Dk)*(Dk*xk) */
225     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
226     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
227   }
228 
229   /* solve U*x = y by back substitution */
230   for (k=mbs-1; k>=0; k--){
231     v  = aa + bs2*ai[k];
232     xk = x + k*bs;        /* xk */
233     nz = ai[k+1] - ai[k];
234     vj = aj + ai[k];
235     xj = x + (*vj)*bs;
236     while (nz--) {
237       /* xk += U(k,:)*x(:) */
238       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
239       vj++;
240       v += bs2; xj = x + (*vj)*bs;
241     }
242   }
243 
244   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
245   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
246   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
247   PetscLogFlops(bs2*(2*a->s_nz + mbs));
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
253 int MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
254 {
255   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
256   IS              isrow=a->row;
257   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
258   int             nz,*vj,k,ierr,*r,idx;
259   MatScalar       *aa=a->a,*v,*d;
260   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
261 
262   PetscFunctionBegin;
263   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
264   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
265   t  = a->solve_work;
266   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
267 
268   /* solve U^T * D * y = b by forward substitution */
269   tp = t;
270   for (k=0; k<mbs; k++) { /* t <- perm(b) */
271     idx   = 7*r[k];
272     tp[0] = b[idx];
273     tp[1] = b[idx+1];
274     tp[2] = b[idx+2];
275     tp[3] = b[idx+3];
276     tp[4] = b[idx+4];
277     tp[5] = b[idx+5];
278     tp[6] = b[idx+6];
279     tp += 7;
280   }
281 
282   for (k=0; k<mbs; k++){
283     v  = aa + 49*ai[k];
284     vj = aj + ai[k];
285     tp = t + k*7;
286     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
287     nz = ai[k+1] - ai[k];
288     tp = t + (*vj)*7;
289     while (nz--) {
290       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
291       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
292       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
293       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
294       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
295       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
296       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
297       vj++; tp = t + (*vj)*7;
298       v += 49;
299     }
300 
301     /* xk = inv(Dk)*(Dk*xk) */
302     d  = aa+k*49;          /* ptr to inv(Dk) */
303     tp    = t + k*7;
304     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
305     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
306     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
307     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
308     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
309     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
310     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
311   }
312 
313   /* solve U*x = y by back substitution */
314   for (k=mbs-1; k>=0; k--){
315     v  = aa + 49*ai[k];
316     vj = aj + ai[k];
317     tp    = t + k*7;
318     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
319     nz = ai[k+1] - ai[k];
320 
321     tp = t + (*vj)*7;
322     while (nz--) {
323       /* xk += U(k,:)*x(:) */
324       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
325       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
326       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
327       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
328       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
329       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
330       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
331       vj++; tp = t + (*vj)*7;
332       v += 49;
333     }
334     tp    = t + k*7;
335     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
336     idx   = 7*r[k];
337     x[idx]     = x0;
338     x[idx+1]   = x1;
339     x[idx+2]   = x2;
340     x[idx+3]   = x3;
341     x[idx+4]   = x4;
342     x[idx+5]   = x5;
343     x[idx+6]   = x6;
344   }
345 
346   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
347   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
348   PetscLogFlops(49*(2*a->s_nz + mbs));
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
354 int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
355 {
356   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
357   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
358   MatScalar       *aa=a->a,*v,*d;
359   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
360   int             nz,*vj,k,ierr;
361 
362   PetscFunctionBegin;
363 
364   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
365   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
366 
367   /* solve U^T * D * y = b by forward substitution */
368   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
369   for (k=0; k<mbs; k++){
370     v  = aa + 49*ai[k];
371     xp = x + k*7;
372     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
373     nz = ai[k+1] - ai[k];
374     vj = aj + ai[k];
375     xp = x + (*vj)*7;
376     while (nz--) {
377       /* x(:) += U(k,:)^T*(Dk*xk) */
378       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
379       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
380       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
381       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
382       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
383       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
384       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
385       vj++; xp = x + (*vj)*7;
386       v += 49;
387     }
388     /* xk = inv(Dk)*(Dk*xk) */
389     d  = aa+k*49;          /* ptr to inv(Dk) */
390     xp = x + k*7;
391     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
392     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
393     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
394     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
395     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
396     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
397     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
398   }
399 
400   /* solve U*x = y by back substitution */
401   for (k=mbs-1; k>=0; k--){
402     v  = aa + 49*ai[k];
403     xp = x + k*7;
404     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
405     nz = ai[k+1] - ai[k];
406     vj = aj + ai[k];
407     xp = x + (*vj)*7;
408     while (nz--) {
409       /* xk += U(k,:)*x(:) */
410       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
411       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
412       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
413       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
414       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
415       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
416       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
417       vj++;
418       v += 49; xp = x + (*vj)*7;
419     }
420     xp = x + k*7;
421     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
422   }
423 
424   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
425   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
426   PetscLogFlops(49*(2*a->s_nz + mbs));
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
432 int MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
433 {
434   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
435   IS              isrow=a->row;
436   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
437   int             nz,*vj,k,ierr,*r,idx;
438   MatScalar       *aa=a->a,*v,*d;
439   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
440 
441   PetscFunctionBegin;
442   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
443   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
444   t  = a->solve_work;
445   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
446 
447   /* solve U^T * D * y = b by forward substitution */
448   tp = t;
449   for (k=0; k<mbs; k++) { /* t <- perm(b) */
450     idx   = 6*r[k];
451     tp[0] = b[idx];
452     tp[1] = b[idx+1];
453     tp[2] = b[idx+2];
454     tp[3] = b[idx+3];
455     tp[4] = b[idx+4];
456     tp[5] = b[idx+5];
457     tp += 6;
458   }
459 
460   for (k=0; k<mbs; k++){
461     v  = aa + 36*ai[k];
462     vj = aj + ai[k];
463     tp = t + k*6;
464     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
465     nz = ai[k+1] - ai[k];
466     tp = t + (*vj)*6;
467     while (nz--) {
468       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
469       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
470       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
471       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
472       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
473       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
474       vj++; tp = t + (*vj)*6;
475       v += 36;
476     }
477 
478     /* xk = inv(Dk)*(Dk*xk) */
479     d  = aa+k*36;          /* ptr to inv(Dk) */
480     tp    = t + k*6;
481     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
482     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
483     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
484     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
485     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
486     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
487   }
488 
489   /* solve U*x = y by back substitution */
490   for (k=mbs-1; k>=0; k--){
491     v  = aa + 36*ai[k];
492     vj = aj + ai[k];
493     tp    = t + k*6;
494     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
495     nz = ai[k+1] - ai[k];
496 
497     tp = t + (*vj)*6;
498     while (nz--) {
499       /* xk += U(k,:)*x(:) */
500       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
501       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
502       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
503       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
504       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
505       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
506       vj++; tp = t + (*vj)*6;
507       v += 36;
508     }
509     tp    = t + k*6;
510     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
511     idx   = 6*r[k];
512     x[idx]     = x0;
513     x[idx+1]   = x1;
514     x[idx+2]   = x2;
515     x[idx+3]   = x3;
516     x[idx+4]   = x4;
517     x[idx+5]   = x5;
518   }
519 
520   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
521   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
522   PetscLogFlops(36*(2*a->s_nz + mbs));
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
528 int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
529 {
530   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
531   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
532   MatScalar       *aa=a->a,*v,*d;
533   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5;
534   int             nz,*vj,k,ierr;
535 
536   PetscFunctionBegin;
537 
538   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
539   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
540 
541   /* solve U^T * D * y = b by forward substitution */
542   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
543   for (k=0; k<mbs; k++){
544     v  = aa + 36*ai[k];
545     xp = x + k*6;
546     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
547     nz = ai[k+1] - ai[k];
548     vj = aj + ai[k];
549     xp = x + (*vj)*6;
550     while (nz--) {
551       /* x(:) += U(k,:)^T*(Dk*xk) */
552       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
553       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
554       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
555       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
556       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
557       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
558       vj++; xp = x + (*vj)*6;
559       v += 36;
560     }
561     /* xk = inv(Dk)*(Dk*xk) */
562     d  = aa+k*36;          /* ptr to inv(Dk) */
563     xp = x + k*6;
564     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
565     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
566     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
567     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
568     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
569     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
570   }
571 
572   /* solve U*x = y by back substitution */
573   for (k=mbs-1; k>=0; k--){
574     v  = aa + 36*ai[k];
575     xp = x + k*6;
576     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
577     nz = ai[k+1] - ai[k];
578     vj = aj + ai[k];
579     xp = x + (*vj)*6;
580     while (nz--) {
581       /* xk += U(k,:)*x(:) */
582       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
583       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
584       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
585       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
586       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
587       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
588       vj++;
589       v += 36; xp = x + (*vj)*6;
590     }
591     xp = x + k*6;
592     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
593   }
594 
595   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
596   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
597   PetscLogFlops(36*(2*a->s_nz + mbs));
598   PetscFunctionReturn(0);
599 }
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
603 int MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
604 {
605   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
606   IS              isrow=a->row;
607   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
608   int             nz,*vj,k,ierr,*r,idx;
609   MatScalar       *aa=a->a,*v,*diag;
610   PetscScalar     *x,*b,x0,x1,x2,x3,x4,*t,*tp;
611 
612   PetscFunctionBegin;
613   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
614   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
615   t  = a->solve_work;
616   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
617 
618   /* solve U^T * D * y = b by forward substitution */
619   tp = t;
620   for (k=0; k<mbs; k++) { /* t <- perm(b) */
621     idx   = 5*r[k];
622     tp[0] = b[idx];
623     tp[1] = b[idx+1];
624     tp[2] = b[idx+2];
625     tp[3] = b[idx+3];
626     tp[4] = b[idx+4];
627     tp += 5;
628   }
629 
630   for (k=0; k<mbs; k++){
631     v  = aa + 25*ai[k];
632     vj = aj + ai[k];
633     tp = t + k*5;
634     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
635     nz = ai[k+1] - ai[k];
636 
637     tp = t + (*vj)*5;
638     while (nz--) {
639       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
640       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
641       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
642       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
643       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
644       vj++; tp = t + (*vj)*5;
645       v += 25;
646     }
647 
648     /* xk = inv(Dk)*(Dk*xk) */
649     diag  = aa+k*25;          /* ptr to inv(Dk) */
650     tp    = t + k*5;
651       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
652       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
653       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
654       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
655       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
656   }
657 
658   /* solve U*x = y by back substitution */
659   for (k=mbs-1; k>=0; k--){
660     v  = aa + 25*ai[k];
661     vj = aj + ai[k];
662     tp    = t + k*5;
663     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
664     nz = ai[k+1] - ai[k];
665 
666     tp = t + (*vj)*5;
667     while (nz--) {
668       /* xk += U(k,:)*x(:) */
669       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
670       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
671       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
672       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
673       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
674       vj++; tp = t + (*vj)*5;
675       v += 25;
676     }
677     tp    = t + k*5;
678     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
679     idx   = 5*r[k];
680     x[idx]     = x0;
681     x[idx+1]   = x1;
682     x[idx+2]   = x2;
683     x[idx+3]   = x3;
684     x[idx+4]   = x4;
685   }
686 
687   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
688   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
689   PetscLogFlops(25*(2*a->s_nz + mbs));
690   PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
695 int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
696 {
697   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
698   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
699   MatScalar       *aa=a->a,*v,*diag;
700   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4;
701   int             nz,*vj,k,ierr;
702 
703   PetscFunctionBegin;
704 
705   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
706   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
707 
708   /* solve U^T * D * y = b by forward substitution */
709   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
710   for (k=0; k<mbs; k++){
711     v  = aa + 25*ai[k];
712     xp = x + k*5;
713     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
714     nz = ai[k+1] - ai[k];
715     vj = aj + ai[k];
716     xp = x + (*vj)*5;
717     while (nz--) {
718       /* x(:) += U(k,:)^T*(Dk*xk) */
719       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
720       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
721       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
722       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
723       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
724       vj++; xp = x + (*vj)*5;
725       v += 25;
726     }
727     /* xk = inv(Dk)*(Dk*xk) */
728     diag = aa+k*25;          /* ptr to inv(Dk) */
729     xp   = x + k*5;
730     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
731     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
732     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
733     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
734     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
735   }
736 
737   /* solve U*x = y by back substitution */
738   for (k=mbs-1; k>=0; k--){
739     v  = aa + 25*ai[k];
740     xp = x + k*5;
741     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
742     nz = ai[k+1] - ai[k];
743     vj = aj + ai[k];
744     xp = x + (*vj)*5;
745     while (nz--) {
746       /* xk += U(k,:)*x(:) */
747       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
748       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
749       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
750       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
751       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
752       vj++;
753       v += 25; xp = x + (*vj)*5;
754     }
755     xp = x + k*5;
756     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
757   }
758 
759   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
760   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
761   PetscLogFlops(25*(2*a->s_nz + mbs));
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
767 int MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
768 {
769   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
770   IS              isrow=a->row;
771   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
772   int             nz,*vj,k,ierr,*r,idx;
773   MatScalar       *aa=a->a,*v,*diag;
774   PetscScalar     *x,*b,x0,x1,x2,x3,*t,*tp;
775 
776   PetscFunctionBegin;
777   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
778   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
779   t  = a->solve_work;
780   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
781 
782   /* solve U^T * D * y = b by forward substitution */
783   tp = t;
784   for (k=0; k<mbs; k++) { /* t <- perm(b) */
785     idx   = 4*r[k];
786     tp[0] = b[idx];
787     tp[1] = b[idx+1];
788     tp[2] = b[idx+2];
789     tp[3] = b[idx+3];
790     tp += 4;
791   }
792 
793   for (k=0; k<mbs; k++){
794     v  = aa + 16*ai[k];
795     vj = aj + ai[k];
796     tp = t + k*4;
797     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
798     nz = ai[k+1] - ai[k];
799 
800     tp = t + (*vj)*4;
801     while (nz--) {
802       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
803       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
804       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
805       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
806       vj++; tp = t + (*vj)*4;
807       v += 16;
808     }
809 
810     /* xk = inv(Dk)*(Dk*xk) */
811     diag  = aa+k*16;          /* ptr to inv(Dk) */
812     tp    = t + k*4;
813     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
814     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
815     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
816     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
817   }
818 
819   /* solve U*x = y by back substitution */
820   for (k=mbs-1; k>=0; k--){
821     v  = aa + 16*ai[k];
822     vj = aj + ai[k];
823     tp    = t + k*4;
824     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
825     nz = ai[k+1] - ai[k];
826 
827     tp = t + (*vj)*4;
828     while (nz--) {
829       /* xk += U(k,:)*x(:) */
830       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
831       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
832       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
833       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
834       vj++; tp = t + (*vj)*4;
835       v += 16;
836     }
837     tp    = t + k*4;
838     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
839     idx        = 4*r[k];
840     x[idx]     = x0;
841     x[idx+1]   = x1;
842     x[idx+2]   = x2;
843     x[idx+3]   = x3;
844   }
845 
846   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
847   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
848   PetscLogFlops(16*(2*a->s_nz + mbs));
849   PetscFunctionReturn(0);
850 }
851 
852 /*
853    Special case where the matrix was factored in the natural ordering.
854    This eliminates the need for the column and row permutation.
855 */
856 #undef __FUNCT__
857 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
858 int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
859 {
860   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
861   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
862   MatScalar       *aa=a->a,*v,*diag;
863   PetscScalar     *x,*xp,*b,x0,x1,x2,x3;
864   int             nz,*vj,k,ierr;
865 
866   PetscFunctionBegin;
867 
868   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
869   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
870 
871   /* solve U^T * D * y = b by forward substitution */
872   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
873   for (k=0; k<mbs; k++){
874     v  = aa + 16*ai[k];
875     xp = x + k*4;
876     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
877     nz = ai[k+1] - ai[k];
878     vj = aj + ai[k];
879     xp = x + (*vj)*4;
880     while (nz--) {
881       /* x(:) += U(k,:)^T*(Dk*xk) */
882       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
883       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
884       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
885       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
886       vj++; xp = x + (*vj)*4;
887       v += 16;
888     }
889     /* xk = inv(Dk)*(Dk*xk) */
890     diag = aa+k*16;          /* ptr to inv(Dk) */
891     xp   = x + k*4;
892     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
893     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
894     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
895     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
896   }
897 
898   /* solve U*x = y by back substitution */
899   for (k=mbs-1; k>=0; k--){
900     v  = aa + 16*ai[k];
901     xp = x + k*4;
902     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
903     nz = ai[k+1] - ai[k];
904     vj = aj + ai[k];
905     xp = x + (*vj)*4;
906     while (nz--) {
907       /* xk += U(k,:)*x(:) */
908       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
909       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
910       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
911       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
912       vj++;
913       v += 16; xp = x + (*vj)*4;
914     }
915     xp = x + k*4;
916     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
917   }
918 
919   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
920   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
921   PetscLogFlops(16*(2*a->s_nz + mbs));
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
927 int MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
928 {
929   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
930   IS              isrow=a->row;
931   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
932   int             nz,*vj,k,ierr,*r,idx;
933   MatScalar       *aa=a->a,*v,*diag;
934   PetscScalar     *x,*b,x0,x1,x2,*t,*tp;
935 
936   PetscFunctionBegin;
937   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
938   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
939   t  = a->solve_work;
940   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
941 
942   /* solve U^T * D * y = b by forward substitution */
943   tp = t;
944   for (k=0; k<mbs; k++) { /* t <- perm(b) */
945     idx   = 3*r[k];
946     tp[0] = b[idx];
947     tp[1] = b[idx+1];
948     tp[2] = b[idx+2];
949     tp += 3;
950   }
951 
952   for (k=0; k<mbs; k++){
953     v  = aa + 9*ai[k];
954     vj = aj + ai[k];
955     tp = t + k*3;
956     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
957     nz = ai[k+1] - ai[k];
958 
959     tp = t + (*vj)*3;
960     while (nz--) {
961       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
962       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
963       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
964       vj++; tp = t + (*vj)*3;
965       v += 9;
966     }
967 
968     /* xk = inv(Dk)*(Dk*xk) */
969     diag  = aa+k*9;          /* ptr to inv(Dk) */
970     tp    = t + k*3;
971     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
972     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
973     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
974   }
975 
976   /* solve U*x = y by back substitution */
977   for (k=mbs-1; k>=0; k--){
978     v  = aa + 9*ai[k];
979     vj = aj + ai[k];
980     tp    = t + k*3;
981     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
982     nz = ai[k+1] - ai[k];
983 
984     tp = t + (*vj)*3;
985     while (nz--) {
986       /* xk += U(k,:)*x(:) */
987       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
988       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
989       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
990       vj++; tp = t + (*vj)*3;
991       v += 9;
992     }
993     tp    = t + k*3;
994     tp[0] = x0; tp[1] = x1; tp[2] = x2;
995     idx      = 3*r[k];
996     x[idx]   = x0;
997     x[idx+1] = x1;
998     x[idx+2] = x2;
999   }
1000 
1001   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1002   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1003   PetscLogFlops(9*(2*a->s_nz + mbs));
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 /*
1008    Special case where the matrix was factored in the natural ordering.
1009    This eliminates the need for the column and row permutation.
1010 */
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1013 int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1014 {
1015   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1016   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1017   MatScalar       *aa=a->a,*v,*diag;
1018   PetscScalar     *x,*xp,*b,x0,x1,x2;
1019   int             nz,*vj,k,ierr;
1020 
1021   PetscFunctionBegin;
1022 
1023   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1024   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1025 
1026   /* solve U^T * D * y = b by forward substitution */
1027   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1028   for (k=0; k<mbs; k++){
1029     v  = aa + 9*ai[k];
1030     xp = x + k*3;
1031     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1032     nz = ai[k+1] - ai[k];
1033     vj = aj + ai[k];
1034     xp = x + (*vj)*3;
1035     while (nz--) {
1036       /* x(:) += U(k,:)^T*(Dk*xk) */
1037       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1038       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1039       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1040       vj++; xp = x + (*vj)*3;
1041       v += 9;
1042     }
1043     /* xk = inv(Dk)*(Dk*xk) */
1044     diag = aa+k*9;          /* ptr to inv(Dk) */
1045     xp   = x + k*3;
1046     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1047     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1048     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1049   }
1050 
1051   /* solve U*x = y by back substitution */
1052   for (k=mbs-1; k>=0; k--){
1053     v  = aa + 9*ai[k];
1054     xp = x + k*3;
1055     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1056     nz = ai[k+1] - ai[k];
1057     vj = aj + ai[k];
1058     xp = x + (*vj)*3;
1059     while (nz--) {
1060       /* xk += U(k,:)*x(:) */
1061       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1062       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1063       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1064       vj++;
1065       v += 9; xp = x + (*vj)*3;
1066     }
1067     xp = x + k*3;
1068     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1069   }
1070 
1071   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   PetscLogFlops(9*(2*a->s_nz + mbs));
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1079 int MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1080 {
1081   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ *)A->data;
1082   IS              isrow=a->row;
1083   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1084   int             nz,*vj,k,k2,ierr,*r,idx;
1085   MatScalar       *aa=a->a,*v,*diag;
1086   PetscScalar     *x,*b,x0,x1,*t;
1087 
1088   PetscFunctionBegin;
1089   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1090   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1091   t  = a->solve_work;
1092   /* printf("called MatSolve_SeqSBAIJ_2\n"); */
1093   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1094 
1095   /* solve U^T * D * y = perm(b) by forward substitution */
1096   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1097     idx = 2*r[k];
1098     t[k*2]   = b[idx];
1099     t[k*2+1] = b[idx+1];
1100   }
1101   for (k=0; k<mbs; k++){
1102     v  = aa + 4*ai[k];
1103     vj = aj + ai[k];
1104     k2 = k*2;
1105     x0 = t[k2]; x1 = t[k2+1];
1106     nz = ai[k+1] - ai[k];
1107     while (nz--) {
1108       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1109       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1110       vj++; v += 4;
1111     }
1112     diag = aa+k*4;  /* ptr to inv(Dk) */
1113     t[k2]   = diag[0]*x0 + diag[2]*x1;
1114     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1115   }
1116 
1117   /* solve U*x = y by back substitution */
1118   for (k=mbs-1; k>=0; k--){
1119     v  = aa + 4*ai[k];
1120     vj = aj + ai[k];
1121     k2 = k*2;
1122     x0 = t[k2]; x1 = t[k2+1];
1123     nz = ai[k+1] - ai[k];
1124     while (nz--) {
1125       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1126       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1127       vj++; v += 4;
1128     }
1129     t[k2]    = x0;
1130     t[k2+1]  = x1;
1131     idx      = 2*r[k];
1132     x[idx]   = x0;
1133     x[idx+1] = x1;
1134   }
1135 
1136   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1137   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1138   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1139   PetscLogFlops(4*(2*a->s_nz + mbs));
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 /*
1144    Special case where the matrix was factored in the natural ordering.
1145    This eliminates the need for the column and row permutation.
1146 */
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1149 int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1150 {
1151   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1152   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1153   MatScalar       *aa=a->a,*v,*diag;
1154   PetscScalar     *x,*b,x0,x1;
1155   int             nz,*vj,k,k2,ierr;
1156 
1157   PetscFunctionBegin;
1158 
1159   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1160   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1161 
1162   /* solve U^T * D * y = b by forward substitution */
1163   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1164   for (k=0; k<mbs; k++){
1165     v  = aa + 4*ai[k];
1166     vj = aj + ai[k];
1167     k2 = k*2;
1168     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1169     nz = ai[k+1] - ai[k];
1170 
1171     while (nz--) {
1172       /* x(:) += U(k,:)^T*(Dk*xk) */
1173       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1174       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1175       vj++; v += 4;
1176     }
1177     /* xk = inv(Dk)*(Dk*xk) */
1178     diag = aa+k*4;          /* ptr to inv(Dk) */
1179     x[k2]   = diag[0]*x0 + diag[2]*x1;
1180     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1181   }
1182 
1183   /* solve U*x = y by back substitution */
1184   for (k=mbs-1; k>=0; k--){
1185     v  = aa + 4*ai[k];
1186     vj = aj + ai[k];
1187     k2 = k*2;
1188     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1189     nz = ai[k+1] - ai[k];
1190     while (nz--) {
1191       /* xk += U(k,:)*x(:) */
1192       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1193       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1194       vj++; v += 4;
1195     }
1196     x[k2]     = x0;
1197     x[k2+1]   = x1;
1198   }
1199 
1200   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1201   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1202   PetscLogFlops(4*(2*a->s_nz + mbs)); /* bs2*(2*a->s_nz + mbs) */
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1208 int MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1209 {
1210   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ *)A->data;
1211   IS              isrow=a->row;
1212   int             mbs=a->mbs,*ai=a->i,*aj=a->j,ierr,*rip;
1213   MatScalar       *aa=a->a,*v;
1214   PetscScalar     *x,*b,xk,*t;
1215   int             nz,*vj,k;
1216 
1217   PetscFunctionBegin;
1218   if (!mbs) PetscFunctionReturn(0);
1219 
1220   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1221   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1222   t    = a->solve_work;
1223 
1224   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1225 
1226   /* solve U^T*D*y = perm(b) by forward substitution */
1227   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1228   for (k=0; k<mbs; k++){
1229     v  = aa + ai[k];
1230     vj = aj + ai[k];
1231     xk = t[k];
1232     nz = ai[k+1] - ai[k];
1233     while (nz--) t[*vj++] += (*v++) * xk;
1234     t[k] = xk*aa[k];  /* note: aa[k] = 1/D(k) */
1235   }
1236 
1237   /* solve U*x = y by back substitution */
1238   for (k=mbs-1; k>=0; k--){
1239     v  = aa + ai[k];
1240     vj = aj + ai[k];
1241     xk = t[k];
1242     nz = ai[k+1] - ai[k];
1243     while (nz--) xk += (*v++) * t[*vj++];
1244     t[k]      = xk;
1245     x[rip[k]] = xk;
1246   }
1247 
1248   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1249   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1250   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1251   PetscLogFlops(4*a->s_nz + A->m);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1257 int MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1258 {
1259   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1260   int          ierr;
1261 
1262   PetscFunctionBegin;
1263   if (a->bs == 1) {
1264     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1265   } else {
1266     IS              isrow=a->row;
1267     int             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1268     MatScalar       *aa=a->a,*v;
1269     PetscScalar     *x,*b,*t;
1270     int             nz,*vj,k,n;
1271     if (bb->n > a->solves_work_n) {
1272       if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
1273       ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1274       a->solves_work_n = bb->n;
1275     }
1276     n    = bb->n;
1277     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1278     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1279     t    = a->solves_work;
1280 
1281     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1282 
1283     /* solve U^T*D*y = perm(b) by forward substitution */
1284     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1285     for (k=0; k<mbs; k++){
1286       v  = aa + ai[k];
1287       vj = aj + ai[k];
1288       nz = ai[k+1] - ai[k];
1289       while (nz--) {
1290         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1291         v++;vj++;
1292       }
1293       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1294     }
1295 
1296     /* solve U*x = y by back substitution */
1297     for (k=mbs-1; k>=0; k--){
1298       v  = aa + ai[k];
1299       vj = aj + ai[k];
1300       nz = ai[k+1] - ai[k];
1301       while (nz--) {
1302         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1303         v++;vj++;
1304       }
1305       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1306     }
1307 
1308     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1309     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1310     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1311     PetscLogFlops(bb->n*(4*a->s_nz + A->m));
1312   }
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 /*
1317       Special case where the matrix was ILU(0) factored in the natural
1318    ordering. This eliminates the need for the column and row permutation.
1319 */
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1322 int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1323 {
1324   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ *)A->data;
1325   int             mbs=a->mbs,*ai=a->i,*aj=a->j,ierr;
1326   MatScalar       *aa=a->a,*v;
1327   PetscScalar     *x,*b,xk;
1328   int             nz,*vj,k;
1329 
1330   PetscFunctionBegin;
1331   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1332   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1333 
1334   /* solve U^T*D*y = b by forward substitution */
1335   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1336   for (k=0; k<mbs; k++){
1337     v  = aa + ai[k] + 1;
1338     vj = aj + ai[k] + 1;
1339     xk = x[k];
1340     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1341     while (nz--) x[*vj++] += (*v++) * xk;
1342     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1343   }
1344 
1345   /* solve U*x = y by back substitution */
1346   for (k=mbs-2; k>=0; k--){
1347     v  = aa + ai[k] + 1;
1348     vj = aj + ai[k] + 1;
1349     xk = x[k];
1350     nz = ai[k+1] - ai[k] - 1;
1351     while (nz--) xk += (*v++) * x[*vj++];
1352     x[k] = xk;
1353   }
1354 
1355   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1356   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1357   PetscLogFlops(4*a->s_nz + A->m);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
1364 int MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1365 {
1366   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
1367   int         *rip,ierr,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1368   int         *jutmp,bs = a->bs,bs2=a->bs2;
1369   int         m,realloc = 0,*levtmp,levels = info->levels;
1370   int         *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl;
1371   int         incrlev,*lev,shift,prow,nz;
1372   int         *il,ili,nextprow;
1373   PetscReal   f = info->fill;
1374   PetscTruth  perm_identity;
1375 
1376   PetscFunctionBegin;
1377   /* check whether perm is the identity mapping */
1378   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1379 
1380   /* special case that simply copies fill pattern */
1381   if (!levels && perm_identity && bs==1) {
1382     ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr);
1383     (*B)->factor    = FACTOR_CHOLESKY;
1384     b               = (Mat_SeqSBAIJ*)(*B)->data;
1385     b->row          = perm;
1386     b->icol         = perm;
1387     b->factor_damping   = info->damping;
1388     b->factor_zeropivot = info->zeropivot;
1389     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1390     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1391     ierr         = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1392     (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1393     (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1394     PetscFunctionReturn(0);
1395   }
1396 
1397   /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */
1398   if (levels > 0 && perm_identity && bs==1 ){
1399     if (!perm_identity) a->permute = PETSC_TRUE;
1400 
1401   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1402 
1403   if (perm_identity){ /* without permutation */
1404     ai = a->i; aj = a->j;
1405   } else {            /* non-trivial permutation */
1406     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
1407     ai = a->inew; aj = a->jnew;
1408   }
1409 
1410   /* initialization */
1411   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1412   umax  = (int)(f*ai[mbs] + 1);
1413   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1414   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1415   iu[0] = 0;
1416   juidx = 0; /* index for ju */
1417   ierr  = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */
1418   q      = jl + mbs;   /* linked list for col index of active row */
1419   levtmp = q + mbs;
1420   il     = levtmp + mbs;
1421   for (i=0; i<mbs; i++){
1422     jl[i] = mbs;
1423     q[i]  = 0;
1424     il[i] = 0;
1425   }
1426 
1427   /* for each row k */
1428   for (k=0; k<mbs; k++){
1429     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
1430     q[k] = mbs;
1431     /* initialize nonzero structure of k-th row to row rip[k] of A */
1432     jmin = ai[rip[k]] +1; /* exclude diag[k] */
1433     jmax = ai[rip[k]+1];
1434     for (j=jmin; j<jmax; j++){
1435       vj = rip[aj[j]]; /* col. value */
1436       if(vj > k){
1437         qm = k;
1438         do {
1439           m  = qm; qm = q[m];
1440         } while(qm < vj);
1441         if (qm == vj) {
1442           SETERRQ(1," error: duplicate entry in A\n");
1443         }
1444         nzk++;
1445         q[m]  = vj;
1446         q[vj] = qm;
1447         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1448       } /* if(vj > k) */
1449     } /* for (j=jmin; j<jmax; j++) */
1450 
1451     /* modify nonzero structure of k-th row by computing fill-in
1452        for each row i to be merged in */
1453     prow = k;
1454     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
1455 
1456     while (prow < k){
1457       nextprow = jl[prow];
1458       /* merge row prow into k-th row */
1459       ili  = il[prow];
1460       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
1461       jmax = iu[prow+1];
1462       qm   = k;
1463       for (j=jmin; j<jmax; j++){
1464         vj = ju[j];
1465         incrlev = lev[j] + 1;
1466         if (incrlev > levels) continue;
1467         do {
1468           m = qm; qm = q[m];
1469         } while (qm < vj);
1470         if (qm != vj){  /* a fill */
1471           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1472           levtmp[vj]  = incrlev;
1473         } else {
1474           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1475         }
1476       }
1477       if (jmin < jmax){ /* update il and jl */
1478         il[prow] = jmin;
1479         j = ju[jmin];
1480         jl[prow] = jl[j]; jl[j] = prow;
1481       }
1482       prow = nextprow;
1483     }
1484 
1485     /* add the first nonzero element in U(k, k+1:mbs-1) to jl */
1486     if (nzk > 0){
1487       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1488       jl[k] = jl[i]; jl[i] = k;
1489       il[k] = iu[k] + 1;
1490     }
1491     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */
1492 
1493     /* allocate more space to ju if needed */
1494     if (iu[k+1] > umax) {
1495       /* estimate how much additional space we will need */
1496       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1497       /* just double the memory each time */
1498       maxadd = umax;
1499       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1500       umax += maxadd;
1501 
1502       /* allocate a longer ju */
1503       ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1504       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1505       ierr = PetscFree(ju);CHKERRQ(ierr);
1506       ju   = jutmp;
1507 
1508       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1509       ierr     = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr);
1510       ierr     = PetscFree(lev);CHKERRQ(ierr);
1511       lev      = jutmp;
1512       realloc += 2; /* count how many times we realloc */
1513     }
1514 
1515     /* save nonzero structure of k-th row in ju */
1516     ju[juidx]  = k; /* diag[k] */
1517     lev[juidx] = 0;
1518     juidx++;
1519     i = k;
1520     while (nzk --) {
1521       i           = q[i];
1522       ju[juidx] = i;
1523       lev[juidx] = levtmp[i];
1524       juidx++;
1525     }
1526   } /* end of for (k=0; k<mbs; k++) */
1527 
1528   if (ai[mbs] != 0) {
1529     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1530     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1531     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1532     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
1533     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
1534   } else {
1535      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1536   }
1537 
1538   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1539   ierr = PetscFree(jl);CHKERRQ(ierr);
1540   ierr = PetscFree(lev);CHKERRQ(ierr);
1541 
1542   /* put together the new matrix */
1543   ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr);
1544   /* PetscLogObjectParent(*B,iperm); */
1545   b = (Mat_SeqSBAIJ*)(*B)->data;
1546   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1547   b->singlemalloc = PETSC_FALSE;
1548   /* the next line frees the default space generated by the Create() */
1549   ierr = PetscFree(b->a);CHKERRQ(ierr);
1550   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1551   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1552   b->j    = ju;
1553   b->i    = iu;
1554   b->diag = 0;
1555   b->ilen = 0;
1556   b->imax = 0;
1557   b->row  = perm;
1558   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1559   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1560   b->icol = perm;
1561   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1562   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1563   /* In b structure:  Free imax, ilen, old a, old j.
1564      Allocate idnew, solve_work, new a, new j */
1565   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1566   b->s_maxnz          = b->s_nz = iu[mbs];
1567   b->factor_damping   = info->damping;
1568   b->factor_zeropivot = info->zeropivot;
1569 
1570   (*B)->factor                 = FACTOR_CHOLESKY;
1571   (*B)->info.factor_mallocs    = realloc;
1572   (*B)->info.fill_ratio_given  = f;
1573   if (ai[mbs] != 0) {
1574     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1575   } else {
1576     (*B)->info.fill_ratio_needed = 0.0;
1577   }
1578 
1579 
1580   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1581   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1582   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1583 
1584   PetscFunctionReturn(0);
1585   } /* end of if (levels > 0 && perm_identity && bs==1 ) */
1586 
1587   if (!perm_identity) a->permute = PETSC_TRUE;
1588   if (perm_identity){
1589     ai = a->i; aj = a->j;
1590   } else { /*  non-trivial permutation */
1591     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1592     ai = a->inew; aj = a->jnew;
1593   }
1594 
1595   /* initialization */
1596   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1597   umax  = (int)(f*ai[mbs] + 1);
1598   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1599   umax += mbs + 1;
1600   shift = mbs + 1;
1601   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1602   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1603   iu[0] = mbs + 1;
1604   juidx = mbs + 1;
1605   /* prowl: linked list for pivot row */
1606   ierr    = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr);
1607   /* q: linked list for col index */
1608   q       = prowl + mbs;
1609   levtmp  = q     + mbs;
1610 
1611   for (i=0; i<mbs; i++){
1612     prowl[i] = mbs;
1613     q[i] = 0;
1614   }
1615 
1616   /* for each row k */
1617   for (k=0; k<mbs; k++){
1618     nzk  = 0;
1619     q[k] = mbs;
1620     /* copy current row into linked list */
1621     nz = ai[rip[k]+1] - ai[rip[k]];
1622     j = ai[rip[k]];
1623     while (nz--){
1624       vj = rip[aj[j++]];
1625       if (vj > k){
1626         qm = k;
1627         do {
1628           m  = qm; qm = q[m];
1629         } while(qm < vj);
1630         if (qm == vj) {
1631           SETERRQ(1," error: duplicate entry in A\n");
1632         }
1633         nzk++;
1634         q[m]       = vj;
1635         q[vj]      = qm;
1636         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1637       }
1638     }
1639 
1640     /* modify nonzero structure of k-th row by computing fill-in
1641        for each row prow to be merged in */
1642     prow = k;
1643     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1644 
1645     while (prow < k){
1646       /* merge row prow into k-th row */
1647       jmin = iu[prow] + 1;
1648       jmax = iu[prow+1];
1649       qm = k;
1650       for (j=jmin; j<jmax; j++){
1651         incrlev = lev[j-shift] + 1;
1652 	if (incrlev > levels) continue;
1653 
1654         vj      = ju[j];
1655         do {
1656           m = qm; qm = q[m];
1657         } while (qm < vj);
1658         if (qm != vj){      /* a fill */
1659           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1660           levtmp[vj] = incrlev;
1661         } else {
1662           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1663         }
1664       }
1665       prow = prowl[prow]; /* next pivot row */
1666     }
1667 
1668     /* add k to row list for first nonzero element in k-th row */
1669     if (nzk > 1){
1670       i = q[k]; /* col value of first nonzero element in k_th row of U */
1671       prowl[k] = prowl[i]; prowl[i] = k;
1672     }
1673     iu[k+1] = iu[k] + nzk;
1674 
1675     /* allocate more space to ju and lev if needed */
1676     if (iu[k+1] > umax) {
1677       /* estimate how much additional space we will need */
1678       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1679       /* just double the memory each time */
1680       maxadd = umax;
1681       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1682       umax += maxadd;
1683 
1684       /* allocate a longer ju */
1685       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1686       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1687       ierr     = PetscFree(ju);CHKERRQ(ierr);
1688       ju       = jutmp;
1689 
1690       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1691       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr);
1692       ierr     = PetscFree(lev);CHKERRQ(ierr);
1693       lev      = jutmp;
1694       realloc += 2; /* count how many times we realloc */
1695     }
1696 
1697     /* save nonzero structure of k-th row in ju */
1698     i=k;
1699     while (nzk --) {
1700       i                = q[i];
1701       ju[juidx]        = i;
1702       lev[juidx-shift] = levtmp[i];
1703       juidx++;
1704     }
1705   }
1706 
1707   if (ai[mbs] != 0) {
1708     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1709     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1710     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af);
1711     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af);
1712     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n");
1713   } else {
1714     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1715   }
1716 
1717   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1718   ierr = PetscFree(prowl);CHKERRQ(ierr);
1719   ierr = PetscFree(lev);CHKERRQ(ierr);
1720 
1721   /* put together the new matrix */
1722   ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr);
1723   /* PetscLogObjectParent(*B,iperm); */
1724   b    = (Mat_SeqSBAIJ*)(*B)->data;
1725   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1726   b->singlemalloc = PETSC_FALSE;
1727   /* the next line frees the default space generated by the Create() */
1728   ierr    = PetscFree(b->a);CHKERRQ(ierr);
1729   ierr    = PetscFree(b->ilen);CHKERRQ(ierr);
1730   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1731   b->j    = ju;
1732   b->i    = iu;
1733   b->diag = 0;
1734   b->ilen = 0;
1735   b->imax = 0;
1736 
1737   if (b->row) {
1738     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1739   }
1740   if (b->icol) {
1741     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1742   }
1743   b->row  = perm;
1744   b->icol = perm;
1745   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1746   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1747   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1748   /* In b structure:  Free imax, ilen, old a, old j.
1749      Allocate idnew, solve_work, new a, new j */
1750   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1751   b->s_maxnz = b->s_nz = iu[mbs];
1752 
1753   (*B)->factor                 = FACTOR_CHOLESKY;
1754   (*B)->info.factor_mallocs    = realloc;
1755   (*B)->info.fill_ratio_given  = f;
1756   if (ai[mbs] != 0) {
1757     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1758   } else {
1759     (*B)->info.fill_ratio_needed = 0.0;
1760   }
1761 
1762   if (perm_identity){
1763     switch (bs) {
1764       case 1:
1765         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1766         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1767         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1768         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n");
1769         break;
1770       case 2:
1771         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1772         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1773         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1774         break;
1775       case 3:
1776         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1777         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1778         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1779         break;
1780       case 4:
1781         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1782         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1783         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1784         break;
1785       case 5:
1786         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1787         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1788         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1789         break;
1790       case 6:
1791         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1792         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1793         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1794         break;
1795       case 7:
1796         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1797         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1798         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1799       break;
1800       default:
1801         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1802         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1803         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
1804       break;
1805     }
1806   }
1807 
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 
1812 
1813