xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 7dcf0eaabfd24a99cab9067fb7513f2271c036d2)
1 #define PETSCMAT_DLL
2 
3 /*
4     Factorization code for SBAIJ format.
5 */
6 
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/inline/ilu.h"
10 #include "src/inline/dot.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
15 {
16   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
17   IS              isrow=a->row;
18   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
19   PetscErrorCode  ierr;
20   PetscInt        nz,*vj,k,*r,idx,k1;
21   PetscInt        bs=A->rmap.bs,bs2 = a->bs2;
22   MatScalar       *aa=a->a,*v,*diag;
23   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
24 
25   PetscFunctionBegin;
26   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
28   t  = a->solve_work;
29   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
30   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
31 
32   /* solve U^T * D * y = b by forward substitution */
33   xk = t;
34   for (k=0; k<mbs; k++) { /* t <- perm(b) */
35     idx   = bs*r[k];
36     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
37   }
38   for (k=0; k<mbs; k++){
39     v  = aa + bs2*ai[k];
40     xk = t + k*bs;      /* Dk*xk = k-th block of x */
41     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
42     nz = ai[k+1] - ai[k];
43     vj = aj + ai[k];
44     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
45     while (nz--) {
46       /* x(:) += U(k,:)^T*(Dk*xk) */
47       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
48       vj++; xj = t + (*vj)*bs;
49       v += bs2;
50     }
51     /* xk = inv(Dk)*(Dk*xk) */
52     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
53     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
54   }
55 
56   /* solve U*x = y by back substitution */
57   for (k=mbs-1; k>=0; k--){
58     v  = aa + bs2*ai[k];
59     xk = t + k*bs;        /* xk */
60     nz = ai[k+1] - ai[k];
61     vj = aj + ai[k];
62     xj = t + (*vj)*bs;
63     while (nz--) {
64       /* xk += U(k,:)*x(:) */
65       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
66       vj++;
67       v += bs2; xj = t + (*vj)*bs;
68     }
69     idx   = bs*r[k];
70     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
71   }
72 
73   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
74   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
75   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
76   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
77   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
83 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
84 {
85   PetscErrorCode ierr;
86   PetscInt       nz,*vj,k;
87   PetscInt       bs2 = bs*bs;
88   MatScalar      *v,*diag;
89   PetscScalar    *xk,*xj,*xk_tmp;
90 
91   PetscFunctionBegin;
92   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
93   for (k=0; k<mbs; k++){
94     v  = aa + bs2*ai[k];
95     xk = x + k*bs;      /* Dk*xk = k-th block of x */
96     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
97     nz = ai[k+1] - ai[k];
98     vj = aj + ai[k];
99     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
100     while (nz--) {
101       /* x(:) += U(k,:)^T*(Dk*xk) */
102       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
103       vj++; xj = x + (*vj)*bs;
104       v += bs2;
105     }
106     /* xk = inv(Dk)*(Dk*xk) */
107     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
108     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
109   }
110   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
116 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
117 {
118   PetscErrorCode ierr;
119   PetscInt       nz,*vj,k;
120   PetscInt       bs2 = bs*bs;
121   MatScalar      *v,*diag;
122   PetscScalar    *xk,*xj;
123 
124   PetscFunctionBegin;
125   for (k=mbs-1; k>=0; k--){
126     v  = aa + bs2*ai[k];
127     xk = x + k*bs;        /* xk */
128     nz = ai[k+1] - ai[k];
129     vj = aj + ai[k];
130     xj = x + (*vj)*bs;
131     while (nz--) {
132       /* xk += U(k,:)*x(:) */
133       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
134       vj++;
135       v += bs2; xj = x + (*vj)*bs;
136     }
137   }
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
143 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
144 {
145   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
146   PetscErrorCode ierr;
147   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
148   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
149   MatScalar      *aa=a->a;
150   PetscScalar    *x,*b;
151 
152   PetscFunctionBegin;
153   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
154   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
155 
156   /* solve U^T * D * y = b by forward substitution */
157   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
158   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
159 
160   /* solve U*x = y by back substitution */
161   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
162 
163   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
164   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
165   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering"
171 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
172 {
173   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
174   PetscErrorCode ierr;
175   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
176   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
177   MatScalar      *aa=a->a;
178   PetscScalar    *x,*b;
179 
180   PetscFunctionBegin;
181   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
182   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
183   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
184   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
185   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
186   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
187   ierr = PetscLogFlops(bs2*a->nz + A->rmap.N);CHKERRQ(ierr);
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering"
193 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
194 {
195   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
196   PetscErrorCode ierr;
197   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
198   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
199   MatScalar      *aa=a->a;
200   PetscScalar    *x,*b;
201 
202   PetscFunctionBegin;
203   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
204   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
205   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
206   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
207   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
208   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
209   ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
215 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
216 {
217   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
218   IS             isrow=a->row;
219   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
220   PetscErrorCode ierr;
221   PetscInt       nz,*vj,k,*r,idx;
222   MatScalar      *aa=a->a,*v,*d;
223   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
224 
225   PetscFunctionBegin;
226   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
227   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
228   t  = a->solve_work;
229   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
230 
231   /* solve U^T * D * y = b by forward substitution */
232   tp = t;
233   for (k=0; k<mbs; k++) { /* t <- perm(b) */
234     idx   = 7*r[k];
235     tp[0] = b[idx];
236     tp[1] = b[idx+1];
237     tp[2] = b[idx+2];
238     tp[3] = b[idx+3];
239     tp[4] = b[idx+4];
240     tp[5] = b[idx+5];
241     tp[6] = b[idx+6];
242     tp += 7;
243   }
244 
245   for (k=0; k<mbs; k++){
246     v  = aa + 49*ai[k];
247     vj = aj + ai[k];
248     tp = t + k*7;
249     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
250     nz = ai[k+1] - ai[k];
251     tp = t + (*vj)*7;
252     while (nz--) {
253       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
254       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
255       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
256       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
257       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
258       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
259       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
260       vj++; tp = t + (*vj)*7;
261       v += 49;
262     }
263 
264     /* xk = inv(Dk)*(Dk*xk) */
265     d  = aa+k*49;          /* ptr to inv(Dk) */
266     tp    = t + k*7;
267     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
268     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
269     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
270     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
271     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
272     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
273     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
274   }
275 
276   /* solve U*x = y by back substitution */
277   for (k=mbs-1; k>=0; k--){
278     v  = aa + 49*ai[k];
279     vj = aj + ai[k];
280     tp    = t + k*7;
281     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
282     nz = ai[k+1] - ai[k];
283 
284     tp = t + (*vj)*7;
285     while (nz--) {
286       /* xk += U(k,:)*x(:) */
287       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
288       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
289       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
290       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
291       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
292       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
293       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
294       vj++; tp = t + (*vj)*7;
295       v += 49;
296     }
297     tp    = t + k*7;
298     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
299     idx   = 7*r[k];
300     x[idx]     = x0;
301     x[idx+1]   = x1;
302     x[idx+2]   = x2;
303     x[idx+3]   = x3;
304     x[idx+4]   = x4;
305     x[idx+5]   = x5;
306     x[idx+6]   = x6;
307   }
308 
309   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
310   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
311   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
312   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 #undef __FUNCT__
317 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
318 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
319 {
320   MatScalar      *v,*d;
321   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
322   PetscInt       nz,*vj,k;
323 
324   PetscFunctionBegin;
325   for (k=0; k<mbs; k++){
326     v  = aa + 49*ai[k];
327     xp = x + k*7;
328     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
329     nz = ai[k+1] - ai[k];
330     vj = aj + ai[k];
331     xp = x + (*vj)*7;
332     while (nz--) {
333       /* x(:) += U(k,:)^T*(Dk*xk) */
334       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
335       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
336       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
337       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
338       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
339       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
340       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
341       vj++; xp = x + (*vj)*7;
342       v += 49;
343     }
344     /* xk = inv(Dk)*(Dk*xk) */
345     d  = aa+k*49;          /* ptr to inv(Dk) */
346     xp = x + k*7;
347     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
348     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
349     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
350     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
351     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
352     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
353     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNCT__
359 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
360 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
361 {
362   MatScalar      *v,*d;
363   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
364   PetscInt       nz,*vj,k;
365 
366   PetscFunctionBegin;
367   for (k=mbs-1; k>=0; k--){
368     v  = aa + 49*ai[k];
369     xp = x + k*7;
370     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
371     nz = ai[k+1] - ai[k];
372     vj = aj + ai[k];
373     xp = x + (*vj)*7;
374     while (nz--) {
375       /* xk += U(k,:)*x(:) */
376       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
377       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
378       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
379       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
380       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
381       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
382       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
383       vj++;
384       v += 49; xp = x + (*vj)*7;
385     }
386     xp = x + k*7;
387     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
388   }
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
394 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
395 {
396   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
397   PetscErrorCode ierr;
398   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
399   MatScalar      *aa=a->a;
400   PetscScalar    *x,*b;
401 
402   PetscFunctionBegin;
403   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
404   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
405 
406   /* solve U^T * D * y = b by forward substitution */
407   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
408   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
409 
410   /* solve U*x = y by back substitution */
411   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
412 
413   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
414   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
415   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering"
421 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
422 {
423   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
424   PetscErrorCode ierr;
425   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
426   MatScalar      *aa=a->a;
427   PetscScalar    *x,*b;
428 
429   PetscFunctionBegin;
430   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
431   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
432   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
433   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
434   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
435   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
436   ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 #undef __FUNCT__
441 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering"
442 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
443 {
444   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
445   PetscErrorCode ierr;
446   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
447   MatScalar      *aa=a->a;
448   PetscScalar    *x,*b;
449 
450   PetscFunctionBegin;
451   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
452   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
453   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
454   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
455   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
456   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
457   ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
463 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
464 {
465   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
466   IS             isrow=a->row;
467   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
468   PetscErrorCode ierr;
469   PetscInt       nz,*vj,k,*r,idx;
470   MatScalar      *aa=a->a,*v,*d;
471   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
472 
473   PetscFunctionBegin;
474   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
475   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
476   t  = a->solve_work;
477   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
478 
479   /* solve U^T * D * y = b by forward substitution */
480   tp = t;
481   for (k=0; k<mbs; k++) { /* t <- perm(b) */
482     idx   = 6*r[k];
483     tp[0] = b[idx];
484     tp[1] = b[idx+1];
485     tp[2] = b[idx+2];
486     tp[3] = b[idx+3];
487     tp[4] = b[idx+4];
488     tp[5] = b[idx+5];
489     tp += 6;
490   }
491 
492   for (k=0; k<mbs; k++){
493     v  = aa + 36*ai[k];
494     vj = aj + ai[k];
495     tp = t + k*6;
496     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
497     nz = ai[k+1] - ai[k];
498     tp = t + (*vj)*6;
499     while (nz--) {
500       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
501       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
502       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
503       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
504       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
505       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
506       vj++; tp = t + (*vj)*6;
507       v += 36;
508     }
509 
510     /* xk = inv(Dk)*(Dk*xk) */
511     d  = aa+k*36;          /* ptr to inv(Dk) */
512     tp    = t + k*6;
513     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
514     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
515     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
516     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
517     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
518     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
519   }
520 
521   /* solve U*x = y by back substitution */
522   for (k=mbs-1; k>=0; k--){
523     v  = aa + 36*ai[k];
524     vj = aj + ai[k];
525     tp    = t + k*6;
526     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
527     nz = ai[k+1] - ai[k];
528 
529     tp = t + (*vj)*6;
530     while (nz--) {
531       /* xk += U(k,:)*x(:) */
532       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
533       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
534       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
535       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
536       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
537       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
538       vj++; tp = t + (*vj)*6;
539       v += 36;
540     }
541     tp    = t + k*6;
542     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
543     idx   = 6*r[k];
544     x[idx]     = x0;
545     x[idx+1]   = x1;
546     x[idx+2]   = x2;
547     x[idx+3]   = x3;
548     x[idx+4]   = x4;
549     x[idx+5]   = x5;
550   }
551 
552   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
553   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
554   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
555   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
561 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
562 {
563   MatScalar      *v,*d;
564   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
565   PetscInt       nz,*vj,k;
566 
567   PetscFunctionBegin;
568   for (k=0; k<mbs; k++){
569     v  = aa + 36*ai[k];
570     xp = x + k*6;
571     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
572     nz = ai[k+1] - ai[k];
573     vj = aj + ai[k];
574     xp = x + (*vj)*6;
575     while (nz--) {
576       /* x(:) += U(k,:)^T*(Dk*xk) */
577       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
578       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
579       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
580       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
581       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
582       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
583       vj++; xp = x + (*vj)*6;
584       v += 36;
585     }
586     /* xk = inv(Dk)*(Dk*xk) */
587     d  = aa+k*36;          /* ptr to inv(Dk) */
588     xp = x + k*6;
589     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
590     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
591     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
592     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
593     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
594     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
595   }
596   PetscFunctionReturn(0);
597 }
598 #undef __FUNCT__
599 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
600 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
601 {
602   MatScalar      *v,*d;
603   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
604   PetscInt       nz,*vj,k;
605 
606   PetscFunctionBegin;
607   for (k=mbs-1; k>=0; k--){
608     v  = aa + 36*ai[k];
609     xp = x + k*6;
610     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
611     nz = ai[k+1] - ai[k];
612     vj = aj + ai[k];
613     xp = x + (*vj)*6;
614     while (nz--) {
615       /* xk += U(k,:)*x(:) */
616       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
617       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
618       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
619       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
620       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
621       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
622       vj++;
623       v += 36; xp = x + (*vj)*6;
624     }
625     xp = x + k*6;
626     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
634 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
635 {
636   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
637   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
638   MatScalar      *aa=a->a;
639   PetscScalar    *x,*b;
640   PetscErrorCode ierr;
641 
642   PetscFunctionBegin;
643   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
644   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
645 
646   /* solve U^T * D * y = b by forward substitution */
647   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
648   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
649 
650   /* solve U*x = y by back substitution */
651   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
652 
653   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
654   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
655   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 
659 #undef __FUNCT__
660 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering"
661 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
662 {
663   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
664   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
665   MatScalar      *aa=a->a;
666   PetscScalar    *x,*b;
667   PetscErrorCode ierr;
668 
669   PetscFunctionBegin;
670   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
671   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
672   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
673   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
674   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
675   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
676   ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering"
682 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
683 {
684   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
685   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
686   MatScalar      *aa=a->a;
687   PetscScalar    *x,*b;
688   PetscErrorCode ierr;
689 
690   PetscFunctionBegin;
691   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
692   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
693   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
694   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
695   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
696   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
697   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
703 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
704 {
705   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
706   IS             isrow=a->row;
707   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
708   PetscErrorCode ierr;
709   PetscInt       nz,*vj,k,*r,idx;
710   MatScalar      *aa=a->a,*v,*diag;
711   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;
712 
713   PetscFunctionBegin;
714   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
715   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
716   t  = a->solve_work;
717   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
718 
719   /* solve U^T * D * y = b by forward substitution */
720   tp = t;
721   for (k=0; k<mbs; k++) { /* t <- perm(b) */
722     idx   = 5*r[k];
723     tp[0] = b[idx];
724     tp[1] = b[idx+1];
725     tp[2] = b[idx+2];
726     tp[3] = b[idx+3];
727     tp[4] = b[idx+4];
728     tp += 5;
729   }
730 
731   for (k=0; k<mbs; k++){
732     v  = aa + 25*ai[k];
733     vj = aj + ai[k];
734     tp = t + k*5;
735     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
736     nz = ai[k+1] - ai[k];
737 
738     tp = t + (*vj)*5;
739     while (nz--) {
740       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
741       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
742       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
743       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
744       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
745       vj++; tp = t + (*vj)*5;
746       v += 25;
747     }
748 
749     /* xk = inv(Dk)*(Dk*xk) */
750     diag  = aa+k*25;          /* ptr to inv(Dk) */
751     tp    = t + k*5;
752       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
753       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
754       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
755       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
756       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
757   }
758 
759   /* solve U*x = y by back substitution */
760   for (k=mbs-1; k>=0; k--){
761     v  = aa + 25*ai[k];
762     vj = aj + ai[k];
763     tp    = t + k*5;
764     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
765     nz = ai[k+1] - ai[k];
766 
767     tp = t + (*vj)*5;
768     while (nz--) {
769       /* xk += U(k,:)*x(:) */
770       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
771       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
772       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
773       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
774       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
775       vj++; tp = t + (*vj)*5;
776       v += 25;
777     }
778     tp    = t + k*5;
779     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
780     idx   = 5*r[k];
781     x[idx]     = x0;
782     x[idx+1]   = x1;
783     x[idx+2]   = x2;
784     x[idx+3]   = x3;
785     x[idx+4]   = x4;
786   }
787 
788   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
789   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
790   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
791   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
797 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
798 {
799   MatScalar      *v,*diag;
800   PetscScalar    *xp,x0,x1,x2,x3,x4;
801   PetscInt       nz,*vj,k;
802 
803   PetscFunctionBegin;
804   for (k=0; k<mbs; k++){
805     v  = aa + 25*ai[k];
806     xp = x + k*5;
807     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
808     nz = ai[k+1] - ai[k];
809     vj = aj + ai[k];
810     xp = x + (*vj)*5;
811     while (nz--) {
812       /* x(:) += U(k,:)^T*(Dk*xk) */
813       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
814       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
815       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
816       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
817       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
818       vj++; xp = x + (*vj)*5;
819       v += 25;
820     }
821     /* xk = inv(Dk)*(Dk*xk) */
822     diag = aa+k*25;          /* ptr to inv(Dk) */
823     xp   = x + k*5;
824     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
825     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
826     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
827     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
828     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
829   }
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
835 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
836 {
837   MatScalar      *v,*diag;
838   PetscScalar    *xp,x0,x1,x2,x3,x4;
839   PetscInt       nz,*vj,k;
840 
841   PetscFunctionBegin;
842   for (k=mbs-1; k>=0; k--){
843     v  = aa + 25*ai[k];
844     xp = x + k*5;
845     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
846     nz = ai[k+1] - ai[k];
847     vj = aj + ai[k];
848     xp = x + (*vj)*5;
849     while (nz--) {
850       /* xk += U(k,:)*x(:) */
851       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
852       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
853       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
854       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
855       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
856       vj++;
857       v += 25; xp = x + (*vj)*5;
858     }
859     xp = x + k*5;
860     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
861   }
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
867 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
868 {
869   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
870   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
871   MatScalar      *aa=a->a;
872   PetscScalar    *x,*b;
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
877   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
878 
879   /* solve U^T * D * y = b by forward substitution */
880   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
881   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
882 
883   /* solve U*x = y by back substitution */
884   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
885 
886   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering"
894 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
895 {
896   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
897   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
898   MatScalar      *aa=a->a;
899   PetscScalar    *x,*b;
900   PetscErrorCode ierr;
901 
902   PetscFunctionBegin;
903   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
904   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
905   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
906   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
907   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
908   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
909   ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering"
915 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
916 {
917   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
918   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
919   MatScalar      *aa=a->a;
920   PetscScalar    *x,*b;
921   PetscErrorCode ierr;
922 
923   PetscFunctionBegin;
924   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
925   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
926   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
927   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
928   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
929   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
930   ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr);
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
936 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
937 {
938   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
939   IS             isrow=a->row;
940   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
941   PetscErrorCode ierr;
942   PetscInt       nz,*vj,k,*r,idx;
943   MatScalar      *aa=a->a,*v,*diag;
944   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;
945 
946   PetscFunctionBegin;
947   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   t  = a->solve_work;
950   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
951 
952   /* solve U^T * D * y = b by forward substitution */
953   tp = t;
954   for (k=0; k<mbs; k++) { /* t <- perm(b) */
955     idx   = 4*r[k];
956     tp[0] = b[idx];
957     tp[1] = b[idx+1];
958     tp[2] = b[idx+2];
959     tp[3] = b[idx+3];
960     tp += 4;
961   }
962 
963   for (k=0; k<mbs; k++){
964     v  = aa + 16*ai[k];
965     vj = aj + ai[k];
966     tp = t + k*4;
967     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
968     nz = ai[k+1] - ai[k];
969 
970     tp = t + (*vj)*4;
971     while (nz--) {
972       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
973       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
974       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
975       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
976       vj++; tp = t + (*vj)*4;
977       v += 16;
978     }
979 
980     /* xk = inv(Dk)*(Dk*xk) */
981     diag  = aa+k*16;          /* ptr to inv(Dk) */
982     tp    = t + k*4;
983     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
984     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
985     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
986     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
987   }
988 
989   /* solve U*x = y by back substitution */
990   for (k=mbs-1; k>=0; k--){
991     v  = aa + 16*ai[k];
992     vj = aj + ai[k];
993     tp    = t + k*4;
994     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
995     nz = ai[k+1] - ai[k];
996 
997     tp = t + (*vj)*4;
998     while (nz--) {
999       /* xk += U(k,:)*x(:) */
1000       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
1001       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
1002       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
1003       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
1004       vj++; tp = t + (*vj)*4;
1005       v += 16;
1006     }
1007     tp    = t + k*4;
1008     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
1009     idx        = 4*r[k];
1010     x[idx]     = x0;
1011     x[idx+1]   = x1;
1012     x[idx+2]   = x2;
1013     x[idx+3]   = x3;
1014   }
1015 
1016   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1017   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1018   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1019   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1025 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1026 {
1027   MatScalar      *v,*diag;
1028   PetscScalar    *xp,*b,x0,x1,x2,x3;
1029   PetscInt       nz,*vj,k;
1030 
1031   PetscFunctionBegin;
1032   for (k=0; k<mbs; k++){
1033     v  = aa + 16*ai[k];
1034     xp = x + k*4;
1035     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
1036     nz = ai[k+1] - ai[k];
1037     vj = aj + ai[k];
1038     xp = x + (*vj)*4;
1039     while (nz--) {
1040       /* x(:) += U(k,:)^T*(Dk*xk) */
1041       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1042       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1043       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1044       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1045       vj++; xp = x + (*vj)*4;
1046       v += 16;
1047     }
1048     /* xk = inv(Dk)*(Dk*xk) */
1049     diag = aa+k*16;          /* ptr to inv(Dk) */
1050     xp   = x + k*4;
1051     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1052     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1053     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1054     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1055   }
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1061 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1062 {
1063   MatScalar      *v,*diag;
1064   PetscScalar    *xp,*b,x0,x1,x2,x3;
1065   PetscInt       nz,*vj,k;
1066 
1067   PetscFunctionBegin;
1068   for (k=mbs-1; k>=0; k--){
1069     v  = aa + 16*ai[k];
1070     xp = x + k*4;
1071     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
1072     nz = ai[k+1] - ai[k];
1073     vj = aj + ai[k];
1074     xp = x + (*vj)*4;
1075     while (nz--) {
1076       /* xk += U(k,:)*x(:) */
1077       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
1078       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
1079       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
1080       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
1081       vj++;
1082       v += 16; xp = x + (*vj)*4;
1083     }
1084     xp = x + k*4;
1085     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
1092 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1093 {
1094   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1095   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1096   MatScalar      *aa=a->a;
1097   PetscScalar    *x,*b;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1102   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1103 
1104   /* solve U^T * D * y = b by forward substitution */
1105   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1106   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1107 
1108   /* solve U*x = y by back substitution */
1109   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1110   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1111   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1112   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering"
1118 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1119 {
1120   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1121   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1122   MatScalar      *aa=a->a;
1123   PetscScalar    *x,*b;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1128   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1129   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1130   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1131   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1132   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1133   ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering"
1139 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1140 {
1141   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1142   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1143   MatScalar      *aa=a->a;
1144   PetscScalar    *x,*b;
1145   PetscErrorCode ierr;
1146 
1147   PetscFunctionBegin;
1148   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1149   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1150   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1151   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1152   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1153   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1154   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
1160 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
1161 {
1162   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1163   IS             isrow=a->row;
1164   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1165   PetscErrorCode ierr;
1166   PetscInt       nz,*vj,k,*r,idx;
1167   MatScalar      *aa=a->a,*v,*diag;
1168   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;
1169 
1170   PetscFunctionBegin;
1171   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1172   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1173   t  = a->solve_work;
1174   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1175 
1176   /* solve U^T * D * y = b by forward substitution */
1177   tp = t;
1178   for (k=0; k<mbs; k++) { /* t <- perm(b) */
1179     idx   = 3*r[k];
1180     tp[0] = b[idx];
1181     tp[1] = b[idx+1];
1182     tp[2] = b[idx+2];
1183     tp += 3;
1184   }
1185 
1186   for (k=0; k<mbs; k++){
1187     v  = aa + 9*ai[k];
1188     vj = aj + ai[k];
1189     tp = t + k*3;
1190     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
1191     nz = ai[k+1] - ai[k];
1192 
1193     tp = t + (*vj)*3;
1194     while (nz--) {
1195       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1196       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1197       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1198       vj++; tp = t + (*vj)*3;
1199       v += 9;
1200     }
1201 
1202     /* xk = inv(Dk)*(Dk*xk) */
1203     diag  = aa+k*9;          /* ptr to inv(Dk) */
1204     tp    = t + k*3;
1205     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1206     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1207     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1208   }
1209 
1210   /* solve U*x = y by back substitution */
1211   for (k=mbs-1; k>=0; k--){
1212     v  = aa + 9*ai[k];
1213     vj = aj + ai[k];
1214     tp    = t + k*3;
1215     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
1216     nz = ai[k+1] - ai[k];
1217 
1218     tp = t + (*vj)*3;
1219     while (nz--) {
1220       /* xk += U(k,:)*x(:) */
1221       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
1222       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
1223       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
1224       vj++; tp = t + (*vj)*3;
1225       v += 9;
1226     }
1227     tp    = t + k*3;
1228     tp[0] = x0; tp[1] = x1; tp[2] = x2;
1229     idx      = 3*r[k];
1230     x[idx]   = x0;
1231     x[idx+1] = x1;
1232     x[idx+2] = x2;
1233   }
1234 
1235   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1236   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1237   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1238   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1244 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1245 {
1246   MatScalar      *v,*diag;
1247   PetscScalar    *xp,*b,x0,x1,x2;
1248   PetscInt       nz,*vj,k;
1249 
1250   PetscFunctionBegin;
1251   for (k=0; k<mbs; k++){
1252     v  = aa + 9*ai[k];
1253     xp = x + k*3;
1254     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1255     nz = ai[k+1] - ai[k];
1256     vj = aj + ai[k];
1257     xp = x + (*vj)*3;
1258     while (nz--) {
1259       /* x(:) += U(k,:)^T*(Dk*xk) */
1260       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1261       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1262       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1263       vj++; xp = x + (*vj)*3;
1264       v += 9;
1265     }
1266     /* xk = inv(Dk)*(Dk*xk) */
1267     diag = aa+k*9;          /* ptr to inv(Dk) */
1268     xp   = x + k*3;
1269     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1270     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1271     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1278 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1279 {
1280   MatScalar      *v,*diag;
1281   PetscScalar    *xp,*b,x0,x1,x2;
1282   PetscInt       nz,*vj,k;
1283 
1284   PetscFunctionBegin;
1285   for (k=mbs-1; k>=0; k--){
1286     v  = aa + 9*ai[k];
1287     xp = x + k*3;
1288     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1289     nz = ai[k+1] - ai[k];
1290     vj = aj + ai[k];
1291     xp = x + (*vj)*3;
1292     while (nz--) {
1293       /* xk += U(k,:)*x(:) */
1294       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1295       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1296       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1297       vj++;
1298       v += 9; xp = x + (*vj)*3;
1299     }
1300     xp = x + k*3;
1301     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1308 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1309 {
1310   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1311   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1312   MatScalar      *aa=a->a;
1313   PetscScalar    *x,*b;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1318   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1319 
1320   /* solve U^T * D * y = b by forward substitution */
1321   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1322   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1323 
1324   /* solve U*x = y by back substitution */
1325   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1326 
1327   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1328   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1329   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering"
1335 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1336 {
1337   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1338   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1339   MatScalar      *aa=a->a;
1340   PetscScalar    *x,*b;
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1345   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1346   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1347   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1348   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1349   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1350   ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr);
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering"
1356 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1357 {
1358   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1359   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1360   MatScalar      *aa=a->a;
1361   PetscScalar    *x,*b;
1362   PetscErrorCode ierr;
1363 
1364   PetscFunctionBegin;
1365   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1366   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1367   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1368   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1369   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1370   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1371   ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1377 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1378 {
1379   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
1380   IS             isrow=a->row;
1381   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1382   PetscErrorCode ierr;
1383   PetscInt       nz,*vj,k,k2,*r,idx;
1384   MatScalar      *aa=a->a,*v,*diag;
1385   PetscScalar    *x,*b,x0,x1,*t;
1386 
1387   PetscFunctionBegin;
1388   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1389   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1390   t  = a->solve_work;
1391   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1392 
1393   /* solve U^T * D * y = perm(b) by forward substitution */
1394   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1395     idx = 2*r[k];
1396     t[k*2]   = b[idx];
1397     t[k*2+1] = b[idx+1];
1398   }
1399   for (k=0; k<mbs; k++){
1400     v  = aa + 4*ai[k];
1401     vj = aj + ai[k];
1402     k2 = k*2;
1403     x0 = t[k2]; x1 = t[k2+1];
1404     nz = ai[k+1] - ai[k];
1405     while (nz--) {
1406       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1407       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1408       vj++; v += 4;
1409     }
1410     diag = aa+k*4;  /* ptr to inv(Dk) */
1411     t[k2]   = diag[0]*x0 + diag[2]*x1;
1412     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1413   }
1414 
1415   /* solve U*x = y by back substitution */
1416   for (k=mbs-1; k>=0; k--){
1417     v  = aa + 4*ai[k];
1418     vj = aj + ai[k];
1419     k2 = k*2;
1420     x0 = t[k2]; x1 = t[k2+1];
1421     nz = ai[k+1] - ai[k];
1422     while (nz--) {
1423       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1424       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1425       vj++; v += 4;
1426     }
1427     t[k2]    = x0;
1428     t[k2+1]  = x1;
1429     idx      = 2*r[k];
1430     x[idx]   = x0;
1431     x[idx+1] = x1;
1432   }
1433 
1434   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1435   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1436   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1437   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1443 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1444 {
1445   MatScalar      *v,*diag;
1446   PetscScalar    x0,x1;
1447   PetscInt       nz,*vj,k,k2;
1448 
1449   PetscFunctionBegin;
1450   for (k=0; k<mbs; k++){
1451     v  = aa + 4*ai[k];
1452     vj = aj + ai[k];
1453     k2 = k*2;
1454     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1455     nz = ai[k+1] - ai[k];
1456 
1457     while (nz--) {
1458       /* x(:) += U(k,:)^T*(Dk*xk) */
1459       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1460       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1461       vj++; v += 4;
1462     }
1463     /* xk = inv(Dk)*(Dk*xk) */
1464     diag = aa+k*4;          /* ptr to inv(Dk) */
1465     x[k2]   = diag[0]*x0 + diag[2]*x1;
1466     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1473 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1474 {
1475   MatScalar      *v,*diag;
1476   PetscScalar    x0,x1;
1477   PetscInt       nz,*vj,k,k2;
1478 
1479   PetscFunctionBegin;
1480   for (k=mbs-1; k>=0; k--){
1481     v  = aa + 4*ai[k];
1482     vj = aj + ai[k];
1483     k2 = k*2;
1484     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1485     nz = ai[k+1] - ai[k];
1486     while (nz--) {
1487       /* xk += U(k,:)*x(:) */
1488       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1489       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1490       vj++; v += 4;
1491     }
1492     x[k2]     = x0;
1493     x[k2+1]   = x1;
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1500 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1501 {
1502   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1503   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1504   MatScalar      *aa=a->a;
1505   PetscScalar    *x,*b;
1506   PetscErrorCode ierr;
1507 
1508   PetscFunctionBegin;
1509   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1510   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1511 
1512   /* solve U^T * D * y = b by forward substitution */
1513   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1514   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1515 
1516   /* solve U*x = y by back substitution */
1517   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1518 
1519   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1520   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1521   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering"
1527 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1528 {
1529   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1530   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1531   MatScalar      *aa=a->a;
1532   PetscScalar    *x,*b;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1537   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1538   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1539   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1540   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1541   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1542   ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering"
1548 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1549 {
1550   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1551   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1552   MatScalar      *aa=a->a;
1553   PetscScalar    *x,*b;
1554   PetscErrorCode ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1558   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1559   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1560   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1561   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1563   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1569 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1570 {
1571   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1572   IS             isrow=a->row;
1573   PetscErrorCode ierr;
1574   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rip;
1575   MatScalar      *aa=a->a,*v;
1576   PetscScalar    *x,*b,xk,*t;
1577   PetscInt       nz,*vj,k;
1578 
1579   PetscFunctionBegin;
1580   if (!mbs) PetscFunctionReturn(0);
1581   /* printf(" MatSolve_SeqSBAIJ_1 is called\n"); */
1582 
1583   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1584   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1585   t    = a->solve_work;
1586 
1587   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1588 
1589   /* solve U^T*D*y = perm(b) by forward substitution */
1590   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1591   for (k=0; k<mbs; k++){
1592     v  = aa + ai[k] + 1;
1593     vj = aj + ai[k] + 1;
1594     xk = t[k];
1595     nz = ai[k+1] - ai[k] - 1;
1596     while (nz--) t[*vj++] += (*v++) * xk;
1597     t[k] = xk*aa[ai[k]];  /* aa[k] = 1/D(k) */
1598   }
1599 
1600   /* solve U*x = y by back substitution */
1601   for (k=mbs-1; k>=0; k--){
1602     v  = aa + ai[k] + 1;
1603     vj = aj + ai[k] + 1;
1604     xk = t[k];
1605     nz = ai[k+1] - ai[k] - 1;
1606     while (nz--) xk += (*v++) * t[*vj++];
1607     t[k]      = xk;
1608     x[rip[k]] = xk;
1609   }
1610 
1611   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1612   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1613   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1614   ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr);
1615   PetscFunctionReturn(0);
1616 }
1617 
1618 #undef __FUNCT__
1619 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1620 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1621 {
1622   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   if (A->rmap.bs == 1) {
1627     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1628   } else {
1629     IS              isrow=a->row;
1630     PetscInt             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1631     MatScalar       *aa=a->a,*v;
1632     PetscScalar     *x,*b,*t;
1633     PetscInt             nz,*vj,k,n;
1634     if (bb->n > a->solves_work_n) {
1635       ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
1636       ierr = PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1637       a->solves_work_n = bb->n;
1638     }
1639     n    = bb->n;
1640     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1641     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1642     t    = a->solves_work;
1643 
1644     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1645 
1646     /* solve U^T*D*y = perm(b) by forward substitution */
1647     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1648     for (k=0; k<mbs; k++){
1649       v  = aa + ai[k];
1650       vj = aj + ai[k];
1651       nz = ai[k+1] - ai[k];
1652       while (nz--) {
1653         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1654         v++;vj++;
1655       }
1656       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1657     }
1658 
1659     /* solve U*x = y by back substitution */
1660     for (k=mbs-1; k>=0; k--){
1661       v  = aa + ai[k];
1662       vj = aj + ai[k];
1663       nz = ai[k+1] - ai[k];
1664       while (nz--) {
1665         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1666         v++;vj++;
1667       }
1668       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1669     }
1670 
1671     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1672     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1673     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1674     ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));CHKERRQ(ierr);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 /*
1680       Special case where the matrix was ILU(0) factored in the natural
1681    ordering. This eliminates the need for the column and row permutation.
1682 */
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1685 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1686 {
1687   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1688   PetscErrorCode ierr;
1689   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1690   MatScalar      *aa=a->a,*v;
1691   PetscScalar    *x,*b,xk;
1692   PetscInt       nz,*vj,k;
1693 
1694   PetscFunctionBegin;
1695   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1696   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1697 
1698   /* solve U^T*D*y = b by forward substitution */
1699   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1700   for (k=0; k<mbs; k++){
1701     v  = aa + ai[k] + 1;
1702     vj = aj + ai[k] + 1;
1703     xk = x[k];
1704     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1705     while (nz--) x[*vj++] += (*v++) * xk;
1706     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1707   }
1708 
1709   /* solve U*x = y by back substitution */
1710   for (k=mbs-2; k>=0; k--){
1711     v  = aa + ai[k] + 1;
1712     vj = aj + ai[k] + 1;
1713     xk = x[k];
1714     nz = ai[k+1] - ai[k] - 1;
1715     while (nz--) xk += (*v++) * x[*vj++];
1716     x[k] = xk;
1717   }
1718 
1719   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1720   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1721   ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering"
1727 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1728 {
1729   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1730   PetscErrorCode ierr;
1731   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1732   MatScalar      *aa=a->a,*v;
1733   PetscScalar    *x,*b,xk;
1734   PetscInt       nz,*vj,k;
1735 
1736   PetscFunctionBegin;
1737   /* solve U^T*D*x = b by forward substitution */
1738   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1739   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1740   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1741   for (k=0; k<mbs; k++){
1742     v  = aa + ai[k] + 1;
1743     vj = aj + ai[k] + 1;
1744     xk = x[k];
1745     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1746     while (nz--) x[*vj++] += (*v++) * xk;
1747     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1748   }
1749   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1750   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1751   ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr);
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering"
1757 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1758 {
1759   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1760   PetscErrorCode ierr;
1761   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1762   MatScalar      *aa=a->a,*v;
1763   PetscScalar    *x,*b,xk;
1764   PetscInt       nz,*vj,k;
1765 
1766   PetscFunctionBegin;
1767   /* solve U*x = b by back substitution */
1768   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1769   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1770   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1771   for (k=mbs-2; k>=0; k--){
1772     v  = aa + ai[k] + 1;
1773     vj = aj + ai[k] + 1;
1774     xk = x[k];
1775     nz = ai[k+1] - ai[k] - 1;
1776     while (nz--) xk += (*v++) * x[*vj++];
1777     x[k] = xk;
1778   }
1779   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1780   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1781   ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr);
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR"
1788 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1789 {
1790   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1791   PetscErrorCode ierr;
1792   PetscInt       *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1793   PetscInt       *jutmp,bs = A->rmap.bs,bs2=a->bs2;
1794   PetscInt       m,reallocs = 0,*levtmp;
1795   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1796   PetscInt       incrlev,*lev,shift,prow,nz;
1797   PetscReal      f = info->fill,levels = info->levels;
1798   PetscTruth     perm_identity;
1799 
1800   PetscFunctionBegin;
1801   /* check whether perm is the identity mapping */
1802   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1803 
1804   if (perm_identity){
1805     a->permute = PETSC_FALSE;
1806     ai = a->i; aj = a->j;
1807   } else { /*  non-trivial permutation */
1808     a->permute = PETSC_TRUE;
1809     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1810     ai = a->inew; aj = a->jnew;
1811   }
1812 
1813   /* initialization */
1814   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1815   umax  = (PetscInt)(f*ai[mbs] + 1);
1816   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1817   umax += mbs + 1;
1818   shift = mbs + 1;
1819   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1820   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1821   iu[0] = mbs + 1;
1822   juidx = mbs + 1;
1823   /* prowl: linked list for pivot row */
1824   ierr    = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr);
1825   /* q: linked list for col index */
1826   q       = prowl + mbs;
1827   levtmp  = q     + mbs;
1828 
1829   for (i=0; i<mbs; i++){
1830     prowl[i] = mbs;
1831     q[i] = 0;
1832   }
1833 
1834   /* for each row k */
1835   for (k=0; k<mbs; k++){
1836     nzk  = 0;
1837     q[k] = mbs;
1838     /* copy current row into linked list */
1839     nz = ai[rip[k]+1] - ai[rip[k]];
1840     j = ai[rip[k]];
1841     while (nz--){
1842       vj = rip[aj[j++]];
1843       if (vj > k){
1844         qm = k;
1845         do {
1846           m  = qm; qm = q[m];
1847         } while(qm < vj);
1848         if (qm == vj) {
1849           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1850         }
1851         nzk++;
1852         q[m]       = vj;
1853         q[vj]      = qm;
1854         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1855       }
1856     }
1857 
1858     /* modify nonzero structure of k-th row by computing fill-in
1859        for each row prow to be merged in */
1860     prow = k;
1861     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1862 
1863     while (prow < k){
1864       /* merge row prow into k-th row */
1865       jmin = iu[prow] + 1;
1866       jmax = iu[prow+1];
1867       qm = k;
1868       for (j=jmin; j<jmax; j++){
1869         incrlev = lev[j-shift] + 1;
1870 	if (incrlev > levels) continue;
1871         vj      = ju[j];
1872         do {
1873           m = qm; qm = q[m];
1874         } while (qm < vj);
1875         if (qm != vj){      /* a fill */
1876           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1877           levtmp[vj] = incrlev;
1878         } else {
1879           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1880         }
1881       }
1882       prow = prowl[prow]; /* next pivot row */
1883     }
1884 
1885     /* add k to row list for first nonzero element in k-th row */
1886     if (nzk > 1){
1887       i = q[k]; /* col value of first nonzero element in k_th row of U */
1888       prowl[k] = prowl[i]; prowl[i] = k;
1889     }
1890     iu[k+1] = iu[k] + nzk;
1891 
1892     /* allocate more space to ju and lev if needed */
1893     if (iu[k+1] > umax) {
1894       /* estimate how much additional space we will need */
1895       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1896       /* just double the memory each time */
1897       maxadd = umax;
1898       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1899       umax += maxadd;
1900 
1901       /* allocate a longer ju */
1902       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1903       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
1904       ierr     = PetscFree(ju);CHKERRQ(ierr);
1905       ju       = jutmp;
1906 
1907       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1908       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr);
1909       ierr     = PetscFree(lev);CHKERRQ(ierr);
1910       lev      = jutmp;
1911       reallocs += 2; /* count how many times we realloc */
1912     }
1913 
1914     /* save nonzero structure of k-th row in ju */
1915     i=k;
1916     while (nzk --) {
1917       i                = q[i];
1918       ju[juidx]        = i;
1919       lev[juidx-shift] = levtmp[i];
1920       juidx++;
1921     }
1922   }
1923 
1924 #if defined(PETSC_USE_INFO)
1925   if (ai[mbs] != 0) {
1926     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1927     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1928     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1929     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
1930     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1931   } else {
1932     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1933   }
1934 #endif
1935 
1936   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1937   ierr = PetscFree(prowl);CHKERRQ(ierr);
1938   ierr = PetscFree(lev);CHKERRQ(ierr);
1939 
1940   /* put together the new matrix */
1941   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
1942   ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr);
1943   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1944   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1945 
1946   /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */
1947   b    = (Mat_SeqSBAIJ*)(*B)->data;
1948   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
1949   b->singlemalloc = PETSC_FALSE;
1950   b->free_a       = PETSC_TRUE;
1951   b->free_ij      = PETSC_TRUE;
1952   /* the next line frees the default space generated by the Create() */
1953   ierr    = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
1954   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1955   b->j    = ju;
1956   b->i    = iu;
1957   b->diag = 0;
1958   b->ilen = 0;
1959   b->imax = 0;
1960 
1961   if (b->row) {
1962     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1963   }
1964   if (b->icol) {
1965     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1966   }
1967   b->row  = perm;
1968   b->icol = perm;
1969   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1970   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1971   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1972   /* In b structure:  Free imax, ilen, old a, old j.
1973      Allocate idnew, solve_work, new a, new j */
1974   ierr    = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1975   b->maxnz = b->nz = iu[mbs];
1976 
1977   (*B)->factor                 = FACTOR_CHOLESKY;
1978   (*B)->info.factor_mallocs    = reallocs;
1979   (*B)->info.fill_ratio_given  = f;
1980   if (ai[mbs] != 0) {
1981     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1982   } else {
1983     (*B)->info.fill_ratio_needed = 0.0;
1984   }
1985 
1986   if (perm_identity){
1987     switch (bs) {
1988       case 1:
1989         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1990         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1991         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1992         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1993         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr);
1994         break;
1995       case 2:
1996         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1997         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1998         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1999         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
2000         break;
2001       case 3:
2002         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2003         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2004         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2005         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
2006         break;
2007       case 4:
2008         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2009         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2010         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2011         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
2012         break;
2013       case 5:
2014         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2015         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2016         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2017         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
2018         break;
2019       case 6:
2020         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2021         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2022         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2023         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
2024         break;
2025       case 7:
2026         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2027         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2028         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2029         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
2030       break;
2031       default:
2032         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2033         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2034         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2035         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr);
2036       break;
2037     }
2038   }
2039 
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 #include "petscbt.h"
2044 #include "src/mat/utils/freespace.h"
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
2047 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
2048 {
2049   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
2050   Mat_SeqSBAIJ       *b;
2051   Mat                B;
2052   PetscErrorCode     ierr;
2053   PetscTruth         perm_identity,free_ij = PETSC_TRUE;
2054   PetscInt           bs=A->rmap.bs,am=a->mbs;
2055   PetscInt           reallocs=0,*rip,i,*ai,*aj,*ui;
2056   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2057   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
2058   PetscReal          fill=info->fill,levels=info->levels,ratio_needed;
2059   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2060   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2061   PetscBT            lnkbt;
2062   PetscScalar        *ua;
2063 
2064   PetscFunctionBegin;
2065   /*
2066    This code originally uses Modified Sparse Row (MSR) storage
2067    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
2068    Then it is rewritten so the factor B takes seqsbaij format. However the associated
2069    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
2070    thus the original code in MSR format is still used for these cases.
2071    The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
2072    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
2073   */
2074   if (bs > 1){
2075     ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr);
2076     PetscFunctionReturn(0);
2077   }
2078 
2079   /* check whether perm is the identity mapping */
2080   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2081 
2082   /* special case that simply copies fill pattern */
2083   if (!levels && perm_identity) {
2084     a->permute = PETSC_FALSE;
2085     /* reuse the column pointers and row offsets for memory savings */
2086     ui           = a->i;
2087     uj           = a->j;
2088     free_ij      = PETSC_FALSE;
2089     ratio_needed = 1.0;
2090   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2091     if (perm_identity){
2092       a->permute = PETSC_FALSE;
2093       ai = a->i; aj = a->j;
2094     } else { /*  non-trivial permutation */
2095       a->permute = PETSC_TRUE;
2096       ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
2097       ai   = a->inew; aj = a->jnew;
2098     }
2099     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2100 
2101     /* initialization */
2102     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2103     ui[0] = 0;
2104 
2105     /* jl: linked list for storing indices of the pivot rows
2106        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2107     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
2108     il         = jl + am;
2109     uj_ptr     = (PetscInt**)(il + am);
2110     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2111     for (i=0; i<am; i++){
2112       jl[i] = am; il[i] = 0;
2113     }
2114 
2115     /* create and initialize a linked list for storing column indices of the active row k */
2116     nlnk = am + 1;
2117     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2118 
2119     /* initial FreeSpace size is fill*(ai[am]+1) */
2120     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2121     current_space = free_space;
2122     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2123     current_space_lvl = free_space_lvl;
2124 
2125     for (k=0; k<am; k++){  /* for each active row k */
2126       /* initialize lnk by the column indices of row rip[k] */
2127       nzk   = 0;
2128       ncols = ai[rip[k]+1] - ai[rip[k]];
2129       cols  = aj+ai[rip[k]];
2130       ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2131       nzk += nlnk;
2132 
2133       /* update lnk by computing fill-in for each pivot row to be merged in */
2134       prow = jl[k]; /* 1st pivot row */
2135 
2136       while (prow < k){
2137         nextprow = jl[prow];
2138 
2139         /* merge prow into k-th row */
2140         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2141         jmax = ui[prow+1];
2142         ncols = jmax-jmin;
2143         i     = jmin - ui[prow];
2144         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2145         j     = *(uj_lvl_ptr[prow] + i - 1);
2146         cols_lvl = uj_lvl_ptr[prow]+i;
2147         ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2148         nzk += nlnk;
2149 
2150         /* update il and jl for prow */
2151         if (jmin < jmax){
2152           il[prow] = jmin;
2153           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2154         }
2155         prow = nextprow;
2156       }
2157 
2158       /* if free space is not available, make more free space */
2159       if (current_space->local_remaining<nzk) {
2160         i = am - k + 1; /* num of unfactored rows */
2161         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2162         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2163         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2164         reallocs++;
2165       }
2166 
2167       /* copy data into free_space and free_space_lvl, then initialize lnk */
2168       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2169 
2170       /* add the k-th row into il and jl */
2171       if (nzk-1 > 0){
2172         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2173         jl[k] = jl[i]; jl[i] = k;
2174         il[k] = ui[k] + 1;
2175       }
2176       uj_ptr[k]     = current_space->array;
2177       uj_lvl_ptr[k] = current_space_lvl->array;
2178 
2179       current_space->array           += nzk;
2180       current_space->local_used      += nzk;
2181       current_space->local_remaining -= nzk;
2182       current_space_lvl->array           += nzk;
2183       current_space_lvl->local_used      += nzk;
2184       current_space_lvl->local_remaining -= nzk;
2185 
2186       ui[k+1] = ui[k] + nzk;
2187     }
2188 
2189 #if defined(PETSC_USE_INFO)
2190     if (ai[am] != 0) {
2191       PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2192       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2193       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2194       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2195     } else {
2196       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2197     }
2198 #endif
2199 
2200     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2201     ierr = PetscFree(jl);CHKERRQ(ierr);
2202 
2203     /* destroy list of free space and other temporary array(s) */
2204     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2205     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2206     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2207     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2208     if (ai[am] != 0) {
2209       ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2210     } else {
2211       ratio_needed = 0.0;
2212     }
2213   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2214 
2215   /* put together the new matrix in MATSEQSBAIJ format */
2216   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);CHKERRQ(ierr);
2217   ierr = MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);CHKERRQ(ierr);
2218   B = *fact;
2219   b = (Mat_SeqSBAIJ*)B->data;
2220   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2221   b->singlemalloc = PETSC_FALSE;
2222   b->free_a  = PETSC_TRUE;
2223   b->free_ij = free_ij;
2224   b->diag    = 0;
2225   b->ilen    = 0;
2226   b->imax    = 0;
2227   b->row     = perm;
2228   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2229   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2230   b->icol = perm;
2231   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2232   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2233   b->maxnz = b->nz = ui[am];
2234 
2235   B->factor                 = FACTOR_CHOLESKY;
2236   B->info.factor_mallocs    = reallocs;
2237   B->info.fill_ratio_given  = fill;
2238   B->info.fill_ratio_needed = ratio_needed;
2239 
2240   if (perm_identity){
2241     switch (bs) {
2242       case 1:
2243         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2244         B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2245         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2246         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr);
2247         break;
2248       case 2:
2249         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
2250         B->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2251         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2252         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
2253         break;
2254       case 3:
2255         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2256         B->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2257         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2258         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
2259         break;
2260       case 4:
2261         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2262         B->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2263         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2264         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
2265         break;
2266       case 5:
2267         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2268         B->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2269         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2270         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
2271         break;
2272       case 6:
2273         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2274         B->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2275         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2276         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
2277         break;
2278       case 7:
2279         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2280         B->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2281         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2282         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
2283       break;
2284       default:
2285         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2286         B->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2287         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2288         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr);
2289       break;
2290     }
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294 
2295