xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 6a4d4014f38af0fb260059d32162e9656590be6c)
1 #define PETSCMAT_DLL
2 
3 /*
4     Factorization code for SBAIJ format.
5 */
6 
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/inline/ilu.h"
10 #include "src/inline/dot.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
15 {
16   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
17   IS              isrow=a->row;
18   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
19   PetscErrorCode  ierr;
20   PetscInt        nz,*vj,k,*r,idx,k1;
21   PetscInt        bs=A->rmap.bs,bs2 = a->bs2;
22   MatScalar       *aa=a->a,*v,*diag;
23   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
24 
25   PetscFunctionBegin;
26   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
27   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
28   t  = a->solve_work;
29   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
30   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
31 
32   /* solve U^T * D * y = b by forward substitution */
33   xk = t;
34   for (k=0; k<mbs; k++) { /* t <- perm(b) */
35     idx   = bs*r[k];
36     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
37   }
38   for (k=0; k<mbs; k++){
39     v  = aa + bs2*ai[k];
40     xk = t + k*bs;      /* Dk*xk = k-th block of x */
41     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
42     nz = ai[k+1] - ai[k];
43     vj = aj + ai[k];
44     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
45     while (nz--) {
46       /* x(:) += U(k,:)^T*(Dk*xk) */
47       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
48       vj++; xj = t + (*vj)*bs;
49       v += bs2;
50     }
51     /* xk = inv(Dk)*(Dk*xk) */
52     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
53     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
54   }
55 
56   /* solve U*x = y by back substitution */
57   for (k=mbs-1; k>=0; k--){
58     v  = aa + bs2*ai[k];
59     xk = t + k*bs;        /* xk */
60     nz = ai[k+1] - ai[k];
61     vj = aj + ai[k];
62     xj = t + (*vj)*bs;
63     while (nz--) {
64       /* xk += U(k,:)*x(:) */
65       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
66       vj++;
67       v += bs2; xj = t + (*vj)*bs;
68     }
69     idx   = bs*r[k];
70     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
71   }
72 
73   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
74   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
75   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
76   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
77   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
83 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
84 {
85   PetscErrorCode ierr;
86   PetscInt       nz,*vj,k;
87   PetscInt       bs2 = bs*bs;
88   MatScalar      *v,*diag;
89   PetscScalar    *xk,*xj,*xk_tmp;
90 
91   PetscFunctionBegin;
92   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
93   for (k=0; k<mbs; k++){
94     v  = aa + bs2*ai[k];
95     xk = x + k*bs;      /* Dk*xk = k-th block of x */
96     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
97     nz = ai[k+1] - ai[k];
98     vj = aj + ai[k];
99     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
100     while (nz--) {
101       /* x(:) += U(k,:)^T*(Dk*xk) */
102       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
103       vj++; xj = x + (*vj)*bs;
104       v += bs2;
105     }
106     /* xk = inv(Dk)*(Dk*xk) */
107     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
108     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
109   }
110   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private"
116 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
117 {
118   PetscInt       nz,*vj,k;
119   PetscInt       bs2 = bs*bs;
120   MatScalar      *v;
121   PetscScalar    *xk,*xj;
122 
123   PetscFunctionBegin;
124   for (k=mbs-1; k>=0; k--){
125     v  = aa + bs2*ai[k];
126     xk = x + k*bs;        /* xk */
127     nz = ai[k+1] - ai[k];
128     vj = aj + ai[k];
129     xj = x + (*vj)*bs;
130     while (nz--) {
131       /* xk += U(k,:)*x(:) */
132       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
133       vj++;
134       v += bs2; xj = x + (*vj)*bs;
135     }
136   }
137   PetscFunctionReturn(0);
138 }
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
142 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
143 {
144   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
145   PetscErrorCode ierr;
146   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
147   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
148   MatScalar      *aa=a->a;
149   PetscScalar    *x,*b;
150 
151   PetscFunctionBegin;
152   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
153   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
154 
155   /* solve U^T * D * y = b by forward substitution */
156   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
157   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
158 
159   /* solve U*x = y by back substitution */
160   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
161 
162   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
163   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
164   ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr);
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNCT__
169 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering"
170 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
171 {
172   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
173   PetscErrorCode ierr;
174   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
175   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
176   MatScalar      *aa=a->a;
177   PetscScalar    *x,*b;
178 
179   PetscFunctionBegin;
180   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
181   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
182   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
183   ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
184   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
185   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
186   ierr = PetscLogFlops(bs2*a->nz + A->rmap.N);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering"
192 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
193 {
194   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
195   PetscErrorCode ierr;
196   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
197   PetscInt       bs=A->rmap.bs,bs2 = a->bs2;
198   MatScalar      *aa=a->a;
199   PetscScalar    *x,*b;
200 
201   PetscFunctionBegin;
202   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
203   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
204   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
205   ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr);
206   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
214 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
215 {
216   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
217   IS             isrow=a->row;
218   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
219   PetscErrorCode ierr;
220   PetscInt       nz,*vj,k,*r,idx;
221   MatScalar      *aa=a->a,*v,*d;
222   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
223 
224   PetscFunctionBegin;
225   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
226   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
227   t  = a->solve_work;
228   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
229 
230   /* solve U^T * D * y = b by forward substitution */
231   tp = t;
232   for (k=0; k<mbs; k++) { /* t <- perm(b) */
233     idx   = 7*r[k];
234     tp[0] = b[idx];
235     tp[1] = b[idx+1];
236     tp[2] = b[idx+2];
237     tp[3] = b[idx+3];
238     tp[4] = b[idx+4];
239     tp[5] = b[idx+5];
240     tp[6] = b[idx+6];
241     tp += 7;
242   }
243 
244   for (k=0; k<mbs; k++){
245     v  = aa + 49*ai[k];
246     vj = aj + ai[k];
247     tp = t + k*7;
248     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
249     nz = ai[k+1] - ai[k];
250     tp = t + (*vj)*7;
251     while (nz--) {
252       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
253       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
254       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
255       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
256       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
257       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
258       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
259       vj++; tp = t + (*vj)*7;
260       v += 49;
261     }
262 
263     /* xk = inv(Dk)*(Dk*xk) */
264     d  = aa+k*49;          /* ptr to inv(Dk) */
265     tp    = t + k*7;
266     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
267     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
268     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
269     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
270     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
271     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
272     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
273   }
274 
275   /* solve U*x = y by back substitution */
276   for (k=mbs-1; k>=0; k--){
277     v  = aa + 49*ai[k];
278     vj = aj + ai[k];
279     tp    = t + k*7;
280     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
281     nz = ai[k+1] - ai[k];
282 
283     tp = t + (*vj)*7;
284     while (nz--) {
285       /* xk += U(k,:)*x(:) */
286       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
287       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
288       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
289       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
290       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
291       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
292       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
293       vj++; tp = t + (*vj)*7;
294       v += 49;
295     }
296     tp    = t + k*7;
297     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
298     idx   = 7*r[k];
299     x[idx]     = x0;
300     x[idx+1]   = x1;
301     x[idx+2]   = x2;
302     x[idx+3]   = x3;
303     x[idx+4]   = x4;
304     x[idx+5]   = x5;
305     x[idx+6]   = x6;
306   }
307 
308   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
309   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
310   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
311   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
317 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
318 {
319   MatScalar      *v,*d;
320   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
321   PetscInt       nz,*vj,k;
322 
323   PetscFunctionBegin;
324   for (k=0; k<mbs; k++){
325     v  = aa + 49*ai[k];
326     xp = x + k*7;
327     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
328     nz = ai[k+1] - ai[k];
329     vj = aj + ai[k];
330     xp = x + (*vj)*7;
331     while (nz--) {
332       /* x(:) += U(k,:)^T*(Dk*xk) */
333       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
334       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
335       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
336       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
337       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
338       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
339       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
340       vj++; xp = x + (*vj)*7;
341       v += 49;
342     }
343     /* xk = inv(Dk)*(Dk*xk) */
344     d  = aa+k*49;          /* ptr to inv(Dk) */
345     xp = x + k*7;
346     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
347     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
348     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
349     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
350     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
351     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
352     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
353   }
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private"
359 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
360 {
361   MatScalar      *v;
362   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
363   PetscInt       nz,*vj,k;
364 
365   PetscFunctionBegin;
366   for (k=mbs-1; k>=0; k--){
367     v  = aa + 49*ai[k];
368     xp = x + k*7;
369     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
370     nz = ai[k+1] - ai[k];
371     vj = aj + ai[k];
372     xp = x + (*vj)*7;
373     while (nz--) {
374       /* xk += U(k,:)*x(:) */
375       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
376       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
377       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
378       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
379       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
380       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
381       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
382       vj++;
383       v += 49; xp = x + (*vj)*7;
384     }
385     xp = x + k*7;
386     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
387   }
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
393 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
394 {
395   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
396   PetscErrorCode ierr;
397   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
398   MatScalar      *aa=a->a;
399   PetscScalar    *x,*b;
400 
401   PetscFunctionBegin;
402   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
403   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
404 
405   /* solve U^T * D * y = b by forward substitution */
406   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
407   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
408 
409   /* solve U*x = y by back substitution */
410   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
411 
412   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
413   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
414   ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering"
420 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
421 {
422   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
423   PetscErrorCode ierr;
424   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
425   MatScalar      *aa=a->a;
426   PetscScalar    *x,*b;
427 
428   PetscFunctionBegin;
429   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
430   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
431   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
432   ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
433   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
434   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
435   ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr);
436   PetscFunctionReturn(0);
437 }
438 
439 #undef __FUNCT__
440 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering"
441 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
442 {
443   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
444   PetscErrorCode ierr;
445   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
446   MatScalar      *aa=a->a;
447   PetscScalar    *x,*b;
448 
449   PetscFunctionBegin;
450   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
451   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
452   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
453   ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
454   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
455   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
456   ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
462 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
463 {
464   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
465   IS             isrow=a->row;
466   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
467   PetscErrorCode ierr;
468   PetscInt       nz,*vj,k,*r,idx;
469   MatScalar      *aa=a->a,*v,*d;
470   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
471 
472   PetscFunctionBegin;
473   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
474   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
475   t  = a->solve_work;
476   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
477 
478   /* solve U^T * D * y = b by forward substitution */
479   tp = t;
480   for (k=0; k<mbs; k++) { /* t <- perm(b) */
481     idx   = 6*r[k];
482     tp[0] = b[idx];
483     tp[1] = b[idx+1];
484     tp[2] = b[idx+2];
485     tp[3] = b[idx+3];
486     tp[4] = b[idx+4];
487     tp[5] = b[idx+5];
488     tp += 6;
489   }
490 
491   for (k=0; k<mbs; k++){
492     v  = aa + 36*ai[k];
493     vj = aj + ai[k];
494     tp = t + k*6;
495     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
496     nz = ai[k+1] - ai[k];
497     tp = t + (*vj)*6;
498     while (nz--) {
499       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
500       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
501       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
502       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
503       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
504       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
505       vj++; tp = t + (*vj)*6;
506       v += 36;
507     }
508 
509     /* xk = inv(Dk)*(Dk*xk) */
510     d  = aa+k*36;          /* ptr to inv(Dk) */
511     tp    = t + k*6;
512     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
513     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
514     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
515     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
516     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
517     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
518   }
519 
520   /* solve U*x = y by back substitution */
521   for (k=mbs-1; k>=0; k--){
522     v  = aa + 36*ai[k];
523     vj = aj + ai[k];
524     tp    = t + k*6;
525     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
526     nz = ai[k+1] - ai[k];
527 
528     tp = t + (*vj)*6;
529     while (nz--) {
530       /* xk += U(k,:)*x(:) */
531       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
532       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
533       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
534       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
535       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
536       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
537       vj++; tp = t + (*vj)*6;
538       v += 36;
539     }
540     tp    = t + k*6;
541     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
542     idx   = 6*r[k];
543     x[idx]     = x0;
544     x[idx+1]   = x1;
545     x[idx+2]   = x2;
546     x[idx+3]   = x3;
547     x[idx+4]   = x4;
548     x[idx+5]   = x5;
549   }
550 
551   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
552   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
553   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
554   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
560 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
561 {
562   MatScalar      *v,*d;
563   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
564   PetscInt       nz,*vj,k;
565 
566   PetscFunctionBegin;
567   for (k=0; k<mbs; k++){
568     v  = aa + 36*ai[k];
569     xp = x + k*6;
570     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
571     nz = ai[k+1] - ai[k];
572     vj = aj + ai[k];
573     xp = x + (*vj)*6;
574     while (nz--) {
575       /* x(:) += U(k,:)^T*(Dk*xk) */
576       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
577       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
578       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
579       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
580       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
581       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
582       vj++; xp = x + (*vj)*6;
583       v += 36;
584     }
585     /* xk = inv(Dk)*(Dk*xk) */
586     d  = aa+k*36;          /* ptr to inv(Dk) */
587     xp = x + k*6;
588     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
589     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
590     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
591     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
592     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
593     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
594   }
595   PetscFunctionReturn(0);
596 }
597 #undef __FUNCT__
598 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private"
599 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
600 {
601   MatScalar      *v;
602   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
603   PetscInt       nz,*vj,k;
604 
605   PetscFunctionBegin;
606   for (k=mbs-1; k>=0; k--){
607     v  = aa + 36*ai[k];
608     xp = x + k*6;
609     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
610     nz = ai[k+1] - ai[k];
611     vj = aj + ai[k];
612     xp = x + (*vj)*6;
613     while (nz--) {
614       /* xk += U(k,:)*x(:) */
615       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
616       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
617       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
618       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
619       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
620       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
621       vj++;
622       v += 36; xp = x + (*vj)*6;
623     }
624     xp = x + k*6;
625     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
626   }
627   PetscFunctionReturn(0);
628 }
629 
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
633 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
634 {
635   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
636   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
637   MatScalar      *aa=a->a;
638   PetscScalar    *x,*b;
639   PetscErrorCode ierr;
640 
641   PetscFunctionBegin;
642   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
643   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
644 
645   /* solve U^T * D * y = b by forward substitution */
646   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
647   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
648 
649   /* solve U*x = y by back substitution */
650   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
651 
652   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
653   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
654   ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr);
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering"
660 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
661 {
662   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
663   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
664   MatScalar      *aa=a->a;
665   PetscScalar    *x,*b;
666   PetscErrorCode ierr;
667 
668   PetscFunctionBegin;
669   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
670   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
671   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
672   ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
673   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
674   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
675   ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering"
681 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
682 {
683   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
684   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
685   MatScalar      *aa=a->a;
686   PetscScalar    *x,*b;
687   PetscErrorCode ierr;
688 
689   PetscFunctionBegin;
690   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
691   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
692   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
693   ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
694   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
695   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
696   ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr);
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
702 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
703 {
704   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
705   IS             isrow=a->row;
706   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
707   PetscErrorCode ierr;
708   PetscInt       nz,*vj,k,*r,idx;
709   MatScalar      *aa=a->a,*v,*diag;
710   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;
711 
712   PetscFunctionBegin;
713   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
714   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
715   t  = a->solve_work;
716   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
717 
718   /* solve U^T * D * y = b by forward substitution */
719   tp = t;
720   for (k=0; k<mbs; k++) { /* t <- perm(b) */
721     idx   = 5*r[k];
722     tp[0] = b[idx];
723     tp[1] = b[idx+1];
724     tp[2] = b[idx+2];
725     tp[3] = b[idx+3];
726     tp[4] = b[idx+4];
727     tp += 5;
728   }
729 
730   for (k=0; k<mbs; k++){
731     v  = aa + 25*ai[k];
732     vj = aj + ai[k];
733     tp = t + k*5;
734     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
735     nz = ai[k+1] - ai[k];
736 
737     tp = t + (*vj)*5;
738     while (nz--) {
739       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
740       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
741       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
742       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
743       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
744       vj++; tp = t + (*vj)*5;
745       v += 25;
746     }
747 
748     /* xk = inv(Dk)*(Dk*xk) */
749     diag  = aa+k*25;          /* ptr to inv(Dk) */
750     tp    = t + k*5;
751       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
752       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
753       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
754       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
755       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
756   }
757 
758   /* solve U*x = y by back substitution */
759   for (k=mbs-1; k>=0; k--){
760     v  = aa + 25*ai[k];
761     vj = aj + ai[k];
762     tp    = t + k*5;
763     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
764     nz = ai[k+1] - ai[k];
765 
766     tp = t + (*vj)*5;
767     while (nz--) {
768       /* xk += U(k,:)*x(:) */
769       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
770       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
771       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
772       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
773       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
774       vj++; tp = t + (*vj)*5;
775       v += 25;
776     }
777     tp    = t + k*5;
778     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
779     idx   = 5*r[k];
780     x[idx]     = x0;
781     x[idx+1]   = x1;
782     x[idx+2]   = x2;
783     x[idx+3]   = x3;
784     x[idx+4]   = x4;
785   }
786 
787   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
788   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
789   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
790   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
796 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
797 {
798   MatScalar      *v,*diag;
799   PetscScalar    *xp,x0,x1,x2,x3,x4;
800   PetscInt       nz,*vj,k;
801 
802   PetscFunctionBegin;
803   for (k=0; k<mbs; k++){
804     v  = aa + 25*ai[k];
805     xp = x + k*5;
806     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
807     nz = ai[k+1] - ai[k];
808     vj = aj + ai[k];
809     xp = x + (*vj)*5;
810     while (nz--) {
811       /* x(:) += U(k,:)^T*(Dk*xk) */
812       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
813       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
814       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
815       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
816       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
817       vj++; xp = x + (*vj)*5;
818       v += 25;
819     }
820     /* xk = inv(Dk)*(Dk*xk) */
821     diag = aa+k*25;          /* ptr to inv(Dk) */
822     xp   = x + k*5;
823     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
824     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
825     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
826     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
827     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private"
834 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
835 {
836   MatScalar      *v;
837   PetscScalar    *xp,x0,x1,x2,x3,x4;
838   PetscInt       nz,*vj,k;
839 
840   PetscFunctionBegin;
841   for (k=mbs-1; k>=0; k--){
842     v  = aa + 25*ai[k];
843     xp = x + k*5;
844     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
845     nz = ai[k+1] - ai[k];
846     vj = aj + ai[k];
847     xp = x + (*vj)*5;
848     while (nz--) {
849       /* xk += U(k,:)*x(:) */
850       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
851       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
852       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
853       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
854       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
855       vj++;
856       v += 25; xp = x + (*vj)*5;
857     }
858     xp = x + k*5;
859     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
860   }
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
866 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
867 {
868   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
869   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
870   MatScalar      *aa=a->a;
871   PetscScalar    *x,*b;
872   PetscErrorCode ierr;
873 
874   PetscFunctionBegin;
875   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
876   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
877 
878   /* solve U^T * D * y = b by forward substitution */
879   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
880   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
881 
882   /* solve U*x = y by back substitution */
883   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
884 
885   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
886   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
887   ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr);
888   PetscFunctionReturn(0);
889 }
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering"
893 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
894 {
895   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
896   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
897   MatScalar      *aa=a->a;
898   PetscScalar    *x,*b;
899   PetscErrorCode ierr;
900 
901   PetscFunctionBegin;
902   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
903   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
904   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
905   ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
906   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
907   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
908   ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering"
914 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
915 {
916   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
917   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
918   MatScalar      *aa=a->a;
919   PetscScalar    *x,*b;
920   PetscErrorCode ierr;
921 
922   PetscFunctionBegin;
923   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
924   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
925   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
926   ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
927   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
928   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929   ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr);
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
935 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
936 {
937   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
938   IS             isrow=a->row;
939   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
940   PetscErrorCode ierr;
941   PetscInt       nz,*vj,k,*r,idx;
942   MatScalar      *aa=a->a,*v,*diag;
943   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;
944 
945   PetscFunctionBegin;
946   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
947   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
948   t  = a->solve_work;
949   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
950 
951   /* solve U^T * D * y = b by forward substitution */
952   tp = t;
953   for (k=0; k<mbs; k++) { /* t <- perm(b) */
954     idx   = 4*r[k];
955     tp[0] = b[idx];
956     tp[1] = b[idx+1];
957     tp[2] = b[idx+2];
958     tp[3] = b[idx+3];
959     tp += 4;
960   }
961 
962   for (k=0; k<mbs; k++){
963     v  = aa + 16*ai[k];
964     vj = aj + ai[k];
965     tp = t + k*4;
966     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
967     nz = ai[k+1] - ai[k];
968 
969     tp = t + (*vj)*4;
970     while (nz--) {
971       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
972       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
973       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
974       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
975       vj++; tp = t + (*vj)*4;
976       v += 16;
977     }
978 
979     /* xk = inv(Dk)*(Dk*xk) */
980     diag  = aa+k*16;          /* ptr to inv(Dk) */
981     tp    = t + k*4;
982     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
983     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
984     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
985     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
986   }
987 
988   /* solve U*x = y by back substitution */
989   for (k=mbs-1; k>=0; k--){
990     v  = aa + 16*ai[k];
991     vj = aj + ai[k];
992     tp    = t + k*4;
993     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
994     nz = ai[k+1] - ai[k];
995 
996     tp = t + (*vj)*4;
997     while (nz--) {
998       /* xk += U(k,:)*x(:) */
999       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
1000       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
1001       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
1002       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
1003       vj++; tp = t + (*vj)*4;
1004       v += 16;
1005     }
1006     tp    = t + k*4;
1007     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
1008     idx        = 4*r[k];
1009     x[idx]     = x0;
1010     x[idx+1]   = x1;
1011     x[idx+2]   = x2;
1012     x[idx+3]   = x3;
1013   }
1014 
1015   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1016   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1017   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1018   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1024 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1025 {
1026   MatScalar      *v,*diag;
1027   PetscScalar    *xp,x0,x1,x2,x3;
1028   PetscInt       nz,*vj,k;
1029 
1030   PetscFunctionBegin;
1031   for (k=0; k<mbs; k++){
1032     v  = aa + 16*ai[k];
1033     xp = x + k*4;
1034     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
1035     nz = ai[k+1] - ai[k];
1036     vj = aj + ai[k];
1037     xp = x + (*vj)*4;
1038     while (nz--) {
1039       /* x(:) += U(k,:)^T*(Dk*xk) */
1040       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1041       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1042       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1043       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1044       vj++; xp = x + (*vj)*4;
1045       v += 16;
1046     }
1047     /* xk = inv(Dk)*(Dk*xk) */
1048     diag = aa+k*16;          /* ptr to inv(Dk) */
1049     xp   = x + k*4;
1050     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1051     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1052     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1053     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private"
1060 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1061 {
1062   MatScalar      *v;
1063   PetscScalar    *xp,x0,x1,x2,x3;
1064   PetscInt       nz,*vj,k;
1065 
1066   PetscFunctionBegin;
1067   for (k=mbs-1; k>=0; k--){
1068     v  = aa + 16*ai[k];
1069     xp = x + k*4;
1070     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
1071     nz = ai[k+1] - ai[k];
1072     vj = aj + ai[k];
1073     xp = x + (*vj)*4;
1074     while (nz--) {
1075       /* xk += U(k,:)*x(:) */
1076       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
1077       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
1078       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
1079       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
1080       vj++;
1081       v += 16; xp = x + (*vj)*4;
1082     }
1083     xp = x + k*4;
1084     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
1091 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1092 {
1093   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1094   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1095   MatScalar      *aa=a->a;
1096   PetscScalar    *x,*b;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1101   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1102 
1103   /* solve U^T * D * y = b by forward substitution */
1104   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1105   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1106 
1107   /* solve U*x = y by back substitution */
1108   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1109   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1110   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1111   ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering"
1117 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1118 {
1119   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1120   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1121   MatScalar      *aa=a->a;
1122   PetscScalar    *x,*b;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1127   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1128   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
1129   ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1130   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1131   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1132   ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering"
1138 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1139 {
1140   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1141   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1142   MatScalar      *aa=a->a;
1143   PetscScalar    *x,*b;
1144   PetscErrorCode ierr;
1145 
1146   PetscFunctionBegin;
1147   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1148   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1149   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1150   ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1151   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1152   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1153   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
1159 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
1160 {
1161   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1162   IS             isrow=a->row;
1163   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1164   PetscErrorCode ierr;
1165   PetscInt       nz,*vj,k,*r,idx;
1166   MatScalar      *aa=a->a,*v,*diag;
1167   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;
1168 
1169   PetscFunctionBegin;
1170   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1171   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1172   t  = a->solve_work;
1173   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1174 
1175   /* solve U^T * D * y = b by forward substitution */
1176   tp = t;
1177   for (k=0; k<mbs; k++) { /* t <- perm(b) */
1178     idx   = 3*r[k];
1179     tp[0] = b[idx];
1180     tp[1] = b[idx+1];
1181     tp[2] = b[idx+2];
1182     tp += 3;
1183   }
1184 
1185   for (k=0; k<mbs; k++){
1186     v  = aa + 9*ai[k];
1187     vj = aj + ai[k];
1188     tp = t + k*3;
1189     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
1190     nz = ai[k+1] - ai[k];
1191 
1192     tp = t + (*vj)*3;
1193     while (nz--) {
1194       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1195       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1196       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1197       vj++; tp = t + (*vj)*3;
1198       v += 9;
1199     }
1200 
1201     /* xk = inv(Dk)*(Dk*xk) */
1202     diag  = aa+k*9;          /* ptr to inv(Dk) */
1203     tp    = t + k*3;
1204     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1205     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1206     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1207   }
1208 
1209   /* solve U*x = y by back substitution */
1210   for (k=mbs-1; k>=0; k--){
1211     v  = aa + 9*ai[k];
1212     vj = aj + ai[k];
1213     tp    = t + k*3;
1214     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
1215     nz = ai[k+1] - ai[k];
1216 
1217     tp = t + (*vj)*3;
1218     while (nz--) {
1219       /* xk += U(k,:)*x(:) */
1220       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
1221       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
1222       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
1223       vj++; tp = t + (*vj)*3;
1224       v += 9;
1225     }
1226     tp    = t + k*3;
1227     tp[0] = x0; tp[1] = x1; tp[2] = x2;
1228     idx      = 3*r[k];
1229     x[idx]   = x0;
1230     x[idx+1] = x1;
1231     x[idx+2] = x2;
1232   }
1233 
1234   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1235   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1236   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1237   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1243 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1244 {
1245   MatScalar      *v,*diag;
1246   PetscScalar    *xp,x0,x1,x2;
1247   PetscInt       nz,*vj,k;
1248 
1249   PetscFunctionBegin;
1250   for (k=0; k<mbs; k++){
1251     v  = aa + 9*ai[k];
1252     xp = x + k*3;
1253     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1254     nz = ai[k+1] - ai[k];
1255     vj = aj + ai[k];
1256     xp = x + (*vj)*3;
1257     while (nz--) {
1258       /* x(:) += U(k,:)^T*(Dk*xk) */
1259       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1260       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1261       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1262       vj++; xp = x + (*vj)*3;
1263       v += 9;
1264     }
1265     /* xk = inv(Dk)*(Dk*xk) */
1266     diag = aa+k*9;          /* ptr to inv(Dk) */
1267     xp   = x + k*3;
1268     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1269     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1270     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1271   }
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private"
1277 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1278 {
1279   MatScalar      *v;
1280   PetscScalar    *xp,x0,x1,x2;
1281   PetscInt       nz,*vj,k;
1282 
1283   PetscFunctionBegin;
1284   for (k=mbs-1; k>=0; k--){
1285     v  = aa + 9*ai[k];
1286     xp = x + k*3;
1287     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1288     nz = ai[k+1] - ai[k];
1289     vj = aj + ai[k];
1290     xp = x + (*vj)*3;
1291     while (nz--) {
1292       /* xk += U(k,:)*x(:) */
1293       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1294       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1295       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1296       vj++;
1297       v += 9; xp = x + (*vj)*3;
1298     }
1299     xp = x + k*3;
1300     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1301   }
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1307 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1308 {
1309   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1310   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1311   MatScalar      *aa=a->a;
1312   PetscScalar    *x,*b;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1317   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1318 
1319   /* solve U^T * D * y = b by forward substitution */
1320   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1321   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1322 
1323   /* solve U*x = y by back substitution */
1324   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1325 
1326   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1327   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1328   ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering"
1334 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1335 {
1336   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1337   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1338   MatScalar      *aa=a->a;
1339   PetscScalar    *x,*b;
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1344   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1345   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1346   ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1347   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1348   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1349   ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering"
1355 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1356 {
1357   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1358   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1359   MatScalar      *aa=a->a;
1360   PetscScalar    *x,*b;
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1365   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1366   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1367   ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1368   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1369   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1370   ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1376 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1377 {
1378   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
1379   IS             isrow=a->row;
1380   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1381   PetscErrorCode ierr;
1382   PetscInt       nz,*vj,k,k2,*r,idx;
1383   MatScalar      *aa=a->a,*v,*diag;
1384   PetscScalar    *x,*b,x0,x1,*t;
1385 
1386   PetscFunctionBegin;
1387   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1388   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1389   t  = a->solve_work;
1390   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1391 
1392   /* solve U^T * D * y = perm(b) by forward substitution */
1393   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1394     idx = 2*r[k];
1395     t[k*2]   = b[idx];
1396     t[k*2+1] = b[idx+1];
1397   }
1398   for (k=0; k<mbs; k++){
1399     v  = aa + 4*ai[k];
1400     vj = aj + ai[k];
1401     k2 = k*2;
1402     x0 = t[k2]; x1 = t[k2+1];
1403     nz = ai[k+1] - ai[k];
1404     while (nz--) {
1405       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1406       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1407       vj++; v += 4;
1408     }
1409     diag = aa+k*4;  /* ptr to inv(Dk) */
1410     t[k2]   = diag[0]*x0 + diag[2]*x1;
1411     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1412   }
1413 
1414   /* solve U*x = y by back substitution */
1415   for (k=mbs-1; k>=0; k--){
1416     v  = aa + 4*ai[k];
1417     vj = aj + ai[k];
1418     k2 = k*2;
1419     x0 = t[k2]; x1 = t[k2+1];
1420     nz = ai[k+1] - ai[k];
1421     while (nz--) {
1422       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1423       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1424       vj++; v += 4;
1425     }
1426     t[k2]    = x0;
1427     t[k2+1]  = x1;
1428     idx      = 2*r[k];
1429     x[idx]   = x0;
1430     x[idx+1] = x1;
1431   }
1432 
1433   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1434   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1435   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1436   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1442 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1443 {
1444   MatScalar      *v,*diag;
1445   PetscScalar    x0,x1;
1446   PetscInt       nz,*vj,k,k2;
1447 
1448   PetscFunctionBegin;
1449   for (k=0; k<mbs; k++){
1450     v  = aa + 4*ai[k];
1451     vj = aj + ai[k];
1452     k2 = k*2;
1453     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1454     nz = ai[k+1] - ai[k];
1455 
1456     while (nz--) {
1457       /* x(:) += U(k,:)^T*(Dk*xk) */
1458       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1459       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1460       vj++; v += 4;
1461     }
1462     /* xk = inv(Dk)*(Dk*xk) */
1463     diag = aa+k*4;          /* ptr to inv(Dk) */
1464     x[k2]   = diag[0]*x0 + diag[2]*x1;
1465     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1466   }
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private"
1472 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1473 {
1474   MatScalar      *v;
1475   PetscScalar    x0,x1;
1476   PetscInt       nz,*vj,k,k2;
1477 
1478   PetscFunctionBegin;
1479   for (k=mbs-1; k>=0; k--){
1480     v  = aa + 4*ai[k];
1481     vj = aj + ai[k];
1482     k2 = k*2;
1483     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1484     nz = ai[k+1] - ai[k];
1485     while (nz--) {
1486       /* xk += U(k,:)*x(:) */
1487       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1488       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1489       vj++; v += 4;
1490     }
1491     x[k2]     = x0;
1492     x[k2+1]   = x1;
1493   }
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1499 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1500 {
1501   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1502   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1503   MatScalar      *aa=a->a;
1504   PetscScalar    *x,*b;
1505   PetscErrorCode ierr;
1506 
1507   PetscFunctionBegin;
1508   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1509   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1510 
1511   /* solve U^T * D * y = b by forward substitution */
1512   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1513   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1514 
1515   /* solve U*x = y by back substitution */
1516   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1517 
1518   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1519   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1520   ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering"
1526 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1527 {
1528   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1529   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1530   MatScalar      *aa=a->a;
1531   PetscScalar    *x,*b;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1536   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1537   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1538   ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1539   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1540   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1541   ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering"
1547 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1548 {
1549   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1550   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1551   MatScalar      *aa=a->a;
1552   PetscScalar    *x,*b;
1553   PetscErrorCode ierr;
1554 
1555   PetscFunctionBegin;
1556   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1557   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1558   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1559   ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr);
1560   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1562   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1568 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1569 {
1570   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1571   IS             isrow=a->row;
1572   PetscErrorCode ierr;
1573   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rip;
1574   MatScalar      *aa=a->a,*v;
1575   PetscScalar    *x,*b,xk,*t;
1576   PetscInt       nz,*vj,k;
1577 
1578   PetscFunctionBegin;
1579   if (!mbs) PetscFunctionReturn(0);
1580   /* printf(" MatSolve_SeqSBAIJ_1 is called\n"); */
1581 
1582   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1583   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1584   t    = a->solve_work;
1585 
1586   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1587 
1588   /* solve U^T*D*y = perm(b) by forward substitution */
1589   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1590   for (k=0; k<mbs; k++){
1591     v  = aa + ai[k] + 1;
1592     vj = aj + ai[k] + 1;
1593     xk = t[k];
1594     nz = ai[k+1] - ai[k] - 1;
1595     while (nz--) t[*vj++] += (*v++) * xk;
1596     t[k] = xk*aa[ai[k]];  /* aa[k] = 1/D(k) */
1597   }
1598 
1599   /* solve U*x = y by back substitution */
1600   for (k=mbs-1; k>=0; k--){
1601     v  = aa + ai[k] + 1;
1602     vj = aj + ai[k] + 1;
1603     xk = t[k];
1604     nz = ai[k+1] - ai[k] - 1;
1605     while (nz--) xk += (*v++) * t[*vj++];
1606     t[k]      = xk;
1607     x[rip[k]] = xk;
1608   }
1609 
1610   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1611   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1612   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1613   ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1619 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1620 {
1621   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1622   PetscErrorCode ierr;
1623 
1624   PetscFunctionBegin;
1625   if (A->rmap.bs == 1) {
1626     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1627   } else {
1628     IS              isrow=a->row;
1629     PetscInt             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1630     MatScalar       *aa=a->a,*v;
1631     PetscScalar     *x,*b,*t;
1632     PetscInt             nz,*vj,k,n;
1633     if (bb->n > a->solves_work_n) {
1634       ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
1635       ierr = PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1636       a->solves_work_n = bb->n;
1637     }
1638     n    = bb->n;
1639     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1640     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1641     t    = a->solves_work;
1642 
1643     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1644 
1645     /* solve U^T*D*y = perm(b) by forward substitution */
1646     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1647     for (k=0; k<mbs; k++){
1648       v  = aa + ai[k];
1649       vj = aj + ai[k];
1650       nz = ai[k+1] - ai[k];
1651       while (nz--) {
1652         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1653         v++;vj++;
1654       }
1655       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1656     }
1657 
1658     /* solve U*x = y by back substitution */
1659     for (k=mbs-1; k>=0; k--){
1660       v  = aa + ai[k];
1661       vj = aj + ai[k];
1662       nz = ai[k+1] - ai[k];
1663       while (nz--) {
1664         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1665         v++;vj++;
1666       }
1667       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1668     }
1669 
1670     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1671     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1672     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1673     ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));CHKERRQ(ierr);
1674   }
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 /*
1679       Special case where the matrix was ILU(0) factored in the natural
1680    ordering. This eliminates the need for the column and row permutation.
1681 */
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1684 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1685 {
1686   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1687   PetscErrorCode ierr;
1688   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1689   MatScalar      *aa=a->a,*v;
1690   PetscScalar    *x,*b,xk;
1691   PetscInt       nz,*vj,k;
1692 
1693   PetscFunctionBegin;
1694   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1695   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1696 
1697   /* solve U^T*D*y = b by forward substitution */
1698   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1699   for (k=0; k<mbs; k++){
1700     v  = aa + ai[k] + 1;
1701     vj = aj + ai[k] + 1;
1702     xk = x[k];
1703     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1704     while (nz--) x[*vj++] += (*v++) * xk;
1705     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1706   }
1707 
1708   /* solve U*x = y by back substitution */
1709   for (k=mbs-2; k>=0; k--){
1710     v  = aa + ai[k] + 1;
1711     vj = aj + ai[k] + 1;
1712     xk = x[k];
1713     nz = ai[k+1] - ai[k] - 1;
1714     while (nz--) xk += (*v++) * x[*vj++];
1715     x[k] = xk;
1716   }
1717 
1718   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1719   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1720   ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering"
1726 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1727 {
1728   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1729   PetscErrorCode ierr;
1730   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1731   MatScalar      *aa=a->a,*v;
1732   PetscScalar    *x,*b,xk;
1733   PetscInt       nz,*vj,k;
1734 
1735   PetscFunctionBegin;
1736   /* solve U^T*D*x = b by forward substitution */
1737   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1738   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1739   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1740   for (k=0; k<mbs; k++){
1741     v  = aa + ai[k] + 1;
1742     vj = aj + ai[k] + 1;
1743     xk = x[k];
1744     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1745     while (nz--) x[*vj++] += (*v++) * xk;
1746     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1747   }
1748   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1749   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1750   ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr);
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering"
1756 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1757 {
1758   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1759   PetscErrorCode ierr;
1760   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1761   MatScalar      *aa=a->a,*v;
1762   PetscScalar    *x,*b,xk;
1763   PetscInt       nz,*vj,k;
1764 
1765   PetscFunctionBegin;
1766   /* solve U*x = b by back substitution */
1767   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1768   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1769   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1770   for (k=mbs-2; k>=0; k--){
1771     v  = aa + ai[k] + 1;
1772     vj = aj + ai[k] + 1;
1773     xk = x[k];
1774     nz = ai[k+1] - ai[k] - 1;
1775     while (nz--) xk += (*v++) * x[*vj++];
1776     x[k] = xk;
1777   }
1778   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1779   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1780   ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr);
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR"
1787 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1788 {
1789   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1790   PetscErrorCode ierr;
1791   PetscInt       *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1792   PetscInt       *jutmp,bs = A->rmap.bs,bs2=a->bs2;
1793   PetscInt       m,reallocs = 0,*levtmp;
1794   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1795   PetscInt       incrlev,*lev,shift,prow,nz;
1796   PetscReal      f = info->fill,levels = info->levels;
1797   PetscTruth     perm_identity;
1798 
1799   PetscFunctionBegin;
1800   /* check whether perm is the identity mapping */
1801   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1802 
1803   if (perm_identity){
1804     a->permute = PETSC_FALSE;
1805     ai = a->i; aj = a->j;
1806   } else { /*  non-trivial permutation */
1807     a->permute = PETSC_TRUE;
1808     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1809     ai = a->inew; aj = a->jnew;
1810   }
1811 
1812   /* initialization */
1813   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1814   umax  = (PetscInt)(f*ai[mbs] + 1);
1815   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1816   umax += mbs + 1;
1817   shift = mbs + 1;
1818   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1819   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1820   iu[0] = mbs + 1;
1821   juidx = mbs + 1;
1822   /* prowl: linked list for pivot row */
1823   ierr    = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr);
1824   /* q: linked list for col index */
1825   q       = prowl + mbs;
1826   levtmp  = q     + mbs;
1827 
1828   for (i=0; i<mbs; i++){
1829     prowl[i] = mbs;
1830     q[i] = 0;
1831   }
1832 
1833   /* for each row k */
1834   for (k=0; k<mbs; k++){
1835     nzk  = 0;
1836     q[k] = mbs;
1837     /* copy current row into linked list */
1838     nz = ai[rip[k]+1] - ai[rip[k]];
1839     j = ai[rip[k]];
1840     while (nz--){
1841       vj = rip[aj[j++]];
1842       if (vj > k){
1843         qm = k;
1844         do {
1845           m  = qm; qm = q[m];
1846         } while(qm < vj);
1847         if (qm == vj) {
1848           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1849         }
1850         nzk++;
1851         q[m]       = vj;
1852         q[vj]      = qm;
1853         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1854       }
1855     }
1856 
1857     /* modify nonzero structure of k-th row by computing fill-in
1858        for each row prow to be merged in */
1859     prow = k;
1860     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1861 
1862     while (prow < k){
1863       /* merge row prow into k-th row */
1864       jmin = iu[prow] + 1;
1865       jmax = iu[prow+1];
1866       qm = k;
1867       for (j=jmin; j<jmax; j++){
1868         incrlev = lev[j-shift] + 1;
1869 	if (incrlev > levels) continue;
1870         vj      = ju[j];
1871         do {
1872           m = qm; qm = q[m];
1873         } while (qm < vj);
1874         if (qm != vj){      /* a fill */
1875           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1876           levtmp[vj] = incrlev;
1877         } else {
1878           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1879         }
1880       }
1881       prow = prowl[prow]; /* next pivot row */
1882     }
1883 
1884     /* add k to row list for first nonzero element in k-th row */
1885     if (nzk > 1){
1886       i = q[k]; /* col value of first nonzero element in k_th row of U */
1887       prowl[k] = prowl[i]; prowl[i] = k;
1888     }
1889     iu[k+1] = iu[k] + nzk;
1890 
1891     /* allocate more space to ju and lev if needed */
1892     if (iu[k+1] > umax) {
1893       /* estimate how much additional space we will need */
1894       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1895       /* just double the memory each time */
1896       maxadd = umax;
1897       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1898       umax += maxadd;
1899 
1900       /* allocate a longer ju */
1901       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1902       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
1903       ierr     = PetscFree(ju);CHKERRQ(ierr);
1904       ju       = jutmp;
1905 
1906       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1907       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr);
1908       ierr     = PetscFree(lev);CHKERRQ(ierr);
1909       lev      = jutmp;
1910       reallocs += 2; /* count how many times we realloc */
1911     }
1912 
1913     /* save nonzero structure of k-th row in ju */
1914     i=k;
1915     while (nzk --) {
1916       i                = q[i];
1917       ju[juidx]        = i;
1918       lev[juidx-shift] = levtmp[i];
1919       juidx++;
1920     }
1921   }
1922 
1923 #if defined(PETSC_USE_INFO)
1924   if (ai[mbs] != 0) {
1925     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1926     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
1927     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
1928     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
1929     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1930   } else {
1931     ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
1932   }
1933 #endif
1934 
1935   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1936   ierr = PetscFree(prowl);CHKERRQ(ierr);
1937   ierr = PetscFree(lev);CHKERRQ(ierr);
1938 
1939   /* put together the new matrix */
1940   ierr = MatCreate(A->comm,B);CHKERRQ(ierr);
1941   ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr);
1942   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1943   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1944 
1945   /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */
1946   b    = (Mat_SeqSBAIJ*)(*B)->data;
1947   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
1948   b->singlemalloc = PETSC_FALSE;
1949   b->free_a       = PETSC_TRUE;
1950   b->free_ij      = PETSC_TRUE;
1951   /* the next line frees the default space generated by the Create() */
1952   ierr    = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr);
1953   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1954   b->j    = ju;
1955   b->i    = iu;
1956   b->diag = 0;
1957   b->ilen = 0;
1958   b->imax = 0;
1959 
1960   if (b->row) {
1961     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1962   }
1963   if (b->icol) {
1964     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1965   }
1966   b->row  = perm;
1967   b->icol = perm;
1968   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1969   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1970   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1971   /* In b structure:  Free imax, ilen, old a, old j.
1972      Allocate idnew, solve_work, new a, new j */
1973   ierr    = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1974   b->maxnz = b->nz = iu[mbs];
1975 
1976   (*B)->factor                 = FACTOR_CHOLESKY;
1977   (*B)->info.factor_mallocs    = reallocs;
1978   (*B)->info.fill_ratio_given  = f;
1979   if (ai[mbs] != 0) {
1980     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1981   } else {
1982     (*B)->info.fill_ratio_needed = 0.0;
1983   }
1984 
1985   if (perm_identity){
1986     switch (bs) {
1987       case 1:
1988         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1989         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1990         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1991         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1992         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr);
1993         break;
1994       case 2:
1995         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1996         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1997         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1998         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
1999         break;
2000       case 3:
2001         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2002         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2003         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2004         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
2005         break;
2006       case 4:
2007         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2008         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2009         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2010         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
2011         break;
2012       case 5:
2013         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2014         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2015         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2016         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
2017         break;
2018       case 6:
2019         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2020         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2021         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2022         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
2023         break;
2024       case 7:
2025         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2026         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2027         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2028         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
2029       break;
2030       default:
2031         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2032         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2033         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2034         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr);
2035       break;
2036     }
2037   }
2038 
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #include "petscbt.h"
2043 #include "src/mat/utils/freespace.h"
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
2046 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
2047 {
2048   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
2049   Mat_SeqSBAIJ       *b;
2050   Mat                B;
2051   PetscErrorCode     ierr;
2052   PetscTruth         perm_identity,free_ij = PETSC_TRUE;
2053   PetscInt           bs=A->rmap.bs,am=a->mbs;
2054   PetscInt           reallocs=0,*rip,i,*ai,*aj,*ui;
2055   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2056   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
2057   PetscReal          fill=info->fill,levels=info->levels,ratio_needed;
2058   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2059   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2060   PetscBT            lnkbt;
2061   PetscScalar        *ua;
2062 
2063   PetscFunctionBegin;
2064   /*
2065    This code originally uses Modified Sparse Row (MSR) storage
2066    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
2067    Then it is rewritten so the factor B takes seqsbaij format. However the associated
2068    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
2069    thus the original code in MSR format is still used for these cases.
2070    The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
2071    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
2072   */
2073   if (bs > 1){
2074     ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr);
2075     PetscFunctionReturn(0);
2076   }
2077 
2078   /* check whether perm is the identity mapping */
2079   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2080 
2081   /* special case that simply copies fill pattern */
2082   if (!levels && perm_identity) {
2083     a->permute = PETSC_FALSE;
2084     /* reuse the column pointers and row offsets for memory savings */
2085     ui           = a->i;
2086     uj           = a->j;
2087     free_ij      = PETSC_FALSE;
2088     ratio_needed = 1.0;
2089   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2090     if (perm_identity){
2091       a->permute = PETSC_FALSE;
2092       ai = a->i; aj = a->j;
2093     } else { /*  non-trivial permutation */
2094       a->permute = PETSC_TRUE;
2095       ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
2096       ai   = a->inew; aj = a->jnew;
2097     }
2098     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2099 
2100     /* initialization */
2101     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
2102     ui[0] = 0;
2103 
2104     /* jl: linked list for storing indices of the pivot rows
2105        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2106     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
2107     il         = jl + am;
2108     uj_ptr     = (PetscInt**)(il + am);
2109     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2110     for (i=0; i<am; i++){
2111       jl[i] = am; il[i] = 0;
2112     }
2113 
2114     /* create and initialize a linked list for storing column indices of the active row k */
2115     nlnk = am + 1;
2116     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2117 
2118     /* initial FreeSpace size is fill*(ai[am]+1) */
2119     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
2120     current_space = free_space;
2121     ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
2122     current_space_lvl = free_space_lvl;
2123 
2124     for (k=0; k<am; k++){  /* for each active row k */
2125       /* initialize lnk by the column indices of row rip[k] */
2126       nzk   = 0;
2127       ncols = ai[rip[k]+1] - ai[rip[k]];
2128       cols  = aj+ai[rip[k]];
2129       ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2130       nzk += nlnk;
2131 
2132       /* update lnk by computing fill-in for each pivot row to be merged in */
2133       prow = jl[k]; /* 1st pivot row */
2134 
2135       while (prow < k){
2136         nextprow = jl[prow];
2137 
2138         /* merge prow into k-th row */
2139         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2140         jmax = ui[prow+1];
2141         ncols = jmax-jmin;
2142         i     = jmin - ui[prow];
2143         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2144         j     = *(uj_lvl_ptr[prow] + i - 1);
2145         cols_lvl = uj_lvl_ptr[prow]+i;
2146         ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2147         nzk += nlnk;
2148 
2149         /* update il and jl for prow */
2150         if (jmin < jmax){
2151           il[prow] = jmin;
2152           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2153         }
2154         prow = nextprow;
2155       }
2156 
2157       /* if free space is not available, make more free space */
2158       if (current_space->local_remaining<nzk) {
2159         i = am - k + 1; /* num of unfactored rows */
2160         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2161         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2162         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2163         reallocs++;
2164       }
2165 
2166       /* copy data into free_space and free_space_lvl, then initialize lnk */
2167       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2168 
2169       /* add the k-th row into il and jl */
2170       if (nzk-1 > 0){
2171         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2172         jl[k] = jl[i]; jl[i] = k;
2173         il[k] = ui[k] + 1;
2174       }
2175       uj_ptr[k]     = current_space->array;
2176       uj_lvl_ptr[k] = current_space_lvl->array;
2177 
2178       current_space->array           += nzk;
2179       current_space->local_used      += nzk;
2180       current_space->local_remaining -= nzk;
2181       current_space_lvl->array           += nzk;
2182       current_space_lvl->local_used      += nzk;
2183       current_space_lvl->local_remaining -= nzk;
2184 
2185       ui[k+1] = ui[k] + nzk;
2186     }
2187 
2188 #if defined(PETSC_USE_INFO)
2189     if (ai[am] != 0) {
2190       PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2191       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr);
2192       ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
2193       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr);
2194     } else {
2195       ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr);
2196     }
2197 #endif
2198 
2199     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2200     ierr = PetscFree(jl);CHKERRQ(ierr);
2201 
2202     /* destroy list of free space and other temporary array(s) */
2203     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
2204     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2205     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2206     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2207     if (ai[am] != 0) {
2208       ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2209     } else {
2210       ratio_needed = 0.0;
2211     }
2212   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2213 
2214   /* put together the new matrix in MATSEQSBAIJ format */
2215   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);CHKERRQ(ierr);
2216   ierr = MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);CHKERRQ(ierr);
2217   B = *fact;
2218   b = (Mat_SeqSBAIJ*)B->data;
2219   ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr);
2220   b->singlemalloc = PETSC_FALSE;
2221   b->free_a  = PETSC_TRUE;
2222   b->free_ij = free_ij;
2223   b->diag    = 0;
2224   b->ilen    = 0;
2225   b->imax    = 0;
2226   b->row     = perm;
2227   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2228   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2229   b->icol = perm;
2230   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2231   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
2232   b->maxnz = b->nz = ui[am];
2233 
2234   B->factor                 = FACTOR_CHOLESKY;
2235   B->info.factor_mallocs    = reallocs;
2236   B->info.fill_ratio_given  = fill;
2237   B->info.fill_ratio_needed = ratio_needed;
2238 
2239   if (perm_identity){
2240     switch (bs) {
2241       case 1:
2242         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2243         B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2244         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2245         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr);
2246         break;
2247       case 2:
2248         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
2249         B->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2250         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2251         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
2252         break;
2253       case 3:
2254         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2255         B->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2256         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2257         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
2258         break;
2259       case 4:
2260         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2261         B->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2262         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2263         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
2264         break;
2265       case 5:
2266         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2267         B->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2268         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2269         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
2270         break;
2271       case 6:
2272         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2273         B->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2274         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2275         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
2276         break;
2277       case 7:
2278         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2279         B->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2280         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2281         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
2282       break;
2283       default:
2284         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2285         B->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2286         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2287         ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr);
2288       break;
2289     }
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294