xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision c3d102fe2dfc19c1832704c537829f04ceb136a2)
1 /*
2     Factorization code for SBAIJ format.
3 */
4 
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 #include "src/mat/impls/baij/seq/baij.h"
7 #include "src/inline/ilu.h"
8 #include "src/inline/dot.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
12 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
13 {
14   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
15   IS              isrow=a->row;
16   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
17   PetscErrorCode  ierr;
18   PetscInt        nz,*vj,k,*r,idx,k1;
19   PetscInt        bs=A->bs,bs2 = a->bs2;
20   MatScalar       *aa=a->a,*v,*diag;
21   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
22 
23   PetscFunctionBegin;
24   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
25   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
26   t  = a->solve_work;
27   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
28   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
29 
30   /* solve U^T * D * y = b by forward substitution */
31   xk = t;
32   for (k=0; k<mbs; k++) { /* t <- perm(b) */
33     idx   = bs*r[k];
34     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
35   }
36   for (k=0; k<mbs; k++){
37     v  = aa + bs2*ai[k];
38     xk = t + k*bs;      /* Dk*xk = k-th block of x */
39     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
40     nz = ai[k+1] - ai[k];
41     vj = aj + ai[k];
42     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
43     while (nz--) {
44       /* x(:) += U(k,:)^T*(Dk*xk) */
45       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
46       vj++; xj = t + (*vj)*bs;
47       v += bs2;
48     }
49     /* xk = inv(Dk)*(Dk*xk) */
50     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
51     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
52   }
53 
54   /* solve U*x = y by back substitution */
55   for (k=mbs-1; k>=0; k--){
56     v  = aa + bs2*ai[k];
57     xk = t + k*bs;        /* xk */
58     nz = ai[k+1] - ai[k];
59     vj = aj + ai[k];
60     xj = t + (*vj)*bs;
61     while (nz--) {
62       /* xk += U(k,:)*x(:) */
63       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
64       vj++;
65       v += bs2; xj = t + (*vj)*bs;
66     }
67     idx   = bs*r[k];
68     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
69   }
70 
71   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
72   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
73   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
74   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
75   PetscLogFlops(bs2*(2*a->nz + mbs));
76   PetscFunctionReturn(0);
77 }
78 
79 #undef __FUNCT__
80 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
81 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
82 {
83   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
84   PetscErrorCode ierr;
85   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
86   PetscInt       nz,*vj,k;
87   PetscInt       bs=A->bs,bs2 = a->bs2;
88   MatScalar      *aa=a->a,*v,*diag;
89   PetscScalar    *x,*xk,*xj,*b,*xk_tmp;
90 
91   PetscFunctionBegin;
92 
93   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
94   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
95 
96   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
97 
98   /* solve U^T * D * y = b by forward substitution */
99   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
100   for (k=0; k<mbs; k++){
101     v  = aa + bs2*ai[k];
102     xk = x + k*bs;      /* Dk*xk = k-th block of x */
103     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
104     nz = ai[k+1] - ai[k];
105     vj = aj + ai[k];
106     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
107     while (nz--) {
108       /* x(:) += U(k,:)^T*(Dk*xk) */
109       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
110       vj++; xj = x + (*vj)*bs;
111       v += bs2;
112     }
113     /* xk = inv(Dk)*(Dk*xk) */
114     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
115     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
116   }
117 
118   /* solve U*x = y by back substitution */
119   for (k=mbs-1; k>=0; k--){
120     v  = aa + bs2*ai[k];
121     xk = x + k*bs;        /* xk */
122     nz = ai[k+1] - ai[k];
123     vj = aj + ai[k];
124     xj = x + (*vj)*bs;
125     while (nz--) {
126       /* xk += U(k,:)*x(:) */
127       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
128       vj++;
129       v += bs2; xj = x + (*vj)*bs;
130     }
131   }
132 
133   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
134   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
135   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
136   PetscLogFlops(bs2*(2*a->nz + mbs));
137   PetscFunctionReturn(0);
138 }
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
142 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
143 {
144   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
145   IS             isrow=a->row;
146   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
147   PetscErrorCode ierr;
148   PetscInt       nz,*vj,k,*r,idx;
149   MatScalar      *aa=a->a,*v,*d;
150   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
151 
152   PetscFunctionBegin;
153   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
154   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
155   t  = a->solve_work;
156   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
157 
158   /* solve U^T * D * y = b by forward substitution */
159   tp = t;
160   for (k=0; k<mbs; k++) { /* t <- perm(b) */
161     idx   = 7*r[k];
162     tp[0] = b[idx];
163     tp[1] = b[idx+1];
164     tp[2] = b[idx+2];
165     tp[3] = b[idx+3];
166     tp[4] = b[idx+4];
167     tp[5] = b[idx+5];
168     tp[6] = b[idx+6];
169     tp += 7;
170   }
171 
172   for (k=0; k<mbs; k++){
173     v  = aa + 49*ai[k];
174     vj = aj + ai[k];
175     tp = t + k*7;
176     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
177     nz = ai[k+1] - ai[k];
178     tp = t + (*vj)*7;
179     while (nz--) {
180       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
181       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
182       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
183       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
184       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
185       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
186       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
187       vj++; tp = t + (*vj)*7;
188       v += 49;
189     }
190 
191     /* xk = inv(Dk)*(Dk*xk) */
192     d  = aa+k*49;          /* ptr to inv(Dk) */
193     tp    = t + k*7;
194     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
195     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
196     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
197     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
198     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
199     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
200     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
201   }
202 
203   /* solve U*x = y by back substitution */
204   for (k=mbs-1; k>=0; k--){
205     v  = aa + 49*ai[k];
206     vj = aj + ai[k];
207     tp    = t + k*7;
208     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
209     nz = ai[k+1] - ai[k];
210 
211     tp = t + (*vj)*7;
212     while (nz--) {
213       /* xk += U(k,:)*x(:) */
214       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
215       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
216       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
217       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
218       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
219       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
220       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
221       vj++; tp = t + (*vj)*7;
222       v += 49;
223     }
224     tp    = t + k*7;
225     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
226     idx   = 7*r[k];
227     x[idx]     = x0;
228     x[idx+1]   = x1;
229     x[idx+2]   = x2;
230     x[idx+3]   = x3;
231     x[idx+4]   = x4;
232     x[idx+5]   = x5;
233     x[idx+6]   = x6;
234   }
235 
236   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
237   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
238   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
239   PetscLogFlops(49*(2*a->nz + mbs));
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
245 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
246 {
247   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
248   PetscErrorCode ierr;
249   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
250   MatScalar      *aa=a->a,*v,*d;
251   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
252   PetscInt       nz,*vj,k;
253 
254   PetscFunctionBegin;
255   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
256   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
257 
258   /* solve U^T * D * y = b by forward substitution */
259   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
260   for (k=0; k<mbs; k++){
261     v  = aa + 49*ai[k];
262     xp = x + k*7;
263     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
264     nz = ai[k+1] - ai[k];
265     vj = aj + ai[k];
266     xp = x + (*vj)*7;
267     while (nz--) {
268       /* x(:) += U(k,:)^T*(Dk*xk) */
269       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
270       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
271       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
272       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
273       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
274       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
275       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
276       vj++; xp = x + (*vj)*7;
277       v += 49;
278     }
279     /* xk = inv(Dk)*(Dk*xk) */
280     d  = aa+k*49;          /* ptr to inv(Dk) */
281     xp = x + k*7;
282     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
283     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
284     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
285     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
286     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
287     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
288     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
289   }
290 
291   /* solve U*x = y by back substitution */
292   for (k=mbs-1; k>=0; k--){
293     v  = aa + 49*ai[k];
294     xp = x + k*7;
295     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
296     nz = ai[k+1] - ai[k];
297     vj = aj + ai[k];
298     xp = x + (*vj)*7;
299     while (nz--) {
300       /* xk += U(k,:)*x(:) */
301       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
302       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
303       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
304       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
305       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
306       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
307       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
308       vj++;
309       v += 49; xp = x + (*vj)*7;
310     }
311     xp = x + k*7;
312     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
313   }
314 
315   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
316   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
317   PetscLogFlops(49*(2*a->nz + mbs));
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
323 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
324 {
325   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
326   IS             isrow=a->row;
327   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
328   PetscErrorCode ierr;
329   PetscInt       nz,*vj,k,*r,idx;
330   MatScalar      *aa=a->a,*v,*d;
331   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
332 
333   PetscFunctionBegin;
334   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
335   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
336   t  = a->solve_work;
337   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
338 
339   /* solve U^T * D * y = b by forward substitution */
340   tp = t;
341   for (k=0; k<mbs; k++) { /* t <- perm(b) */
342     idx   = 6*r[k];
343     tp[0] = b[idx];
344     tp[1] = b[idx+1];
345     tp[2] = b[idx+2];
346     tp[3] = b[idx+3];
347     tp[4] = b[idx+4];
348     tp[5] = b[idx+5];
349     tp += 6;
350   }
351 
352   for (k=0; k<mbs; k++){
353     v  = aa + 36*ai[k];
354     vj = aj + ai[k];
355     tp = t + k*6;
356     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
357     nz = ai[k+1] - ai[k];
358     tp = t + (*vj)*6;
359     while (nz--) {
360       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
361       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
362       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
363       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
364       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
365       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
366       vj++; tp = t + (*vj)*6;
367       v += 36;
368     }
369 
370     /* xk = inv(Dk)*(Dk*xk) */
371     d  = aa+k*36;          /* ptr to inv(Dk) */
372     tp    = t + k*6;
373     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
374     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
375     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
376     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
377     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
378     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
379   }
380 
381   /* solve U*x = y by back substitution */
382   for (k=mbs-1; k>=0; k--){
383     v  = aa + 36*ai[k];
384     vj = aj + ai[k];
385     tp    = t + k*6;
386     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
387     nz = ai[k+1] - ai[k];
388 
389     tp = t + (*vj)*6;
390     while (nz--) {
391       /* xk += U(k,:)*x(:) */
392       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
393       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
394       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
395       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
396       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
397       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
398       vj++; tp = t + (*vj)*6;
399       v += 36;
400     }
401     tp    = t + k*6;
402     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
403     idx   = 6*r[k];
404     x[idx]     = x0;
405     x[idx+1]   = x1;
406     x[idx+2]   = x2;
407     x[idx+3]   = x3;
408     x[idx+4]   = x4;
409     x[idx+5]   = x5;
410   }
411 
412   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
413   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
414   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
415   PetscLogFlops(36*(2*a->nz + mbs));
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
421 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
422 {
423   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
424   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
425   MatScalar      *aa=a->a,*v,*d;
426   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4,x5;
427   PetscErrorCode ierr;
428   PetscInt       nz,*vj,k;
429 
430   PetscFunctionBegin;
431 
432   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
433   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
434 
435   /* solve U^T * D * y = b by forward substitution */
436   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
437   for (k=0; k<mbs; k++){
438     v  = aa + 36*ai[k];
439     xp = x + k*6;
440     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
441     nz = ai[k+1] - ai[k];
442     vj = aj + ai[k];
443     xp = x + (*vj)*6;
444     while (nz--) {
445       /* x(:) += U(k,:)^T*(Dk*xk) */
446       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
447       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
448       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
449       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
450       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
451       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
452       vj++; xp = x + (*vj)*6;
453       v += 36;
454     }
455     /* xk = inv(Dk)*(Dk*xk) */
456     d  = aa+k*36;          /* ptr to inv(Dk) */
457     xp = x + k*6;
458     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
459     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
460     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
461     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
462     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
463     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
464   }
465 
466   /* solve U*x = y by back substitution */
467   for (k=mbs-1; k>=0; k--){
468     v  = aa + 36*ai[k];
469     xp = x + k*6;
470     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
471     nz = ai[k+1] - ai[k];
472     vj = aj + ai[k];
473     xp = x + (*vj)*6;
474     while (nz--) {
475       /* xk += U(k,:)*x(:) */
476       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
477       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
478       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
479       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
480       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
481       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
482       vj++;
483       v += 36; xp = x + (*vj)*6;
484     }
485     xp = x + k*6;
486     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
487   }
488 
489   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
490   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
491   PetscLogFlops(36*(2*a->nz + mbs));
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
497 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
498 {
499   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
500   IS             isrow=a->row;
501   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
502   PetscErrorCode ierr;
503   PetscInt       nz,*vj,k,*r,idx;
504   MatScalar      *aa=a->a,*v,*diag;
505   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;
506 
507   PetscFunctionBegin;
508   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
509   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
510   t  = a->solve_work;
511   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
512 
513   /* solve U^T * D * y = b by forward substitution */
514   tp = t;
515   for (k=0; k<mbs; k++) { /* t <- perm(b) */
516     idx   = 5*r[k];
517     tp[0] = b[idx];
518     tp[1] = b[idx+1];
519     tp[2] = b[idx+2];
520     tp[3] = b[idx+3];
521     tp[4] = b[idx+4];
522     tp += 5;
523   }
524 
525   for (k=0; k<mbs; k++){
526     v  = aa + 25*ai[k];
527     vj = aj + ai[k];
528     tp = t + k*5;
529     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
530     nz = ai[k+1] - ai[k];
531 
532     tp = t + (*vj)*5;
533     while (nz--) {
534       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
535       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
536       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
537       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
538       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
539       vj++; tp = t + (*vj)*5;
540       v += 25;
541     }
542 
543     /* xk = inv(Dk)*(Dk*xk) */
544     diag  = aa+k*25;          /* ptr to inv(Dk) */
545     tp    = t + k*5;
546       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
547       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
548       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
549       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
550       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
551   }
552 
553   /* solve U*x = y by back substitution */
554   for (k=mbs-1; k>=0; k--){
555     v  = aa + 25*ai[k];
556     vj = aj + ai[k];
557     tp    = t + k*5;
558     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
559     nz = ai[k+1] - ai[k];
560 
561     tp = t + (*vj)*5;
562     while (nz--) {
563       /* xk += U(k,:)*x(:) */
564       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
565       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
566       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
567       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
568       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
569       vj++; tp = t + (*vj)*5;
570       v += 25;
571     }
572     tp    = t + k*5;
573     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
574     idx   = 5*r[k];
575     x[idx]     = x0;
576     x[idx+1]   = x1;
577     x[idx+2]   = x2;
578     x[idx+3]   = x3;
579     x[idx+4]   = x4;
580   }
581 
582   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
583   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
584   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
585   PetscLogFlops(25*(2*a->nz + mbs));
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
591 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
592 {
593   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
594   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
595   MatScalar      *aa=a->a,*v,*diag;
596   PetscScalar    *x,*xp,*b,x0,x1,x2,x3,x4;
597   PetscErrorCode ierr;
598   PetscInt       nz,*vj,k;
599 
600   PetscFunctionBegin;
601 
602   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
603   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
604 
605   /* solve U^T * D * y = b by forward substitution */
606   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
607   for (k=0; k<mbs; k++){
608     v  = aa + 25*ai[k];
609     xp = x + k*5;
610     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
611     nz = ai[k+1] - ai[k];
612     vj = aj + ai[k];
613     xp = x + (*vj)*5;
614     while (nz--) {
615       /* x(:) += U(k,:)^T*(Dk*xk) */
616       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
617       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
618       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
619       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
620       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
621       vj++; xp = x + (*vj)*5;
622       v += 25;
623     }
624     /* xk = inv(Dk)*(Dk*xk) */
625     diag = aa+k*25;          /* ptr to inv(Dk) */
626     xp   = x + k*5;
627     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
628     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
629     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
630     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
631     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
632   }
633 
634   /* solve U*x = y by back substitution */
635   for (k=mbs-1; k>=0; k--){
636     v  = aa + 25*ai[k];
637     xp = x + k*5;
638     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
639     nz = ai[k+1] - ai[k];
640     vj = aj + ai[k];
641     xp = x + (*vj)*5;
642     while (nz--) {
643       /* xk += U(k,:)*x(:) */
644       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
645       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
646       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
647       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
648       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
649       vj++;
650       v += 25; xp = x + (*vj)*5;
651     }
652     xp = x + k*5;
653     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
654   }
655 
656   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   PetscLogFlops(25*(2*a->nz + mbs));
659   PetscFunctionReturn(0);
660 }
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
664 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
665 {
666   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
667   IS             isrow=a->row;
668   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
669   PetscErrorCode ierr;
670   PetscInt       nz,*vj,k,*r,idx;
671   MatScalar      *aa=a->a,*v,*diag;
672   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;
673 
674   PetscFunctionBegin;
675   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
676   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
677   t  = a->solve_work;
678   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
679 
680   /* solve U^T * D * y = b by forward substitution */
681   tp = t;
682   for (k=0; k<mbs; k++) { /* t <- perm(b) */
683     idx   = 4*r[k];
684     tp[0] = b[idx];
685     tp[1] = b[idx+1];
686     tp[2] = b[idx+2];
687     tp[3] = b[idx+3];
688     tp += 4;
689   }
690 
691   for (k=0; k<mbs; k++){
692     v  = aa + 16*ai[k];
693     vj = aj + ai[k];
694     tp = t + k*4;
695     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
696     nz = ai[k+1] - ai[k];
697 
698     tp = t + (*vj)*4;
699     while (nz--) {
700       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
701       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
702       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
703       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
704       vj++; tp = t + (*vj)*4;
705       v += 16;
706     }
707 
708     /* xk = inv(Dk)*(Dk*xk) */
709     diag  = aa+k*16;          /* ptr to inv(Dk) */
710     tp    = t + k*4;
711     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
712     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
713     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
714     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
715   }
716 
717   /* solve U*x = y by back substitution */
718   for (k=mbs-1; k>=0; k--){
719     v  = aa + 16*ai[k];
720     vj = aj + ai[k];
721     tp    = t + k*4;
722     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
723     nz = ai[k+1] - ai[k];
724 
725     tp = t + (*vj)*4;
726     while (nz--) {
727       /* xk += U(k,:)*x(:) */
728       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
729       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
730       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
731       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
732       vj++; tp = t + (*vj)*4;
733       v += 16;
734     }
735     tp    = t + k*4;
736     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
737     idx        = 4*r[k];
738     x[idx]     = x0;
739     x[idx+1]   = x1;
740     x[idx+2]   = x2;
741     x[idx+3]   = x3;
742   }
743 
744   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
745   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
746   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
747   PetscLogFlops(16*(2*a->nz + mbs));
748   PetscFunctionReturn(0);
749 }
750 
751 /*
752    Special case where the matrix was factored in the natural ordering.
753    This eliminates the need for the column and row permutation.
754 */
755 #undef __FUNCT__
756 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
757 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
758 {
759   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
760   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
761   MatScalar      *aa=a->a,*v,*diag;
762   PetscScalar    *x,*xp,*b,x0,x1,x2,x3;
763   PetscErrorCode ierr;
764   PetscInt       nz,*vj,k;
765 
766   PetscFunctionBegin;
767 
768   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
769   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
770 
771   /* solve U^T * D * y = b by forward substitution */
772   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
773   for (k=0; k<mbs; k++){
774     v  = aa + 16*ai[k];
775     xp = x + k*4;
776     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
777     nz = ai[k+1] - ai[k];
778     vj = aj + ai[k];
779     xp = x + (*vj)*4;
780     while (nz--) {
781       /* x(:) += U(k,:)^T*(Dk*xk) */
782       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
783       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
784       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
785       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
786       vj++; xp = x + (*vj)*4;
787       v += 16;
788     }
789     /* xk = inv(Dk)*(Dk*xk) */
790     diag = aa+k*16;          /* ptr to inv(Dk) */
791     xp   = x + k*4;
792     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
793     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
794     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
795     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
796   }
797 
798   /* solve U*x = y by back substitution */
799   for (k=mbs-1; k>=0; k--){
800     v  = aa + 16*ai[k];
801     xp = x + k*4;
802     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
803     nz = ai[k+1] - ai[k];
804     vj = aj + ai[k];
805     xp = x + (*vj)*4;
806     while (nz--) {
807       /* xk += U(k,:)*x(:) */
808       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
809       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
810       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
811       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
812       vj++;
813       v += 16; xp = x + (*vj)*4;
814     }
815     xp = x + k*4;
816     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
817   }
818 
819   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
820   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
821   PetscLogFlops(16*(2*a->nz + mbs));
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
827 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
828 {
829   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
830   IS             isrow=a->row;
831   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
832   PetscErrorCode ierr;
833   PetscInt       nz,*vj,k,*r,idx;
834   MatScalar      *aa=a->a,*v,*diag;
835   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;
836 
837   PetscFunctionBegin;
838   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
839   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
840   t  = a->solve_work;
841   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
842 
843   /* solve U^T * D * y = b by forward substitution */
844   tp = t;
845   for (k=0; k<mbs; k++) { /* t <- perm(b) */
846     idx   = 3*r[k];
847     tp[0] = b[idx];
848     tp[1] = b[idx+1];
849     tp[2] = b[idx+2];
850     tp += 3;
851   }
852 
853   for (k=0; k<mbs; k++){
854     v  = aa + 9*ai[k];
855     vj = aj + ai[k];
856     tp = t + k*3;
857     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
858     nz = ai[k+1] - ai[k];
859 
860     tp = t + (*vj)*3;
861     while (nz--) {
862       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
863       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
864       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
865       vj++; tp = t + (*vj)*3;
866       v += 9;
867     }
868 
869     /* xk = inv(Dk)*(Dk*xk) */
870     diag  = aa+k*9;          /* ptr to inv(Dk) */
871     tp    = t + k*3;
872     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
873     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
874     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
875   }
876 
877   /* solve U*x = y by back substitution */
878   for (k=mbs-1; k>=0; k--){
879     v  = aa + 9*ai[k];
880     vj = aj + ai[k];
881     tp    = t + k*3;
882     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
883     nz = ai[k+1] - ai[k];
884 
885     tp = t + (*vj)*3;
886     while (nz--) {
887       /* xk += U(k,:)*x(:) */
888       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
889       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
890       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
891       vj++; tp = t + (*vj)*3;
892       v += 9;
893     }
894     tp    = t + k*3;
895     tp[0] = x0; tp[1] = x1; tp[2] = x2;
896     idx      = 3*r[k];
897     x[idx]   = x0;
898     x[idx+1] = x1;
899     x[idx+2] = x2;
900   }
901 
902   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
903   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
904   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
905   PetscLogFlops(9*(2*a->nz + mbs));
906   PetscFunctionReturn(0);
907 }
908 
909 /*
910    Special case where the matrix was factored in the natural ordering.
911    This eliminates the need for the column and row permutation.
912 */
913 #undef __FUNCT__
914 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
915 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
916 {
917   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
918   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
919   MatScalar      *aa=a->a,*v,*diag;
920   PetscScalar    *x,*xp,*b,x0,x1,x2;
921   PetscErrorCode ierr;
922   PetscInt       nz,*vj,k;
923 
924   PetscFunctionBegin;
925 
926   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
927   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
928 
929   /* solve U^T * D * y = b by forward substitution */
930   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
931   for (k=0; k<mbs; k++){
932     v  = aa + 9*ai[k];
933     xp = x + k*3;
934     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
935     nz = ai[k+1] - ai[k];
936     vj = aj + ai[k];
937     xp = x + (*vj)*3;
938     while (nz--) {
939       /* x(:) += U(k,:)^T*(Dk*xk) */
940       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
941       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
942       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
943       vj++; xp = x + (*vj)*3;
944       v += 9;
945     }
946     /* xk = inv(Dk)*(Dk*xk) */
947     diag = aa+k*9;          /* ptr to inv(Dk) */
948     xp   = x + k*3;
949     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
950     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
951     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
952   }
953 
954   /* solve U*x = y by back substitution */
955   for (k=mbs-1; k>=0; k--){
956     v  = aa + 9*ai[k];
957     xp = x + k*3;
958     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
959     nz = ai[k+1] - ai[k];
960     vj = aj + ai[k];
961     xp = x + (*vj)*3;
962     while (nz--) {
963       /* xk += U(k,:)*x(:) */
964       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
965       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
966       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
967       vj++;
968       v += 9; xp = x + (*vj)*3;
969     }
970     xp = x + k*3;
971     xp[0] = x0; xp[1] = x1; xp[2] = x2;
972   }
973 
974   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
975   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
976   PetscLogFlops(9*(2*a->nz + mbs));
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
982 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
983 {
984   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
985   IS             isrow=a->row;
986   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
987   PetscErrorCode ierr;
988   PetscInt       nz,*vj,k,k2,*r,idx;
989   MatScalar      *aa=a->a,*v,*diag;
990   PetscScalar    *x,*b,x0,x1,*t;
991 
992   PetscFunctionBegin;
993   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
994   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
995   t  = a->solve_work;
996   /* printf("called MatSolve_SeqSBAIJ_2\n"); */
997   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
998 
999   /* solve U^T * D * y = perm(b) by forward substitution */
1000   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1001     idx = 2*r[k];
1002     t[k*2]   = b[idx];
1003     t[k*2+1] = b[idx+1];
1004   }
1005   for (k=0; k<mbs; k++){
1006     v  = aa + 4*ai[k];
1007     vj = aj + ai[k];
1008     k2 = k*2;
1009     x0 = t[k2]; x1 = t[k2+1];
1010     nz = ai[k+1] - ai[k];
1011     while (nz--) {
1012       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1013       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1014       vj++; v += 4;
1015     }
1016     diag = aa+k*4;  /* ptr to inv(Dk) */
1017     t[k2]   = diag[0]*x0 + diag[2]*x1;
1018     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1019   }
1020 
1021   /* solve U*x = y by back substitution */
1022   for (k=mbs-1; k>=0; k--){
1023     v  = aa + 4*ai[k];
1024     vj = aj + ai[k];
1025     k2 = k*2;
1026     x0 = t[k2]; x1 = t[k2+1];
1027     nz = ai[k+1] - ai[k];
1028     while (nz--) {
1029       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1030       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1031       vj++; v += 4;
1032     }
1033     t[k2]    = x0;
1034     t[k2+1]  = x1;
1035     idx      = 2*r[k];
1036     x[idx]   = x0;
1037     x[idx+1] = x1;
1038   }
1039 
1040   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1041   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1042   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1043   PetscLogFlops(4*(2*a->nz + mbs));
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 /*
1048    Special case where the matrix was factored in the natural ordering.
1049    This eliminates the need for the column and row permutation.
1050 */
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1053 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1054 {
1055   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1056   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1057   MatScalar      *aa=a->a,*v,*diag;
1058   PetscScalar    *x,*b,x0,x1;
1059   PetscErrorCode ierr;
1060   PetscInt       nz,*vj,k,k2;
1061 
1062   PetscFunctionBegin;
1063 
1064   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1065   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1066 
1067   /* solve U^T * D * y = b by forward substitution */
1068   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1069   for (k=0; k<mbs; k++){
1070     v  = aa + 4*ai[k];
1071     vj = aj + ai[k];
1072     k2 = k*2;
1073     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1074     nz = ai[k+1] - ai[k];
1075 
1076     while (nz--) {
1077       /* x(:) += U(k,:)^T*(Dk*xk) */
1078       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1079       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1080       vj++; v += 4;
1081     }
1082     /* xk = inv(Dk)*(Dk*xk) */
1083     diag = aa+k*4;          /* ptr to inv(Dk) */
1084     x[k2]   = diag[0]*x0 + diag[2]*x1;
1085     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1086   }
1087 
1088   /* solve U*x = y by back substitution */
1089   for (k=mbs-1; k>=0; k--){
1090     v  = aa + 4*ai[k];
1091     vj = aj + ai[k];
1092     k2 = k*2;
1093     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1094     nz = ai[k+1] - ai[k];
1095     while (nz--) {
1096       /* xk += U(k,:)*x(:) */
1097       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1098       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1099       vj++; v += 4;
1100     }
1101     x[k2]     = x0;
1102     x[k2+1]   = x1;
1103   }
1104 
1105   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1106   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1107   PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1113 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1114 {
1115   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1116   IS             isrow=a->row;
1117   PetscErrorCode ierr;
1118   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rip;
1119   MatScalar      *aa=a->a,*v;
1120   PetscScalar    *x,*b,xk,*t;
1121   PetscInt       nz,*vj,k;
1122 
1123   PetscFunctionBegin;
1124   if (!mbs) PetscFunctionReturn(0);
1125   /* printf(" MatSolve_SeqSBAIJ_1 is called\n"); */
1126 
1127   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1128   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1129   t    = a->solve_work;
1130 
1131   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1132 
1133   /* solve U^T*D*y = perm(b) by forward substitution */
1134   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1135   for (k=0; k<mbs; k++){
1136     v  = aa + ai[k] + 1;
1137     vj = aj + ai[k] + 1;
1138     xk = t[k];
1139     nz = ai[k+1] - ai[k] - 1;
1140     while (nz--) t[*vj++] += (*v++) * xk;
1141     t[k] = xk*aa[ai[k]];  /* aa[k] = 1/D(k) */
1142   }
1143 
1144   /* solve U*x = y by back substitution */
1145   for (k=mbs-1; k>=0; k--){
1146     v  = aa + ai[k] + 1;
1147     vj = aj + ai[k] + 1;
1148     xk = t[k];
1149     nz = ai[k+1] - ai[k] - 1;
1150     while (nz--) xk += (*v++) * t[*vj++];
1151     t[k]      = xk;
1152     x[rip[k]] = xk;
1153   }
1154 
1155   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1156   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1157   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1158   PetscLogFlops(4*a->nz + A->m);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1164 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1165 {
1166   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   if (A->bs == 1) {
1171     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1172   } else {
1173     IS              isrow=a->row;
1174     PetscInt             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1175     MatScalar       *aa=a->a,*v;
1176     PetscScalar     *x,*b,*t;
1177     PetscInt             nz,*vj,k,n;
1178     if (bb->n > a->solves_work_n) {
1179       if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
1180       ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1181       a->solves_work_n = bb->n;
1182     }
1183     n    = bb->n;
1184     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1185     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1186     t    = a->solves_work;
1187 
1188     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1189 
1190     /* solve U^T*D*y = perm(b) by forward substitution */
1191     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1192     for (k=0; k<mbs; k++){
1193       v  = aa + ai[k];
1194       vj = aj + ai[k];
1195       nz = ai[k+1] - ai[k];
1196       while (nz--) {
1197         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1198         v++;vj++;
1199       }
1200       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1201     }
1202 
1203     /* solve U*x = y by back substitution */
1204     for (k=mbs-1; k>=0; k--){
1205       v  = aa + ai[k];
1206       vj = aj + ai[k];
1207       nz = ai[k+1] - ai[k];
1208       while (nz--) {
1209         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1210         v++;vj++;
1211       }
1212       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1213     }
1214 
1215     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1216     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1217     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1218     PetscLogFlops(bb->n*(4*a->nz + A->m));
1219   }
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /*
1224       Special case where the matrix was ILU(0) factored in the natural
1225    ordering. This eliminates the need for the column and row permutation.
1226 */
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1229 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1230 {
1231   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1232   PetscErrorCode ierr;
1233   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1234   MatScalar      *aa=a->a,*v;
1235   PetscScalar    *x,*b,xk;
1236   PetscInt       nz,*vj,k;
1237 
1238   PetscFunctionBegin;
1239   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1240   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1241 
1242   /* solve U^T*D*y = b by forward substitution */
1243   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1244   for (k=0; k<mbs; k++){
1245     v  = aa + ai[k] + 1;
1246     vj = aj + ai[k] + 1;
1247     xk = x[k];
1248     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1249     while (nz--) x[*vj++] += (*v++) * xk;
1250     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1251   }
1252 
1253   /* solve U*x = y by back substitution */
1254   for (k=mbs-2; k>=0; k--){
1255     v  = aa + ai[k] + 1;
1256     vj = aj + ai[k] + 1;
1257     xk = x[k];
1258     nz = ai[k+1] - ai[k] - 1;
1259     while (nz--) xk += (*v++) * x[*vj++];
1260     x[k] = xk;
1261   }
1262 
1263   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1264   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1265   PetscLogFlops(4*a->nz + A->m);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR"
1272 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1273 {
1274   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1275   PetscErrorCode ierr;
1276   PetscInt       *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1277   PetscInt       *jutmp,bs = A->bs,bs2=a->bs2;
1278   PetscInt       m,reallocs = 0,*levtmp;
1279   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1280   PetscInt       incrlev,*lev,shift,prow,nz;
1281   PetscReal      f = info->fill,levels = info->levels;
1282   PetscTruth     perm_identity;
1283 
1284   PetscFunctionBegin;
1285   /* check whether perm is the identity mapping */
1286   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1287 
1288   if (perm_identity){
1289     a->permute = PETSC_FALSE;
1290     ai = a->i; aj = a->j;
1291   } else { /*  non-trivial permutation */
1292     a->permute = PETSC_TRUE;
1293     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1294     ai = a->inew; aj = a->jnew;
1295   }
1296 
1297   /* initialization */
1298   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1299   umax  = (PetscInt)(f*ai[mbs] + 1);
1300   ierr  = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr);
1301   umax += mbs + 1;
1302   shift = mbs + 1;
1303   ierr  = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr);
1304   ierr  = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr);
1305   iu[0] = mbs + 1;
1306   juidx = mbs + 1;
1307   /* prowl: linked list for pivot row */
1308   ierr    = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr);
1309   /* q: linked list for col index */
1310   q       = prowl + mbs;
1311   levtmp  = q     + mbs;
1312 
1313   for (i=0; i<mbs; i++){
1314     prowl[i] = mbs;
1315     q[i] = 0;
1316   }
1317 
1318   /* for each row k */
1319   for (k=0; k<mbs; k++){
1320     nzk  = 0;
1321     q[k] = mbs;
1322     /* copy current row into linked list */
1323     nz = ai[rip[k]+1] - ai[rip[k]];
1324     j = ai[rip[k]];
1325     while (nz--){
1326       vj = rip[aj[j++]];
1327       if (vj > k){
1328         qm = k;
1329         do {
1330           m  = qm; qm = q[m];
1331         } while(qm < vj);
1332         if (qm == vj) {
1333           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1334         }
1335         nzk++;
1336         q[m]       = vj;
1337         q[vj]      = qm;
1338         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1339       }
1340     }
1341 
1342     /* modify nonzero structure of k-th row by computing fill-in
1343        for each row prow to be merged in */
1344     prow = k;
1345     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1346 
1347     while (prow < k){
1348       /* merge row prow into k-th row */
1349       jmin = iu[prow] + 1;
1350       jmax = iu[prow+1];
1351       qm = k;
1352       for (j=jmin; j<jmax; j++){
1353         incrlev = lev[j-shift] + 1;
1354 	if (incrlev > levels) continue;
1355         vj      = ju[j];
1356         do {
1357           m = qm; qm = q[m];
1358         } while (qm < vj);
1359         if (qm != vj){      /* a fill */
1360           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1361           levtmp[vj] = incrlev;
1362         } else {
1363           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1364         }
1365       }
1366       prow = prowl[prow]; /* next pivot row */
1367     }
1368 
1369     /* add k to row list for first nonzero element in k-th row */
1370     if (nzk > 1){
1371       i = q[k]; /* col value of first nonzero element in k_th row of U */
1372       prowl[k] = prowl[i]; prowl[i] = k;
1373     }
1374     iu[k+1] = iu[k] + nzk;
1375 
1376     /* allocate more space to ju and lev if needed */
1377     if (iu[k+1] > umax) {
1378       /* estimate how much additional space we will need */
1379       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1380       /* just double the memory each time */
1381       maxadd = umax;
1382       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1383       umax += maxadd;
1384 
1385       /* allocate a longer ju */
1386       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1387       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr);
1388       ierr     = PetscFree(ju);CHKERRQ(ierr);
1389       ju       = jutmp;
1390 
1391       ierr     = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr);
1392       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr);
1393       ierr     = PetscFree(lev);CHKERRQ(ierr);
1394       lev      = jutmp;
1395       reallocs += 2; /* count how many times we realloc */
1396     }
1397 
1398     /* save nonzero structure of k-th row in ju */
1399     i=k;
1400     while (nzk --) {
1401       i                = q[i];
1402       ju[juidx]        = i;
1403       lev[juidx-shift] = levtmp[i];
1404       juidx++;
1405     }
1406   }
1407 
1408   if (ai[mbs] != 0) {
1409     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1410     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1411     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af);
1412     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af);
1413     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n");
1414   } else {
1415     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1416   }
1417 
1418   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1419   ierr = PetscFree(prowl);CHKERRQ(ierr);
1420   ierr = PetscFree(lev);CHKERRQ(ierr);
1421 
1422   /* put together the new matrix */
1423   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1424   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1425   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1426 
1427   /* PetscLogObjectParent(*B,iperm); */
1428   b    = (Mat_SeqSBAIJ*)(*B)->data;
1429   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1430   b->singlemalloc = PETSC_FALSE;
1431   /* the next line frees the default space generated by the Create() */
1432   ierr    = PetscFree(b->a);CHKERRQ(ierr);
1433   ierr    = PetscFree(b->ilen);CHKERRQ(ierr);
1434   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1435   b->j    = ju;
1436   b->i    = iu;
1437   b->diag = 0;
1438   b->ilen = 0;
1439   b->imax = 0;
1440 
1441   if (b->row) {
1442     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1443   }
1444   if (b->icol) {
1445     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1446   }
1447   b->row  = perm;
1448   b->icol = perm;
1449   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1450   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1451   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1452   /* In b structure:  Free imax, ilen, old a, old j.
1453      Allocate idnew, solve_work, new a, new j */
1454   ierr    = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1455   b->maxnz = b->nz = iu[mbs];
1456 
1457   (*B)->factor                 = FACTOR_CHOLESKY;
1458   (*B)->info.factor_mallocs    = reallocs;
1459   (*B)->info.fill_ratio_given  = f;
1460   if (ai[mbs] != 0) {
1461     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1462   } else {
1463     (*B)->info.fill_ratio_needed = 0.0;
1464   }
1465 
1466   if (perm_identity){
1467     switch (bs) {
1468       case 1:
1469         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1470         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1471         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1472         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1473         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n");
1474         break;
1475       case 2:
1476         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1477         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1478         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1479         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1480         break;
1481       case 3:
1482         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1483         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1484         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1485         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1486         break;
1487       case 4:
1488         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1489         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1490         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1491         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1492         break;
1493       case 5:
1494         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1495         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1496         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1497         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1498         break;
1499       case 6:
1500         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1501         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1502         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1503         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1504         break;
1505       case 7:
1506         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1507         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1508         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1509         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1510       break;
1511       default:
1512         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1513         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1514         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1515         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
1516       break;
1517     }
1518   }
1519 
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 #include "petscbt.h"
1524 #include "src/mat/utils/freespace.h"
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
1527 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1528 {
1529   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1530   Mat_SeqSBAIJ   *b;
1531   Mat            B;
1532   PetscErrorCode ierr;
1533   PetscTruth     perm_identity;
1534   PetscInt       bs=A->bs,am=a->mbs;
1535   PetscInt       reallocs=0,*rip,i,*ai,*aj,*ui;
1536   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1537   PetscInt       nlnk,*lnk,*lnk_lvl,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
1538   PetscReal      fill=info->fill,levels=info->levels;
1539   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1540   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1541   PetscBT        lnkbt;
1542 
1543   PetscFunctionBegin;
1544   /*
1545    This code originally uses Modified Sparse Row (MSR) storage
1546    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
1547    Then it is rewritten so the factor B takes seqsbaij format. However the associated
1548    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
1549    thus the original code in MSR format is still used for these cases.
1550    The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
1551    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
1552   */
1553   if (bs > 1){
1554     ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr);
1555     PetscFunctionReturn(0);
1556   }
1557 
1558   /* check whether perm is the identity mapping */
1559   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1560 
1561   /* special case that simply copies fill pattern */
1562   if (!levels && perm_identity) {
1563     a->permute = PETSC_FALSE;
1564     ai = a->i; aj = a->j;
1565     ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1566     ierr = PetscMemcpy(ui,ai,(am+1)*sizeof(PetscInt));CHKERRQ(ierr);
1567     ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1568     ierr = PetscMemcpy(uj,aj,ai[am]*sizeof(PetscInt));CHKERRQ(ierr);
1569 
1570   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1571     if (perm_identity){
1572       a->permute = PETSC_FALSE;
1573       ai = a->i; aj = a->j;
1574     } else { /*  non-trivial permutation */
1575       a->permute = PETSC_TRUE;
1576       ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1577       ai   = a->inew; aj = a->jnew;
1578     }
1579     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1580 
1581     /* initialization */
1582     ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr);
1583     ui[0] = 0;
1584     ierr  = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr);
1585 
1586     /* jl: linked list for storing indices of the pivot rows
1587        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1588     ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr);
1589     il         = jl + am;
1590     uj_ptr     = (PetscInt**)(il + am);
1591     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1592     for (i=0; i<am; i++){
1593       jl[i] = am; il[i] = 0;
1594     }
1595 
1596     /* create and initialize a linked list for storing column indices of the active row k */
1597     nlnk = am + 1;
1598     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1599 
1600     /* initial FreeSpace size is fill*(ai[am]+1) */
1601     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr);
1602     current_space = free_space;
1603     ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr);
1604     current_space_lvl = free_space_lvl;
1605 
1606     for (k=0; k<am; k++){  /* for each active row k */
1607       /* initialize lnk by the column indices of row rip[k] */
1608       nzk   = 0;
1609       ncols = ai[rip[k]+1] - ai[rip[k]];
1610       cols  = cols_lvl + am;
1611       for (j=0; j<ncols; j++){
1612         i = *(aj + ai[rip[k]] + j);
1613         cols[j] = rip[i];
1614         cols_lvl[j] = -1;  /* initialize level for nonzero entries */
1615       }
1616       ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1617       nzk += nlnk;
1618 
1619       /* update lnk by computing fill-in for each pivot row to be merged in */
1620       prow = jl[k]; /* 1st pivot row */
1621 
1622       while (prow < k){
1623         nextprow = jl[prow];
1624 
1625         /* merge prow into k-th row */
1626         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1627         jmax = ui[prow+1];
1628         ncols = jmax-jmin;
1629         i     = jmin - ui[prow];
1630         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1631         for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1632         ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1633         nzk += nlnk;
1634 
1635         /* update il and jl for prow */
1636         if (jmin < jmax){
1637           il[prow] = jmin;
1638           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1639         }
1640         prow = nextprow;
1641       }
1642 
1643       /* if free space is not available, make more free space */
1644       if (current_space->local_remaining<nzk) {
1645         i = am - k + 1; /* num of unfactored rows */
1646         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1647         ierr = GetMoreSpace(i,&current_space);CHKERRQ(ierr);
1648         ierr = GetMoreSpace(i,&current_space_lvl);CHKERRQ(ierr);
1649         reallocs++;
1650       }
1651 
1652       /* copy data into free_space and free_space_lvl, then initialize lnk */
1653       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1654 
1655       /* add the k-th row into il and jl */
1656       if (nzk-1 > 0){
1657         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1658         jl[k] = jl[i]; jl[i] = k;
1659         il[k] = ui[k] + 1;
1660       }
1661       uj_ptr[k]     = current_space->array;
1662       uj_lvl_ptr[k] = current_space_lvl->array;
1663 
1664       current_space->array           += nzk;
1665       current_space->local_used      += nzk;
1666       current_space->local_remaining -= nzk;
1667       current_space_lvl->array           += nzk;
1668       current_space_lvl->local_used      += nzk;
1669       current_space_lvl->local_remaining -= nzk;
1670 
1671       ui[k+1] = ui[k] + nzk;
1672     }
1673 
1674     if (ai[am] != 0) {
1675       PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1676       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af);
1677       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1678       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af);
1679     } else {
1680       PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n");
1681     }
1682 
1683     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1684     ierr = PetscFree(jl);CHKERRQ(ierr);
1685     ierr = PetscFree(cols_lvl);CHKERRQ(ierr);
1686 
1687     /* destroy list of free space and other temporary array(s) */
1688     ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr);
1689     ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
1690     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1691     ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr);
1692   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1693 
1694   /* put together the new matrix in MATSEQSBAIJ format */
1695   ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr);
1696   B = *fact;
1697   ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr);
1698   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1699 
1700   b = (Mat_SeqSBAIJ*)B->data;
1701   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1702   b->singlemalloc = PETSC_FALSE;
1703   /* the next line frees the default space generated by the Create() */
1704   ierr = PetscFree(b->a);CHKERRQ(ierr);
1705   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1706   ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr);
1707   b->j    = uj;
1708   b->i    = ui;
1709   b->diag = 0;
1710   b->ilen = 0;
1711   b->imax = 0;
1712   b->row  = perm;
1713   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1714   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1715   b->icol = perm;
1716   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1717   ierr    = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1718   ierr    = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
1719   b->maxnz = b->nz = ui[am];
1720 
1721   B->factor                 = FACTOR_CHOLESKY;
1722   B->info.factor_mallocs    = reallocs;
1723   B->info.fill_ratio_given  = fill;
1724   if (ai[am] != 0) {
1725     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1726   } else {
1727     B->info.fill_ratio_needed = 0.0;
1728   }
1729 
1730   if (perm_identity){
1731     switch (bs) {
1732       case 1:
1733         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1734         B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1735         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1736         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n");
1737         break;
1738       case 2:
1739         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1740         B->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1741         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1742         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1743         break;
1744       case 3:
1745         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1746         B->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1747         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1748         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1749         break;
1750       case 4:
1751         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1752         B->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1753         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1754         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1755         break;
1756       case 5:
1757         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1758         B->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1759         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1760         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1761         break;
1762       case 6:
1763         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1764         B->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1765         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1766         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1767         break;
1768       case 7:
1769         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1770         B->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1771         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1772         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1773       break;
1774       default:
1775         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1776         B->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1777         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1778         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
1779       break;
1780     }
1781   }
1782   PetscFunctionReturn(0);
1783 }
1784 
1785