xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 69a612a9be7fbcc0ab27d96ce41c4e3795ac7f2a)
1 /*
2     Factorization code for SBAIJ format.
3 */
4 
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 #include "src/mat/impls/baij/seq/baij.h"
7 #include "src/inline/ilu.h"
8 #include "src/inline/dot.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
12 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
13 {
14   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
15   IS              isrow=a->row;
16   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
17   PetscErrorCode ierr;
18   int             nz,*vj,k,*r,idx,k1;
19   int             bs=A->bs,bs2 = a->bs2;
20   MatScalar       *aa=a->a,*v,*diag;
21   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
22 
23   PetscFunctionBegin;
24   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
25   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
26   t  = a->solve_work;
27   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
28   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
29 
30   /* solve U^T * D * y = b by forward substitution */
31   xk = t;
32   for (k=0; k<mbs; k++) { /* t <- perm(b) */
33     idx   = bs*r[k];
34     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
35   }
36   for (k=0; k<mbs; k++){
37     v  = aa + bs2*ai[k];
38     xk = t + k*bs;      /* Dk*xk = k-th block of x */
39     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
40     nz = ai[k+1] - ai[k];
41     vj = aj + ai[k];
42     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
43     while (nz--) {
44       /* x(:) += U(k,:)^T*(Dk*xk) */
45       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
46       vj++; xj = t + (*vj)*bs;
47       v += bs2;
48     }
49     /* xk = inv(Dk)*(Dk*xk) */
50     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
51     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
52   }
53 
54   /* solve U*x = y by back substitution */
55   for (k=mbs-1; k>=0; k--){
56     v  = aa + bs2*ai[k];
57     xk = t + k*bs;        /* xk */
58     nz = ai[k+1] - ai[k];
59     vj = aj + ai[k];
60     xj = t + (*vj)*bs;
61     while (nz--) {
62       /* xk += U(k,:)*x(:) */
63       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
64       vj++;
65       v += bs2; xj = t + (*vj)*bs;
66     }
67     idx   = bs*r[k];
68     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
69   }
70 
71   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
72   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
73   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
74   PetscLogFlops(bs2*(2*a->nz + mbs));
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
80 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
81 {
82   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
83   PetscErrorCode ierr;
84   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
85   int             nz,*vj,k;
86   int             bs=A->bs,bs2 = a->bs2;
87   MatScalar       *aa=a->a,*v,*diag;
88   PetscScalar     *x,*xk,*xj,*b,*xk_tmp;
89 
90   PetscFunctionBegin;
91 
92   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
93   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
94 
95   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
96 
97   /* solve U^T * D * y = b by forward substitution */
98   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
99   for (k=0; k<mbs; k++){
100     v  = aa + bs2*ai[k];
101     xk = x + k*bs;      /* Dk*xk = k-th block of x */
102     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
103     nz = ai[k+1] - ai[k];
104     vj = aj + ai[k];
105     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
106     while (nz--) {
107       /* x(:) += U(k,:)^T*(Dk*xk) */
108       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
109       vj++; xj = x + (*vj)*bs;
110       v += bs2;
111     }
112     /* xk = inv(Dk)*(Dk*xk) */
113     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
114     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
115   }
116 
117   /* solve U*x = y by back substitution */
118   for (k=mbs-1; k>=0; k--){
119     v  = aa + bs2*ai[k];
120     xk = x + k*bs;        /* xk */
121     nz = ai[k+1] - ai[k];
122     vj = aj + ai[k];
123     xj = x + (*vj)*bs;
124     while (nz--) {
125       /* xk += U(k,:)*x(:) */
126       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
127       vj++;
128       v += bs2; xj = x + (*vj)*bs;
129     }
130   }
131 
132   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
133   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
134   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
135   PetscLogFlops(bs2*(2*a->nz + mbs));
136   PetscFunctionReturn(0);
137 }
138 
139 #undef __FUNCT__
140 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
141 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
142 {
143   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
144   IS              isrow=a->row;
145   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
146   PetscErrorCode ierr;
147   int             nz,*vj,k,*r,idx;
148   MatScalar       *aa=a->a,*v,*d;
149   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
150 
151   PetscFunctionBegin;
152   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
153   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
154   t  = a->solve_work;
155   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
156 
157   /* solve U^T * D * y = b by forward substitution */
158   tp = t;
159   for (k=0; k<mbs; k++) { /* t <- perm(b) */
160     idx   = 7*r[k];
161     tp[0] = b[idx];
162     tp[1] = b[idx+1];
163     tp[2] = b[idx+2];
164     tp[3] = b[idx+3];
165     tp[4] = b[idx+4];
166     tp[5] = b[idx+5];
167     tp[6] = b[idx+6];
168     tp += 7;
169   }
170 
171   for (k=0; k<mbs; k++){
172     v  = aa + 49*ai[k];
173     vj = aj + ai[k];
174     tp = t + k*7;
175     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
176     nz = ai[k+1] - ai[k];
177     tp = t + (*vj)*7;
178     while (nz--) {
179       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
180       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
181       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
182       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
183       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
184       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
185       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
186       vj++; tp = t + (*vj)*7;
187       v += 49;
188     }
189 
190     /* xk = inv(Dk)*(Dk*xk) */
191     d  = aa+k*49;          /* ptr to inv(Dk) */
192     tp    = t + k*7;
193     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
194     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
195     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
196     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
197     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
198     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
199     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
200   }
201 
202   /* solve U*x = y by back substitution */
203   for (k=mbs-1; k>=0; k--){
204     v  = aa + 49*ai[k];
205     vj = aj + ai[k];
206     tp    = t + k*7;
207     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
208     nz = ai[k+1] - ai[k];
209 
210     tp = t + (*vj)*7;
211     while (nz--) {
212       /* xk += U(k,:)*x(:) */
213       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
214       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
215       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
216       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
217       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
218       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
219       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
220       vj++; tp = t + (*vj)*7;
221       v += 49;
222     }
223     tp    = t + k*7;
224     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
225     idx   = 7*r[k];
226     x[idx]     = x0;
227     x[idx+1]   = x1;
228     x[idx+2]   = x2;
229     x[idx+3]   = x3;
230     x[idx+4]   = x4;
231     x[idx+5]   = x5;
232     x[idx+6]   = x6;
233   }
234 
235   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
236   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
237   PetscLogFlops(49*(2*a->nz + mbs));
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
243 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
244 {
245   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
246   PetscErrorCode ierr;
247   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
248   MatScalar       *aa=a->a,*v,*d;
249   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
250   int             nz,*vj,k;
251 
252   PetscFunctionBegin;
253 
254   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
255   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
256 
257   /* solve U^T * D * y = b by forward substitution */
258   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
259   for (k=0; k<mbs; k++){
260     v  = aa + 49*ai[k];
261     xp = x + k*7;
262     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
263     nz = ai[k+1] - ai[k];
264     vj = aj + ai[k];
265     xp = x + (*vj)*7;
266     while (nz--) {
267       /* x(:) += U(k,:)^T*(Dk*xk) */
268       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
269       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
270       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
271       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
272       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
273       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
274       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
275       vj++; xp = x + (*vj)*7;
276       v += 49;
277     }
278     /* xk = inv(Dk)*(Dk*xk) */
279     d  = aa+k*49;          /* ptr to inv(Dk) */
280     xp = x + k*7;
281     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
282     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
283     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
284     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
285     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
286     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
287     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
288   }
289 
290   /* solve U*x = y by back substitution */
291   for (k=mbs-1; k>=0; k--){
292     v  = aa + 49*ai[k];
293     xp = x + k*7;
294     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
295     nz = ai[k+1] - ai[k];
296     vj = aj + ai[k];
297     xp = x + (*vj)*7;
298     while (nz--) {
299       /* xk += U(k,:)*x(:) */
300       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
301       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
302       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
303       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
304       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
305       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
306       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
307       vj++;
308       v += 49; xp = x + (*vj)*7;
309     }
310     xp = x + k*7;
311     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
312   }
313 
314   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
315   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
316   PetscLogFlops(49*(2*a->nz + mbs));
317   PetscFunctionReturn(0);
318 }
319 
320 #undef __FUNCT__
321 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
322 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
323 {
324   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
325   IS              isrow=a->row;
326   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
327   PetscErrorCode ierr;
328   int             nz,*vj,k,*r,idx;
329   MatScalar       *aa=a->a,*v,*d;
330   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
331 
332   PetscFunctionBegin;
333   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
334   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
335   t  = a->solve_work;
336   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
337 
338   /* solve U^T * D * y = b by forward substitution */
339   tp = t;
340   for (k=0; k<mbs; k++) { /* t <- perm(b) */
341     idx   = 6*r[k];
342     tp[0] = b[idx];
343     tp[1] = b[idx+1];
344     tp[2] = b[idx+2];
345     tp[3] = b[idx+3];
346     tp[4] = b[idx+4];
347     tp[5] = b[idx+5];
348     tp += 6;
349   }
350 
351   for (k=0; k<mbs; k++){
352     v  = aa + 36*ai[k];
353     vj = aj + ai[k];
354     tp = t + k*6;
355     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
356     nz = ai[k+1] - ai[k];
357     tp = t + (*vj)*6;
358     while (nz--) {
359       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
360       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
361       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
362       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
363       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
364       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
365       vj++; tp = t + (*vj)*6;
366       v += 36;
367     }
368 
369     /* xk = inv(Dk)*(Dk*xk) */
370     d  = aa+k*36;          /* ptr to inv(Dk) */
371     tp    = t + k*6;
372     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
373     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
374     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
375     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
376     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
377     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
378   }
379 
380   /* solve U*x = y by back substitution */
381   for (k=mbs-1; k>=0; k--){
382     v  = aa + 36*ai[k];
383     vj = aj + ai[k];
384     tp    = t + k*6;
385     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
386     nz = ai[k+1] - ai[k];
387 
388     tp = t + (*vj)*6;
389     while (nz--) {
390       /* xk += U(k,:)*x(:) */
391       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
392       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
393       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
394       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
395       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
396       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
397       vj++; tp = t + (*vj)*6;
398       v += 36;
399     }
400     tp    = t + k*6;
401     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
402     idx   = 6*r[k];
403     x[idx]     = x0;
404     x[idx+1]   = x1;
405     x[idx+2]   = x2;
406     x[idx+3]   = x3;
407     x[idx+4]   = x4;
408     x[idx+5]   = x5;
409   }
410 
411   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
412   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
413   PetscLogFlops(36*(2*a->nz + mbs));
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
419 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
420 {
421   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
422   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
423   MatScalar       *aa=a->a,*v,*d;
424   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5;
425   PetscErrorCode ierr;
426   int             nz,*vj,k;
427 
428   PetscFunctionBegin;
429 
430   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
431   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
432 
433   /* solve U^T * D * y = b by forward substitution */
434   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
435   for (k=0; k<mbs; k++){
436     v  = aa + 36*ai[k];
437     xp = x + k*6;
438     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
439     nz = ai[k+1] - ai[k];
440     vj = aj + ai[k];
441     xp = x + (*vj)*6;
442     while (nz--) {
443       /* x(:) += U(k,:)^T*(Dk*xk) */
444       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
445       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
446       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
447       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
448       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
449       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
450       vj++; xp = x + (*vj)*6;
451       v += 36;
452     }
453     /* xk = inv(Dk)*(Dk*xk) */
454     d  = aa+k*36;          /* ptr to inv(Dk) */
455     xp = x + k*6;
456     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
457     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
458     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
459     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
460     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
461     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
462   }
463 
464   /* solve U*x = y by back substitution */
465   for (k=mbs-1; k>=0; k--){
466     v  = aa + 36*ai[k];
467     xp = x + k*6;
468     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
469     nz = ai[k+1] - ai[k];
470     vj = aj + ai[k];
471     xp = x + (*vj)*6;
472     while (nz--) {
473       /* xk += U(k,:)*x(:) */
474       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
475       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
476       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
477       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
478       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
479       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
480       vj++;
481       v += 36; xp = x + (*vj)*6;
482     }
483     xp = x + k*6;
484     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
485   }
486 
487   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
488   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
489   PetscLogFlops(36*(2*a->nz + mbs));
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
495 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
496 {
497   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
498   IS              isrow=a->row;
499   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
500   PetscErrorCode ierr;
501   int             nz,*vj,k,*r,idx;
502   MatScalar       *aa=a->a,*v,*diag;
503   PetscScalar     *x,*b,x0,x1,x2,x3,x4,*t,*tp;
504 
505   PetscFunctionBegin;
506   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
507   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
508   t  = a->solve_work;
509   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
510 
511   /* solve U^T * D * y = b by forward substitution */
512   tp = t;
513   for (k=0; k<mbs; k++) { /* t <- perm(b) */
514     idx   = 5*r[k];
515     tp[0] = b[idx];
516     tp[1] = b[idx+1];
517     tp[2] = b[idx+2];
518     tp[3] = b[idx+3];
519     tp[4] = b[idx+4];
520     tp += 5;
521   }
522 
523   for (k=0; k<mbs; k++){
524     v  = aa + 25*ai[k];
525     vj = aj + ai[k];
526     tp = t + k*5;
527     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
528     nz = ai[k+1] - ai[k];
529 
530     tp = t + (*vj)*5;
531     while (nz--) {
532       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
533       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
534       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
535       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
536       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
537       vj++; tp = t + (*vj)*5;
538       v += 25;
539     }
540 
541     /* xk = inv(Dk)*(Dk*xk) */
542     diag  = aa+k*25;          /* ptr to inv(Dk) */
543     tp    = t + k*5;
544       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
545       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
546       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
547       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
548       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
549   }
550 
551   /* solve U*x = y by back substitution */
552   for (k=mbs-1; k>=0; k--){
553     v  = aa + 25*ai[k];
554     vj = aj + ai[k];
555     tp    = t + k*5;
556     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
557     nz = ai[k+1] - ai[k];
558 
559     tp = t + (*vj)*5;
560     while (nz--) {
561       /* xk += U(k,:)*x(:) */
562       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
563       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
564       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
565       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
566       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
567       vj++; tp = t + (*vj)*5;
568       v += 25;
569     }
570     tp    = t + k*5;
571     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
572     idx   = 5*r[k];
573     x[idx]     = x0;
574     x[idx+1]   = x1;
575     x[idx+2]   = x2;
576     x[idx+3]   = x3;
577     x[idx+4]   = x4;
578   }
579 
580   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
581   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
582   PetscLogFlops(25*(2*a->nz + mbs));
583   PetscFunctionReturn(0);
584 }
585 
586 #undef __FUNCT__
587 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
588 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
589 {
590   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
591   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
592   MatScalar       *aa=a->a,*v,*diag;
593   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4;
594   PetscErrorCode ierr;
595   int             nz,*vj,k;
596 
597   PetscFunctionBegin;
598 
599   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
600   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
601 
602   /* solve U^T * D * y = b by forward substitution */
603   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
604   for (k=0; k<mbs; k++){
605     v  = aa + 25*ai[k];
606     xp = x + k*5;
607     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
608     nz = ai[k+1] - ai[k];
609     vj = aj + ai[k];
610     xp = x + (*vj)*5;
611     while (nz--) {
612       /* x(:) += U(k,:)^T*(Dk*xk) */
613       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
614       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
615       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
616       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
617       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
618       vj++; xp = x + (*vj)*5;
619       v += 25;
620     }
621     /* xk = inv(Dk)*(Dk*xk) */
622     diag = aa+k*25;          /* ptr to inv(Dk) */
623     xp   = x + k*5;
624     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
625     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
626     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
627     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
628     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
629   }
630 
631   /* solve U*x = y by back substitution */
632   for (k=mbs-1; k>=0; k--){
633     v  = aa + 25*ai[k];
634     xp = x + k*5;
635     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
636     nz = ai[k+1] - ai[k];
637     vj = aj + ai[k];
638     xp = x + (*vj)*5;
639     while (nz--) {
640       /* xk += U(k,:)*x(:) */
641       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
642       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
643       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
644       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
645       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
646       vj++;
647       v += 25; xp = x + (*vj)*5;
648     }
649     xp = x + k*5;
650     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
651   }
652 
653   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
654   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
655   PetscLogFlops(25*(2*a->nz + mbs));
656   PetscFunctionReturn(0);
657 }
658 
659 #undef __FUNCT__
660 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
661 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
662 {
663   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
664   IS              isrow=a->row;
665   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
666   PetscErrorCode ierr;
667   int             nz,*vj,k,*r,idx;
668   MatScalar       *aa=a->a,*v,*diag;
669   PetscScalar     *x,*b,x0,x1,x2,x3,*t,*tp;
670 
671   PetscFunctionBegin;
672   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
673   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
674   t  = a->solve_work;
675   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
676 
677   /* solve U^T * D * y = b by forward substitution */
678   tp = t;
679   for (k=0; k<mbs; k++) { /* t <- perm(b) */
680     idx   = 4*r[k];
681     tp[0] = b[idx];
682     tp[1] = b[idx+1];
683     tp[2] = b[idx+2];
684     tp[3] = b[idx+3];
685     tp += 4;
686   }
687 
688   for (k=0; k<mbs; k++){
689     v  = aa + 16*ai[k];
690     vj = aj + ai[k];
691     tp = t + k*4;
692     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
693     nz = ai[k+1] - ai[k];
694 
695     tp = t + (*vj)*4;
696     while (nz--) {
697       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
698       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
699       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
700       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
701       vj++; tp = t + (*vj)*4;
702       v += 16;
703     }
704 
705     /* xk = inv(Dk)*(Dk*xk) */
706     diag  = aa+k*16;          /* ptr to inv(Dk) */
707     tp    = t + k*4;
708     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
709     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
710     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
711     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
712   }
713 
714   /* solve U*x = y by back substitution */
715   for (k=mbs-1; k>=0; k--){
716     v  = aa + 16*ai[k];
717     vj = aj + ai[k];
718     tp    = t + k*4;
719     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
720     nz = ai[k+1] - ai[k];
721 
722     tp = t + (*vj)*4;
723     while (nz--) {
724       /* xk += U(k,:)*x(:) */
725       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
726       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
727       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
728       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
729       vj++; tp = t + (*vj)*4;
730       v += 16;
731     }
732     tp    = t + k*4;
733     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
734     idx        = 4*r[k];
735     x[idx]     = x0;
736     x[idx+1]   = x1;
737     x[idx+2]   = x2;
738     x[idx+3]   = x3;
739   }
740 
741   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
742   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
743   PetscLogFlops(16*(2*a->nz + mbs));
744   PetscFunctionReturn(0);
745 }
746 
747 /*
748    Special case where the matrix was factored in the natural ordering.
749    This eliminates the need for the column and row permutation.
750 */
751 #undef __FUNCT__
752 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
753 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
754 {
755   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
756   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
757   MatScalar       *aa=a->a,*v,*diag;
758   PetscScalar     *x,*xp,*b,x0,x1,x2,x3;
759   PetscErrorCode ierr;
760   int             nz,*vj,k;
761 
762   PetscFunctionBegin;
763 
764   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
765   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
766 
767   /* solve U^T * D * y = b by forward substitution */
768   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
769   for (k=0; k<mbs; k++){
770     v  = aa + 16*ai[k];
771     xp = x + k*4;
772     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
773     nz = ai[k+1] - ai[k];
774     vj = aj + ai[k];
775     xp = x + (*vj)*4;
776     while (nz--) {
777       /* x(:) += U(k,:)^T*(Dk*xk) */
778       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
779       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
780       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
781       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
782       vj++; xp = x + (*vj)*4;
783       v += 16;
784     }
785     /* xk = inv(Dk)*(Dk*xk) */
786     diag = aa+k*16;          /* ptr to inv(Dk) */
787     xp   = x + k*4;
788     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
789     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
790     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
791     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
792   }
793 
794   /* solve U*x = y by back substitution */
795   for (k=mbs-1; k>=0; k--){
796     v  = aa + 16*ai[k];
797     xp = x + k*4;
798     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
799     nz = ai[k+1] - ai[k];
800     vj = aj + ai[k];
801     xp = x + (*vj)*4;
802     while (nz--) {
803       /* xk += U(k,:)*x(:) */
804       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
805       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
806       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
807       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
808       vj++;
809       v += 16; xp = x + (*vj)*4;
810     }
811     xp = x + k*4;
812     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
813   }
814 
815   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
816   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
817   PetscLogFlops(16*(2*a->nz + mbs));
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
823 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
824 {
825   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
826   IS              isrow=a->row;
827   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
828   PetscErrorCode ierr;
829   int             nz,*vj,k,*r,idx;
830   MatScalar       *aa=a->a,*v,*diag;
831   PetscScalar     *x,*b,x0,x1,x2,*t,*tp;
832 
833   PetscFunctionBegin;
834   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
835   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
836   t  = a->solve_work;
837   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
838 
839   /* solve U^T * D * y = b by forward substitution */
840   tp = t;
841   for (k=0; k<mbs; k++) { /* t <- perm(b) */
842     idx   = 3*r[k];
843     tp[0] = b[idx];
844     tp[1] = b[idx+1];
845     tp[2] = b[idx+2];
846     tp += 3;
847   }
848 
849   for (k=0; k<mbs; k++){
850     v  = aa + 9*ai[k];
851     vj = aj + ai[k];
852     tp = t + k*3;
853     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
854     nz = ai[k+1] - ai[k];
855 
856     tp = t + (*vj)*3;
857     while (nz--) {
858       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
859       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
860       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
861       vj++; tp = t + (*vj)*3;
862       v += 9;
863     }
864 
865     /* xk = inv(Dk)*(Dk*xk) */
866     diag  = aa+k*9;          /* ptr to inv(Dk) */
867     tp    = t + k*3;
868     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
869     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
870     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
871   }
872 
873   /* solve U*x = y by back substitution */
874   for (k=mbs-1; k>=0; k--){
875     v  = aa + 9*ai[k];
876     vj = aj + ai[k];
877     tp    = t + k*3;
878     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
879     nz = ai[k+1] - ai[k];
880 
881     tp = t + (*vj)*3;
882     while (nz--) {
883       /* xk += U(k,:)*x(:) */
884       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
885       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
886       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
887       vj++; tp = t + (*vj)*3;
888       v += 9;
889     }
890     tp    = t + k*3;
891     tp[0] = x0; tp[1] = x1; tp[2] = x2;
892     idx      = 3*r[k];
893     x[idx]   = x0;
894     x[idx+1] = x1;
895     x[idx+2] = x2;
896   }
897 
898   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
899   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
900   PetscLogFlops(9*(2*a->nz + mbs));
901   PetscFunctionReturn(0);
902 }
903 
904 /*
905    Special case where the matrix was factored in the natural ordering.
906    This eliminates the need for the column and row permutation.
907 */
908 #undef __FUNCT__
909 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
910 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
911 {
912   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
913   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
914   MatScalar       *aa=a->a,*v,*diag;
915   PetscScalar     *x,*xp,*b,x0,x1,x2;
916   PetscErrorCode ierr;
917   int             nz,*vj,k;
918 
919   PetscFunctionBegin;
920 
921   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
922   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
923 
924   /* solve U^T * D * y = b by forward substitution */
925   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
926   for (k=0; k<mbs; k++){
927     v  = aa + 9*ai[k];
928     xp = x + k*3;
929     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
930     nz = ai[k+1] - ai[k];
931     vj = aj + ai[k];
932     xp = x + (*vj)*3;
933     while (nz--) {
934       /* x(:) += U(k,:)^T*(Dk*xk) */
935       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
936       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
937       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
938       vj++; xp = x + (*vj)*3;
939       v += 9;
940     }
941     /* xk = inv(Dk)*(Dk*xk) */
942     diag = aa+k*9;          /* ptr to inv(Dk) */
943     xp   = x + k*3;
944     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
945     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
946     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
947   }
948 
949   /* solve U*x = y by back substitution */
950   for (k=mbs-1; k>=0; k--){
951     v  = aa + 9*ai[k];
952     xp = x + k*3;
953     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
954     nz = ai[k+1] - ai[k];
955     vj = aj + ai[k];
956     xp = x + (*vj)*3;
957     while (nz--) {
958       /* xk += U(k,:)*x(:) */
959       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
960       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
961       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
962       vj++;
963       v += 9; xp = x + (*vj)*3;
964     }
965     xp = x + k*3;
966     xp[0] = x0; xp[1] = x1; xp[2] = x2;
967   }
968 
969   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
970   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971   PetscLogFlops(9*(2*a->nz + mbs));
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
977 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
978 {
979   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ *)A->data;
980   IS              isrow=a->row;
981   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
982   PetscErrorCode ierr;
983   int             nz,*vj,k,k2,*r,idx;
984   MatScalar       *aa=a->a,*v,*diag;
985   PetscScalar     *x,*b,x0,x1,*t;
986 
987   PetscFunctionBegin;
988   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
989   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
990   t  = a->solve_work;
991   /* printf("called MatSolve_SeqSBAIJ_2\n"); */
992   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
993 
994   /* solve U^T * D * y = perm(b) by forward substitution */
995   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
996     idx = 2*r[k];
997     t[k*2]   = b[idx];
998     t[k*2+1] = b[idx+1];
999   }
1000   for (k=0; k<mbs; k++){
1001     v  = aa + 4*ai[k];
1002     vj = aj + ai[k];
1003     k2 = k*2;
1004     x0 = t[k2]; x1 = t[k2+1];
1005     nz = ai[k+1] - ai[k];
1006     while (nz--) {
1007       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1008       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1009       vj++; v += 4;
1010     }
1011     diag = aa+k*4;  /* ptr to inv(Dk) */
1012     t[k2]   = diag[0]*x0 + diag[2]*x1;
1013     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1014   }
1015 
1016   /* solve U*x = y by back substitution */
1017   for (k=mbs-1; k>=0; k--){
1018     v  = aa + 4*ai[k];
1019     vj = aj + ai[k];
1020     k2 = k*2;
1021     x0 = t[k2]; x1 = t[k2+1];
1022     nz = ai[k+1] - ai[k];
1023     while (nz--) {
1024       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1025       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1026       vj++; v += 4;
1027     }
1028     t[k2]    = x0;
1029     t[k2+1]  = x1;
1030     idx      = 2*r[k];
1031     x[idx]   = x0;
1032     x[idx+1] = x1;
1033   }
1034 
1035   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1036   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1037   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1038   PetscLogFlops(4*(2*a->nz + mbs));
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 /*
1043    Special case where the matrix was factored in the natural ordering.
1044    This eliminates the need for the column and row permutation.
1045 */
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1048 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1049 {
1050   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1051   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1052   MatScalar       *aa=a->a,*v,*diag;
1053   PetscScalar     *x,*b,x0,x1;
1054   PetscErrorCode ierr;
1055   int             nz,*vj,k,k2;
1056 
1057   PetscFunctionBegin;
1058 
1059   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1060   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1061 
1062   /* solve U^T * D * y = b by forward substitution */
1063   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1064   for (k=0; k<mbs; k++){
1065     v  = aa + 4*ai[k];
1066     vj = aj + ai[k];
1067     k2 = k*2;
1068     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1069     nz = ai[k+1] - ai[k];
1070 
1071     while (nz--) {
1072       /* x(:) += U(k,:)^T*(Dk*xk) */
1073       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1074       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1075       vj++; v += 4;
1076     }
1077     /* xk = inv(Dk)*(Dk*xk) */
1078     diag = aa+k*4;          /* ptr to inv(Dk) */
1079     x[k2]   = diag[0]*x0 + diag[2]*x1;
1080     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1081   }
1082 
1083   /* solve U*x = y by back substitution */
1084   for (k=mbs-1; k>=0; k--){
1085     v  = aa + 4*ai[k];
1086     vj = aj + ai[k];
1087     k2 = k*2;
1088     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1089     nz = ai[k+1] - ai[k];
1090     while (nz--) {
1091       /* xk += U(k,:)*x(:) */
1092       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1093       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1094       vj++; v += 4;
1095     }
1096     x[k2]     = x0;
1097     x[k2+1]   = x1;
1098   }
1099 
1100   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1101   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1102   PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1108 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1109 {
1110   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ *)A->data;
1111   IS              isrow=a->row;
1112   PetscErrorCode ierr;
1113   int             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip;
1114   MatScalar       *aa=a->a,*v;
1115   PetscScalar     *x,*b,xk,*t;
1116   int             nz,*vj,k;
1117 
1118   PetscFunctionBegin;
1119   if (!mbs) PetscFunctionReturn(0);
1120 
1121   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1122   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1123   t    = a->solve_work;
1124 
1125   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1126 
1127   /* solve U^T*D*y = perm(b) by forward substitution */
1128   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1129   for (k=0; k<mbs; k++){
1130     v  = aa + ai[k];
1131     vj = aj + ai[k];
1132     xk = t[k];
1133     nz = ai[k+1] - ai[k];
1134     while (nz--) t[*vj++] += (*v++) * xk;
1135     t[k] = xk*aa[k];  /* note: aa[k] = 1/D(k) */
1136   }
1137 
1138   /* solve U*x = y by back substitution */
1139   for (k=mbs-1; k>=0; k--){
1140     v  = aa + ai[k];
1141     vj = aj + ai[k];
1142     xk = t[k];
1143     nz = ai[k+1] - ai[k];
1144     while (nz--) xk += (*v++) * t[*vj++];
1145     t[k]      = xk;
1146     x[rip[k]] = xk;
1147   }
1148 
1149   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1150   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1151   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1152   PetscLogFlops(4*a->nz + A->m);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1158 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1159 {
1160   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164   if (A->bs == 1) {
1165     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1166   } else {
1167     IS              isrow=a->row;
1168     int             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1169     MatScalar       *aa=a->a,*v;
1170     PetscScalar     *x,*b,*t;
1171     int             nz,*vj,k,n;
1172     if (bb->n > a->solves_work_n) {
1173       if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
1174       ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1175       a->solves_work_n = bb->n;
1176     }
1177     n    = bb->n;
1178     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1179     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1180     t    = a->solves_work;
1181 
1182     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1183 
1184     /* solve U^T*D*y = perm(b) by forward substitution */
1185     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1186     for (k=0; k<mbs; k++){
1187       v  = aa + ai[k];
1188       vj = aj + ai[k];
1189       nz = ai[k+1] - ai[k];
1190       while (nz--) {
1191         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1192         v++;vj++;
1193       }
1194       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1195     }
1196 
1197     /* solve U*x = y by back substitution */
1198     for (k=mbs-1; k>=0; k--){
1199       v  = aa + ai[k];
1200       vj = aj + ai[k];
1201       nz = ai[k+1] - ai[k];
1202       while (nz--) {
1203         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1204         v++;vj++;
1205       }
1206       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1207     }
1208 
1209     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1210     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1211     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1212     PetscLogFlops(bb->n*(4*a->nz + A->m));
1213   }
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /*
1218       Special case where the matrix was ILU(0) factored in the natural
1219    ordering. This eliminates the need for the column and row permutation.
1220 */
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1223 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1224 {
1225   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ *)A->data;
1226   PetscErrorCode ierr;
1227   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1228   MatScalar       *aa=a->a,*v;
1229   PetscScalar     *x,*b,xk;
1230   int             nz,*vj,k;
1231 
1232   PetscFunctionBegin;
1233   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1234   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1235 
1236   /* solve U^T*D*y = b by forward substitution */
1237   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1238   for (k=0; k<mbs; k++){
1239     v  = aa + ai[k] + 1;
1240     vj = aj + ai[k] + 1;
1241     xk = x[k];
1242     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1243     while (nz--) x[*vj++] += (*v++) * xk;
1244     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1245   }
1246 
1247   /* solve U*x = y by back substitution */
1248   for (k=mbs-2; k>=0; k--){
1249     v  = aa + ai[k] + 1;
1250     vj = aj + ai[k] + 1;
1251     xk = x[k];
1252     nz = ai[k+1] - ai[k] - 1;
1253     while (nz--) xk += (*v++) * x[*vj++];
1254     x[k] = xk;
1255   }
1256 
1257   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1258   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1259   PetscLogFlops(4*a->nz + A->m);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
1266 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1267 {
1268   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
1269   PetscErrorCode ierr;
1270   int         *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1271   int         *jutmp,bs = A->bs,bs2=a->bs2;
1272   int         m,reallocs = 0,*levtmp;
1273   int         *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl;
1274   int         incrlev,*lev,shift,prow,nz;
1275   int         *il,ili,nextprow;
1276   PetscReal   f = info->fill,levels = info->levels;
1277   PetscTruth  perm_identity;
1278 
1279   PetscFunctionBegin;
1280   /* check whether perm is the identity mapping */
1281   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1282 
1283   /* special case that simply copies fill pattern */
1284   if (!levels && perm_identity && bs==1) {
1285     ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr);
1286     (*B)->factor    = FACTOR_CHOLESKY;
1287     b               = (Mat_SeqSBAIJ*)(*B)->data;
1288     b->row          = perm;
1289     b->icol         = perm;
1290     b->factor_damping   = info->damping;
1291     b->factor_shift     = info->shift;
1292     b->factor_zeropivot = info->zeropivot;
1293     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1294     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1295     ierr         = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1296     (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1297     (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1298     PetscFunctionReturn(0);
1299   }
1300 
1301   /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */
1302   if (levels > 0 && perm_identity && bs==1 ){
1303     if (!perm_identity) a->permute = PETSC_TRUE;
1304 
1305   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1306 
1307   if (perm_identity){ /* without permutation */
1308     ai = a->i; aj = a->j;
1309   } else {            /* non-trivial permutation */
1310     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
1311     ai = a->inew; aj = a->jnew;
1312   }
1313 
1314   /* initialization */
1315   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1316   umax  = (int)(f*ai[mbs] + 1);
1317   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1318   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1319   iu[0] = 0;
1320   juidx = 0; /* index for ju */
1321   ierr  = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */
1322   q      = jl + mbs;   /* linked list for col index of active row */
1323   levtmp = q + mbs;
1324   il     = levtmp + mbs;
1325   for (i=0; i<mbs; i++){
1326     jl[i] = mbs;
1327     q[i]  = 0;
1328     il[i] = 0;
1329   }
1330 
1331   /* for each row k */
1332   for (k=0; k<mbs; k++){
1333     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
1334     q[k] = mbs;
1335     /* initialize nonzero structure of k-th row to row rip[k] of A */
1336     jmin = ai[rip[k]] +1; /* exclude diag[k] */
1337     jmax = ai[rip[k]+1];
1338     for (j=jmin; j<jmax; j++){
1339       vj = rip[aj[j]]; /* col. value */
1340       if(vj > k){
1341         qm = k;
1342         do {
1343           m  = qm; qm = q[m];
1344         } while(qm < vj);
1345         if (qm == vj) {
1346           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1347         }
1348         nzk++;
1349         q[m]  = vj;
1350         q[vj] = qm;
1351         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1352       } /* if(vj > k) */
1353     } /* for (j=jmin; j<jmax; j++) */
1354 
1355     /* modify nonzero structure of k-th row by computing fill-in
1356        for each row i to be merged in */
1357     prow = k;
1358     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
1359 
1360     while (prow < k){
1361       nextprow = jl[prow];
1362       /* merge row prow into k-th row */
1363       ili  = il[prow];
1364       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
1365       jmax = iu[prow+1];
1366       qm   = k;
1367       for (j=jmin; j<jmax; j++){
1368         vj = ju[j];
1369         incrlev = lev[j] + 1;
1370         if (incrlev > levels) continue;
1371         do {
1372           m = qm; qm = q[m];
1373         } while (qm < vj);
1374         if (qm != vj){  /* a fill */
1375           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1376           levtmp[vj]  = incrlev;
1377         } else {
1378           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1379         }
1380       }
1381       if (jmin < jmax){ /* update il and jl */
1382         il[prow] = jmin;
1383         j = ju[jmin];
1384         jl[prow] = jl[j]; jl[j] = prow;
1385       }
1386       prow = nextprow;
1387     }
1388 
1389     /* add the first nonzero element in U(k, k+1:mbs-1) to jl */
1390     if (nzk > 0){
1391       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1392       jl[k] = jl[i]; jl[i] = k;
1393       il[k] = iu[k] + 1;
1394     }
1395     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */
1396 
1397     /* allocate more space to ju if needed */
1398     if (iu[k+1] > umax) {
1399       /* estimate how much additional space we will need */
1400       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1401       /* just double the memory each time */
1402       maxadd = umax;
1403       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1404       umax += maxadd;
1405 
1406       /* allocate a longer ju */
1407       ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1408       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1409       ierr = PetscFree(ju);CHKERRQ(ierr);
1410       ju   = jutmp;
1411 
1412       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1413       ierr     = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr);
1414       ierr     = PetscFree(lev);CHKERRQ(ierr);
1415       lev      = jutmp;
1416       reallocs += 2; /* count how many times we realloc */
1417     }
1418 
1419     /* save nonzero structure of k-th row in ju */
1420     ju[juidx]  = k; /* diag[k] */
1421     lev[juidx] = 0;
1422     juidx++;
1423     i = k;
1424     while (nzk --) {
1425       i           = q[i];
1426       ju[juidx] = i;
1427       lev[juidx] = levtmp[i];
1428       juidx++;
1429     }
1430   } /* end of for (k=0; k<mbs; k++) */
1431 
1432   if (ai[mbs] != 0) {
1433     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1434     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1435     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1436     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
1437     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
1438   } else {
1439      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1440   }
1441 
1442   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1443   ierr = PetscFree(jl);CHKERRQ(ierr);
1444   ierr = PetscFree(lev);CHKERRQ(ierr);
1445 
1446   /* put together the new matrix */
1447   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1448   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1449   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1450 
1451   /* PetscLogObjectParent(*B,iperm); */
1452   b = (Mat_SeqSBAIJ*)(*B)->data;
1453   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1454   b->singlemalloc = PETSC_FALSE;
1455   /* the next line frees the default space generated by the Create() */
1456   ierr = PetscFree(b->a);CHKERRQ(ierr);
1457   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1458   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1459   b->j    = ju;
1460   b->i    = iu;
1461   b->diag = 0;
1462   b->ilen = 0;
1463   b->imax = 0;
1464   b->row  = perm;
1465   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1466   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1467   b->icol = perm;
1468   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1469   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1470   /* In b structure:  Free imax, ilen, old a, old j.
1471      Allocate idnew, solve_work, new a, new j */
1472   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1473   b->maxnz          = b->nz = iu[mbs];
1474   b->factor_damping   = info->damping;
1475   b->factor_shift     = info->shift;
1476   b->factor_zeropivot = info->zeropivot;
1477 
1478   (*B)->factor                 = FACTOR_CHOLESKY;
1479   (*B)->info.factor_mallocs    = reallocs;
1480   (*B)->info.fill_ratio_given  = f;
1481   if (ai[mbs] != 0) {
1482     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1483   } else {
1484     (*B)->info.fill_ratio_needed = 0.0;
1485   }
1486 
1487 
1488   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1489   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1490   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1491 
1492   PetscFunctionReturn(0);
1493   } /* end of if (levels > 0 && perm_identity && bs==1 ) */
1494 
1495   if (!perm_identity) a->permute = PETSC_TRUE;
1496   if (perm_identity){
1497     ai = a->i; aj = a->j;
1498   } else { /*  non-trivial permutation */
1499     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1500     ai = a->inew; aj = a->jnew;
1501   }
1502 
1503   /* initialization */
1504   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1505   umax  = (int)(f*ai[mbs] + 1);
1506   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1507   umax += mbs + 1;
1508   shift = mbs + 1;
1509   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1510   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1511   iu[0] = mbs + 1;
1512   juidx = mbs + 1;
1513   /* prowl: linked list for pivot row */
1514   ierr    = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr);
1515   /* q: linked list for col index */
1516   q       = prowl + mbs;
1517   levtmp  = q     + mbs;
1518 
1519   for (i=0; i<mbs; i++){
1520     prowl[i] = mbs;
1521     q[i] = 0;
1522   }
1523 
1524   /* for each row k */
1525   for (k=0; k<mbs; k++){
1526     nzk  = 0;
1527     q[k] = mbs;
1528     /* copy current row into linked list */
1529     nz = ai[rip[k]+1] - ai[rip[k]];
1530     j = ai[rip[k]];
1531     while (nz--){
1532       vj = rip[aj[j++]];
1533       if (vj > k){
1534         qm = k;
1535         do {
1536           m  = qm; qm = q[m];
1537         } while(qm < vj);
1538         if (qm == vj) {
1539           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1540         }
1541         nzk++;
1542         q[m]       = vj;
1543         q[vj]      = qm;
1544         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1545       }
1546     }
1547 
1548     /* modify nonzero structure of k-th row by computing fill-in
1549        for each row prow to be merged in */
1550     prow = k;
1551     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1552 
1553     while (prow < k){
1554       /* merge row prow into k-th row */
1555       jmin = iu[prow] + 1;
1556       jmax = iu[prow+1];
1557       qm = k;
1558       for (j=jmin; j<jmax; j++){
1559         incrlev = lev[j-shift] + 1;
1560 	if (incrlev > levels) continue;
1561 
1562         vj      = ju[j];
1563         do {
1564           m = qm; qm = q[m];
1565         } while (qm < vj);
1566         if (qm != vj){      /* a fill */
1567           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1568           levtmp[vj] = incrlev;
1569         } else {
1570           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1571         }
1572       }
1573       prow = prowl[prow]; /* next pivot row */
1574     }
1575 
1576     /* add k to row list for first nonzero element in k-th row */
1577     if (nzk > 1){
1578       i = q[k]; /* col value of first nonzero element in k_th row of U */
1579       prowl[k] = prowl[i]; prowl[i] = k;
1580     }
1581     iu[k+1] = iu[k] + nzk;
1582 
1583     /* allocate more space to ju and lev if needed */
1584     if (iu[k+1] > umax) {
1585       /* estimate how much additional space we will need */
1586       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1587       /* just double the memory each time */
1588       maxadd = umax;
1589       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1590       umax += maxadd;
1591 
1592       /* allocate a longer ju */
1593       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1594       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1595       ierr     = PetscFree(ju);CHKERRQ(ierr);
1596       ju       = jutmp;
1597 
1598       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1599       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr);
1600       ierr     = PetscFree(lev);CHKERRQ(ierr);
1601       lev      = jutmp;
1602       reallocs += 2; /* count how many times we realloc */
1603     }
1604 
1605     /* save nonzero structure of k-th row in ju */
1606     i=k;
1607     while (nzk --) {
1608       i                = q[i];
1609       ju[juidx]        = i;
1610       lev[juidx-shift] = levtmp[i];
1611       juidx++;
1612     }
1613   }
1614 
1615   if (ai[mbs] != 0) {
1616     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1617     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1618     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af);
1619     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af);
1620     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n");
1621   } else {
1622     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1623   }
1624 
1625   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1626   ierr = PetscFree(prowl);CHKERRQ(ierr);
1627   ierr = PetscFree(lev);CHKERRQ(ierr);
1628 
1629   /* put together the new matrix */
1630   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1631   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1632   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1633 
1634   /* PetscLogObjectParent(*B,iperm); */
1635   b    = (Mat_SeqSBAIJ*)(*B)->data;
1636   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1637   b->singlemalloc = PETSC_FALSE;
1638   /* the next line frees the default space generated by the Create() */
1639   ierr    = PetscFree(b->a);CHKERRQ(ierr);
1640   ierr    = PetscFree(b->ilen);CHKERRQ(ierr);
1641   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1642   b->j    = ju;
1643   b->i    = iu;
1644   b->diag = 0;
1645   b->ilen = 0;
1646   b->imax = 0;
1647 
1648   if (b->row) {
1649     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1650   }
1651   if (b->icol) {
1652     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1653   }
1654   b->row  = perm;
1655   b->icol = perm;
1656   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1657   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1658   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1659   /* In b structure:  Free imax, ilen, old a, old j.
1660      Allocate idnew, solve_work, new a, new j */
1661   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1662   b->maxnz = b->nz = iu[mbs];
1663 
1664   (*B)->factor                 = FACTOR_CHOLESKY;
1665   (*B)->info.factor_mallocs    = reallocs;
1666   (*B)->info.fill_ratio_given  = f;
1667   if (ai[mbs] != 0) {
1668     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1669   } else {
1670     (*B)->info.fill_ratio_needed = 0.0;
1671   }
1672 
1673   if (perm_identity){
1674     switch (bs) {
1675       case 1:
1676         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1677         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1678         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1679         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1680         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n");
1681         break;
1682       case 2:
1683         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1684         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1685         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1686         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1687         break;
1688       case 3:
1689         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1690         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1691         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1692         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1693         break;
1694       case 4:
1695         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1696         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1697         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1698         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1699         break;
1700       case 5:
1701         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1702         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1703         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1704         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1705         break;
1706       case 6:
1707         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1708         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1709         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1710         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1711         break;
1712       case 7:
1713         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1714         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1715         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1716         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1717       break;
1718       default:
1719         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1720         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1721         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1722         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
1723       break;
1724     }
1725   }
1726 
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 
1731 
1732