xref: /petsc/src/mat/impls/sbaij/seq/sbaijfact2.c (revision 72ce74bd30d5d09d1371a3eb387704f4af395d1a)
1 /*
2     Factorization code for SBAIJ format.
3 */
4 
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 #include "src/mat/impls/baij/seq/baij.h"
7 #include "src/inline/ilu.h"
8 #include "src/inline/dot.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering"
12 PetscErrorCode MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
13 {
14   PetscFunctionBegin;
15   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
16 }
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering"
20 PetscErrorCode MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
21 {
22   PetscFunctionBegin;
23   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
24 }
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering"
28 PetscErrorCode MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
29 {
30   PetscFunctionBegin;
31   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering"
36 PetscErrorCode MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
37 {
38   PetscFunctionBegin;
39   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
40 }
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering"
44 PetscErrorCode MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
45 {
46   PetscFunctionBegin;
47   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
48 }
49 
50 #undef __FUNCT__
51 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering"
52 PetscErrorCode MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
53 {
54   PetscFunctionBegin;
55   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering"
60 PetscErrorCode MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
61 {
62   PetscFunctionBegin;
63   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1"
68 PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
69 {
70   PetscFunctionBegin;
71   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2"
76 PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
77 {
78   PetscFunctionBegin;
79   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3"
84 PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
85 {
86   PetscFunctionBegin;
87   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4"
92 PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
93 {
94   PetscFunctionBegin;
95   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
96 }
97 
98 #undef __FUNCT__
99 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5"
100 PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
101 {
102   PetscFunctionBegin;
103   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6"
108 PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
109 {
110   PetscFunctionBegin;
111   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7"
116 PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
117 {
118   PetscFunctionBegin;
119   SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatSolve_SeqSBAIJ_N"
124 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
125 {
126   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
127   IS              isrow=a->row;
128   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
129   int             nz,*vj,k,ierr,*r,idx,k1;
130   int             bs=a->bs,bs2 = a->bs2;
131   MatScalar       *aa=a->a,*v,*diag;
132   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;
133 
134   PetscFunctionBegin;
135   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
136   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
137   t  = a->solve_work;
138   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
139   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
140 
141   /* solve U^T * D * y = b by forward substitution */
142   xk = t;
143   for (k=0; k<mbs; k++) { /* t <- perm(b) */
144     idx   = bs*r[k];
145     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
146   }
147   for (k=0; k<mbs; k++){
148     v  = aa + bs2*ai[k];
149     xk = t + k*bs;      /* Dk*xk = k-th block of x */
150     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
151     nz = ai[k+1] - ai[k];
152     vj = aj + ai[k];
153     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
154     while (nz--) {
155       /* x(:) += U(k,:)^T*(Dk*xk) */
156       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
157       vj++; xj = t + (*vj)*bs;
158       v += bs2;
159     }
160     /* xk = inv(Dk)*(Dk*xk) */
161     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
162     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
163   }
164 
165   /* solve U*x = y by back substitution */
166   for (k=mbs-1; k>=0; k--){
167     v  = aa + bs2*ai[k];
168     xk = t + k*bs;        /* xk */
169     nz = ai[k+1] - ai[k];
170     vj = aj + ai[k];
171     xj = t + (*vj)*bs;
172     while (nz--) {
173       /* xk += U(k,:)*x(:) */
174       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
175       vj++;
176       v += bs2; xj = t + (*vj)*bs;
177     }
178     idx   = bs*r[k];
179     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
180   }
181 
182   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
183   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
184   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
185   PetscLogFlops(bs2*(2*a->nz + mbs));
186   PetscFunctionReturn(0);
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering"
191 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
192 {
193   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
194   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
195   int             nz,*vj,k,ierr;
196   int             bs=a->bs,bs2 = a->bs2;
197   MatScalar       *aa=a->a,*v,*diag;
198   PetscScalar     *x,*xk,*xj,*b,*xk_tmp;
199 
200   PetscFunctionBegin;
201 
202   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
203   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
204 
205   ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr);
206 
207   /* solve U^T * D * y = b by forward substitution */
208   ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
209   for (k=0; k<mbs; k++){
210     v  = aa + bs2*ai[k];
211     xk = x + k*bs;      /* Dk*xk = k-th block of x */
212     ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */
213     nz = ai[k+1] - ai[k];
214     vj = aj + ai[k];
215     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
216     while (nz--) {
217       /* x(:) += U(k,:)^T*(Dk*xk) */
218       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
219       vj++; xj = x + (*vj)*bs;
220       v += bs2;
221     }
222     /* xk = inv(Dk)*(Dk*xk) */
223     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
224     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
225   }
226 
227   /* solve U*x = y by back substitution */
228   for (k=mbs-1; k>=0; k--){
229     v  = aa + bs2*ai[k];
230     xk = x + k*bs;        /* xk */
231     nz = ai[k+1] - ai[k];
232     vj = aj + ai[k];
233     xj = x + (*vj)*bs;
234     while (nz--) {
235       /* xk += U(k,:)*x(:) */
236       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
237       vj++;
238       v += bs2; xj = x + (*vj)*bs;
239     }
240   }
241 
242   ierr = PetscFree(xk_tmp);CHKERRQ(ierr);
243   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
244   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
245   PetscLogFlops(bs2*(2*a->nz + mbs));
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatSolve_SeqSBAIJ_7"
251 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
252 {
253   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
254   IS              isrow=a->row;
255   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
256   int             nz,*vj,k,ierr,*r,idx;
257   MatScalar       *aa=a->a,*v,*d;
258   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
259 
260   PetscFunctionBegin;
261   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
262   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
263   t  = a->solve_work;
264   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
265 
266   /* solve U^T * D * y = b by forward substitution */
267   tp = t;
268   for (k=0; k<mbs; k++) { /* t <- perm(b) */
269     idx   = 7*r[k];
270     tp[0] = b[idx];
271     tp[1] = b[idx+1];
272     tp[2] = b[idx+2];
273     tp[3] = b[idx+3];
274     tp[4] = b[idx+4];
275     tp[5] = b[idx+5];
276     tp[6] = b[idx+6];
277     tp += 7;
278   }
279 
280   for (k=0; k<mbs; k++){
281     v  = aa + 49*ai[k];
282     vj = aj + ai[k];
283     tp = t + k*7;
284     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
285     nz = ai[k+1] - ai[k];
286     tp = t + (*vj)*7;
287     while (nz--) {
288       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
289       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
290       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
291       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
292       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
293       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
294       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
295       vj++; tp = t + (*vj)*7;
296       v += 49;
297     }
298 
299     /* xk = inv(Dk)*(Dk*xk) */
300     d  = aa+k*49;          /* ptr to inv(Dk) */
301     tp    = t + k*7;
302     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
303     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
304     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
305     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
306     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
307     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
308     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
309   }
310 
311   /* solve U*x = y by back substitution */
312   for (k=mbs-1; k>=0; k--){
313     v  = aa + 49*ai[k];
314     vj = aj + ai[k];
315     tp    = t + k*7;
316     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
317     nz = ai[k+1] - ai[k];
318 
319     tp = t + (*vj)*7;
320     while (nz--) {
321       /* xk += U(k,:)*x(:) */
322       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
323       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
324       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
325       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
326       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
327       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
328       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
329       vj++; tp = t + (*vj)*7;
330       v += 49;
331     }
332     tp    = t + k*7;
333     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
334     idx   = 7*r[k];
335     x[idx]     = x0;
336     x[idx+1]   = x1;
337     x[idx+2]   = x2;
338     x[idx+3]   = x3;
339     x[idx+4]   = x4;
340     x[idx+5]   = x5;
341     x[idx+6]   = x6;
342   }
343 
344   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
345   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
346   PetscLogFlops(49*(2*a->nz + mbs));
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering"
352 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
353 {
354   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
355   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
356   MatScalar       *aa=a->a,*v,*d;
357   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
358   int             nz,*vj,k,ierr;
359 
360   PetscFunctionBegin;
361 
362   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
363   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
364 
365   /* solve U^T * D * y = b by forward substitution */
366   ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
367   for (k=0; k<mbs; k++){
368     v  = aa + 49*ai[k];
369     xp = x + k*7;
370     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
371     nz = ai[k+1] - ai[k];
372     vj = aj + ai[k];
373     xp = x + (*vj)*7;
374     while (nz--) {
375       /* x(:) += U(k,:)^T*(Dk*xk) */
376       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
377       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
378       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
379       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
380       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
381       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
382       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
383       vj++; xp = x + (*vj)*7;
384       v += 49;
385     }
386     /* xk = inv(Dk)*(Dk*xk) */
387     d  = aa+k*49;          /* ptr to inv(Dk) */
388     xp = x + k*7;
389     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
390     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
391     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
392     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
393     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
394     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
395     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
396   }
397 
398   /* solve U*x = y by back substitution */
399   for (k=mbs-1; k>=0; k--){
400     v  = aa + 49*ai[k];
401     xp = x + k*7;
402     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
403     nz = ai[k+1] - ai[k];
404     vj = aj + ai[k];
405     xp = x + (*vj)*7;
406     while (nz--) {
407       /* xk += U(k,:)*x(:) */
408       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
409       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
410       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
411       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
412       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
413       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
414       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
415       vj++;
416       v += 49; xp = x + (*vj)*7;
417     }
418     xp = x + k*7;
419     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
420   }
421 
422   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
423   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
424   PetscLogFlops(49*(2*a->nz + mbs));
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatSolve_SeqSBAIJ_6"
430 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
431 {
432   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
433   IS              isrow=a->row;
434   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
435   int             nz,*vj,k,ierr,*r,idx;
436   MatScalar       *aa=a->a,*v,*d;
437   PetscScalar     *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
438 
439   PetscFunctionBegin;
440   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
441   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
442   t  = a->solve_work;
443   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
444 
445   /* solve U^T * D * y = b by forward substitution */
446   tp = t;
447   for (k=0; k<mbs; k++) { /* t <- perm(b) */
448     idx   = 6*r[k];
449     tp[0] = b[idx];
450     tp[1] = b[idx+1];
451     tp[2] = b[idx+2];
452     tp[3] = b[idx+3];
453     tp[4] = b[idx+4];
454     tp[5] = b[idx+5];
455     tp += 6;
456   }
457 
458   for (k=0; k<mbs; k++){
459     v  = aa + 36*ai[k];
460     vj = aj + ai[k];
461     tp = t + k*6;
462     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
463     nz = ai[k+1] - ai[k];
464     tp = t + (*vj)*6;
465     while (nz--) {
466       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
467       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
468       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
469       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
470       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
471       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
472       vj++; tp = t + (*vj)*6;
473       v += 36;
474     }
475 
476     /* xk = inv(Dk)*(Dk*xk) */
477     d  = aa+k*36;          /* ptr to inv(Dk) */
478     tp    = t + k*6;
479     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
480     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
481     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
482     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
483     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
484     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
485   }
486 
487   /* solve U*x = y by back substitution */
488   for (k=mbs-1; k>=0; k--){
489     v  = aa + 36*ai[k];
490     vj = aj + ai[k];
491     tp    = t + k*6;
492     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
493     nz = ai[k+1] - ai[k];
494 
495     tp = t + (*vj)*6;
496     while (nz--) {
497       /* xk += U(k,:)*x(:) */
498       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
499       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
500       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
501       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
502       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
503       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
504       vj++; tp = t + (*vj)*6;
505       v += 36;
506     }
507     tp    = t + k*6;
508     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
509     idx   = 6*r[k];
510     x[idx]     = x0;
511     x[idx+1]   = x1;
512     x[idx+2]   = x2;
513     x[idx+3]   = x3;
514     x[idx+4]   = x4;
515     x[idx+5]   = x5;
516   }
517 
518   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
519   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
520   PetscLogFlops(36*(2*a->nz + mbs));
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering"
526 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
527 {
528   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
529   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
530   MatScalar       *aa=a->a,*v,*d;
531   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4,x5;
532   int             nz,*vj,k,ierr;
533 
534   PetscFunctionBegin;
535 
536   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
537   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
538 
539   /* solve U^T * D * y = b by forward substitution */
540   ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
541   for (k=0; k<mbs; k++){
542     v  = aa + 36*ai[k];
543     xp = x + k*6;
544     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
545     nz = ai[k+1] - ai[k];
546     vj = aj + ai[k];
547     xp = x + (*vj)*6;
548     while (nz--) {
549       /* x(:) += U(k,:)^T*(Dk*xk) */
550       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
551       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
552       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
553       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
554       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
555       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
556       vj++; xp = x + (*vj)*6;
557       v += 36;
558     }
559     /* xk = inv(Dk)*(Dk*xk) */
560     d  = aa+k*36;          /* ptr to inv(Dk) */
561     xp = x + k*6;
562     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
563     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
564     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
565     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
566     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
567     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
568   }
569 
570   /* solve U*x = y by back substitution */
571   for (k=mbs-1; k>=0; k--){
572     v  = aa + 36*ai[k];
573     xp = x + k*6;
574     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
575     nz = ai[k+1] - ai[k];
576     vj = aj + ai[k];
577     xp = x + (*vj)*6;
578     while (nz--) {
579       /* xk += U(k,:)*x(:) */
580       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
581       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
582       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
583       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
584       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
585       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
586       vj++;
587       v += 36; xp = x + (*vj)*6;
588     }
589     xp = x + k*6;
590     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
591   }
592 
593   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
594   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
595   PetscLogFlops(36*(2*a->nz + mbs));
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatSolve_SeqSBAIJ_5"
601 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
602 {
603   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
604   IS              isrow=a->row;
605   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
606   int             nz,*vj,k,ierr,*r,idx;
607   MatScalar       *aa=a->a,*v,*diag;
608   PetscScalar     *x,*b,x0,x1,x2,x3,x4,*t,*tp;
609 
610   PetscFunctionBegin;
611   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
612   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
613   t  = a->solve_work;
614   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
615 
616   /* solve U^T * D * y = b by forward substitution */
617   tp = t;
618   for (k=0; k<mbs; k++) { /* t <- perm(b) */
619     idx   = 5*r[k];
620     tp[0] = b[idx];
621     tp[1] = b[idx+1];
622     tp[2] = b[idx+2];
623     tp[3] = b[idx+3];
624     tp[4] = b[idx+4];
625     tp += 5;
626   }
627 
628   for (k=0; k<mbs; k++){
629     v  = aa + 25*ai[k];
630     vj = aj + ai[k];
631     tp = t + k*5;
632     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
633     nz = ai[k+1] - ai[k];
634 
635     tp = t + (*vj)*5;
636     while (nz--) {
637       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
638       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
639       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
640       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
641       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
642       vj++; tp = t + (*vj)*5;
643       v += 25;
644     }
645 
646     /* xk = inv(Dk)*(Dk*xk) */
647     diag  = aa+k*25;          /* ptr to inv(Dk) */
648     tp    = t + k*5;
649       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
650       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
651       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
652       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
653       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
654   }
655 
656   /* solve U*x = y by back substitution */
657   for (k=mbs-1; k>=0; k--){
658     v  = aa + 25*ai[k];
659     vj = aj + ai[k];
660     tp    = t + k*5;
661     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
662     nz = ai[k+1] - ai[k];
663 
664     tp = t + (*vj)*5;
665     while (nz--) {
666       /* xk += U(k,:)*x(:) */
667       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
668       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
669       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
670       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
671       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
672       vj++; tp = t + (*vj)*5;
673       v += 25;
674     }
675     tp    = t + k*5;
676     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
677     idx   = 5*r[k];
678     x[idx]     = x0;
679     x[idx+1]   = x1;
680     x[idx+2]   = x2;
681     x[idx+3]   = x3;
682     x[idx+4]   = x4;
683   }
684 
685   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
686   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
687   PetscLogFlops(25*(2*a->nz + mbs));
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering"
693 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
694 {
695   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
696   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
697   MatScalar       *aa=a->a,*v,*diag;
698   PetscScalar     *x,*xp,*b,x0,x1,x2,x3,x4;
699   int             nz,*vj,k,ierr;
700 
701   PetscFunctionBegin;
702 
703   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
704   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
705 
706   /* solve U^T * D * y = b by forward substitution */
707   ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
708   for (k=0; k<mbs; k++){
709     v  = aa + 25*ai[k];
710     xp = x + k*5;
711     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
712     nz = ai[k+1] - ai[k];
713     vj = aj + ai[k];
714     xp = x + (*vj)*5;
715     while (nz--) {
716       /* x(:) += U(k,:)^T*(Dk*xk) */
717       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
718       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
719       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
720       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
721       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
722       vj++; xp = x + (*vj)*5;
723       v += 25;
724     }
725     /* xk = inv(Dk)*(Dk*xk) */
726     diag = aa+k*25;          /* ptr to inv(Dk) */
727     xp   = x + k*5;
728     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
729     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
730     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
731     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
732     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
733   }
734 
735   /* solve U*x = y by back substitution */
736   for (k=mbs-1; k>=0; k--){
737     v  = aa + 25*ai[k];
738     xp = x + k*5;
739     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
740     nz = ai[k+1] - ai[k];
741     vj = aj + ai[k];
742     xp = x + (*vj)*5;
743     while (nz--) {
744       /* xk += U(k,:)*x(:) */
745       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
746       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
747       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
748       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
749       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
750       vj++;
751       v += 25; xp = x + (*vj)*5;
752     }
753     xp = x + k*5;
754     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
755   }
756 
757   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
758   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
759   PetscLogFlops(25*(2*a->nz + mbs));
760   PetscFunctionReturn(0);
761 }
762 
763 #undef __FUNCT__
764 #define __FUNCT__ "MatSolve_SeqSBAIJ_4"
765 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
766 {
767   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
768   IS              isrow=a->row;
769   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
770   int             nz,*vj,k,ierr,*r,idx;
771   MatScalar       *aa=a->a,*v,*diag;
772   PetscScalar     *x,*b,x0,x1,x2,x3,*t,*tp;
773 
774   PetscFunctionBegin;
775   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
776   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
777   t  = a->solve_work;
778   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
779 
780   /* solve U^T * D * y = b by forward substitution */
781   tp = t;
782   for (k=0; k<mbs; k++) { /* t <- perm(b) */
783     idx   = 4*r[k];
784     tp[0] = b[idx];
785     tp[1] = b[idx+1];
786     tp[2] = b[idx+2];
787     tp[3] = b[idx+3];
788     tp += 4;
789   }
790 
791   for (k=0; k<mbs; k++){
792     v  = aa + 16*ai[k];
793     vj = aj + ai[k];
794     tp = t + k*4;
795     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
796     nz = ai[k+1] - ai[k];
797 
798     tp = t + (*vj)*4;
799     while (nz--) {
800       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
801       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
802       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
803       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
804       vj++; tp = t + (*vj)*4;
805       v += 16;
806     }
807 
808     /* xk = inv(Dk)*(Dk*xk) */
809     diag  = aa+k*16;          /* ptr to inv(Dk) */
810     tp    = t + k*4;
811     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
812     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
813     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
814     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
815   }
816 
817   /* solve U*x = y by back substitution */
818   for (k=mbs-1; k>=0; k--){
819     v  = aa + 16*ai[k];
820     vj = aj + ai[k];
821     tp    = t + k*4;
822     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
823     nz = ai[k+1] - ai[k];
824 
825     tp = t + (*vj)*4;
826     while (nz--) {
827       /* xk += U(k,:)*x(:) */
828       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
829       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
830       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
831       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
832       vj++; tp = t + (*vj)*4;
833       v += 16;
834     }
835     tp    = t + k*4;
836     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
837     idx        = 4*r[k];
838     x[idx]     = x0;
839     x[idx+1]   = x1;
840     x[idx+2]   = x2;
841     x[idx+3]   = x3;
842   }
843 
844   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
845   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
846   PetscLogFlops(16*(2*a->nz + mbs));
847   PetscFunctionReturn(0);
848 }
849 
850 /*
851    Special case where the matrix was factored in the natural ordering.
852    This eliminates the need for the column and row permutation.
853 */
854 #undef __FUNCT__
855 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering"
856 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
857 {
858   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
859   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
860   MatScalar       *aa=a->a,*v,*diag;
861   PetscScalar     *x,*xp,*b,x0,x1,x2,x3;
862   int             nz,*vj,k,ierr;
863 
864   PetscFunctionBegin;
865 
866   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
867   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
868 
869   /* solve U^T * D * y = b by forward substitution */
870   ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */
871   for (k=0; k<mbs; k++){
872     v  = aa + 16*ai[k];
873     xp = x + k*4;
874     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
875     nz = ai[k+1] - ai[k];
876     vj = aj + ai[k];
877     xp = x + (*vj)*4;
878     while (nz--) {
879       /* x(:) += U(k,:)^T*(Dk*xk) */
880       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
881       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
882       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
883       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
884       vj++; xp = x + (*vj)*4;
885       v += 16;
886     }
887     /* xk = inv(Dk)*(Dk*xk) */
888     diag = aa+k*16;          /* ptr to inv(Dk) */
889     xp   = x + k*4;
890     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
891     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
892     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
893     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
894   }
895 
896   /* solve U*x = y by back substitution */
897   for (k=mbs-1; k>=0; k--){
898     v  = aa + 16*ai[k];
899     xp = x + k*4;
900     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
901     nz = ai[k+1] - ai[k];
902     vj = aj + ai[k];
903     xp = x + (*vj)*4;
904     while (nz--) {
905       /* xk += U(k,:)*x(:) */
906       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
907       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
908       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
909       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
910       vj++;
911       v += 16; xp = x + (*vj)*4;
912     }
913     xp = x + k*4;
914     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
915   }
916 
917   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
918   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
919   PetscLogFlops(16*(2*a->nz + mbs));
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatSolve_SeqSBAIJ_3"
925 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
926 {
927   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
928   IS              isrow=a->row;
929   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
930   int             nz,*vj,k,ierr,*r,idx;
931   MatScalar       *aa=a->a,*v,*diag;
932   PetscScalar     *x,*b,x0,x1,x2,*t,*tp;
933 
934   PetscFunctionBegin;
935   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
936   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
937   t  = a->solve_work;
938   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
939 
940   /* solve U^T * D * y = b by forward substitution */
941   tp = t;
942   for (k=0; k<mbs; k++) { /* t <- perm(b) */
943     idx   = 3*r[k];
944     tp[0] = b[idx];
945     tp[1] = b[idx+1];
946     tp[2] = b[idx+2];
947     tp += 3;
948   }
949 
950   for (k=0; k<mbs; k++){
951     v  = aa + 9*ai[k];
952     vj = aj + ai[k];
953     tp = t + k*3;
954     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
955     nz = ai[k+1] - ai[k];
956 
957     tp = t + (*vj)*3;
958     while (nz--) {
959       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
960       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
961       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
962       vj++; tp = t + (*vj)*3;
963       v += 9;
964     }
965 
966     /* xk = inv(Dk)*(Dk*xk) */
967     diag  = aa+k*9;          /* ptr to inv(Dk) */
968     tp    = t + k*3;
969     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
970     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
971     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
972   }
973 
974   /* solve U*x = y by back substitution */
975   for (k=mbs-1; k>=0; k--){
976     v  = aa + 9*ai[k];
977     vj = aj + ai[k];
978     tp    = t + k*3;
979     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
980     nz = ai[k+1] - ai[k];
981 
982     tp = t + (*vj)*3;
983     while (nz--) {
984       /* xk += U(k,:)*x(:) */
985       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
986       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
987       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
988       vj++; tp = t + (*vj)*3;
989       v += 9;
990     }
991     tp    = t + k*3;
992     tp[0] = x0; tp[1] = x1; tp[2] = x2;
993     idx      = 3*r[k];
994     x[idx]   = x0;
995     x[idx+1] = x1;
996     x[idx+2] = x2;
997   }
998 
999   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1000   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1001   PetscLogFlops(9*(2*a->nz + mbs));
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 /*
1006    Special case where the matrix was factored in the natural ordering.
1007    This eliminates the need for the column and row permutation.
1008 */
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering"
1011 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1012 {
1013   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1014   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1015   MatScalar       *aa=a->a,*v,*diag;
1016   PetscScalar     *x,*xp,*b,x0,x1,x2;
1017   int             nz,*vj,k,ierr;
1018 
1019   PetscFunctionBegin;
1020 
1021   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1022   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1023 
1024   /* solve U^T * D * y = b by forward substitution */
1025   ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1026   for (k=0; k<mbs; k++){
1027     v  = aa + 9*ai[k];
1028     xp = x + k*3;
1029     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1030     nz = ai[k+1] - ai[k];
1031     vj = aj + ai[k];
1032     xp = x + (*vj)*3;
1033     while (nz--) {
1034       /* x(:) += U(k,:)^T*(Dk*xk) */
1035       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1036       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1037       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1038       vj++; xp = x + (*vj)*3;
1039       v += 9;
1040     }
1041     /* xk = inv(Dk)*(Dk*xk) */
1042     diag = aa+k*9;          /* ptr to inv(Dk) */
1043     xp   = x + k*3;
1044     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1045     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1046     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1047   }
1048 
1049   /* solve U*x = y by back substitution */
1050   for (k=mbs-1; k>=0; k--){
1051     v  = aa + 9*ai[k];
1052     xp = x + k*3;
1053     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1054     nz = ai[k+1] - ai[k];
1055     vj = aj + ai[k];
1056     xp = x + (*vj)*3;
1057     while (nz--) {
1058       /* xk += U(k,:)*x(:) */
1059       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1060       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1061       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1062       vj++;
1063       v += 9; xp = x + (*vj)*3;
1064     }
1065     xp = x + k*3;
1066     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1067   }
1068 
1069   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1070   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1071   PetscLogFlops(9*(2*a->nz + mbs));
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatSolve_SeqSBAIJ_2"
1077 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1078 {
1079   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ *)A->data;
1080   IS              isrow=a->row;
1081   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1082   int             nz,*vj,k,k2,ierr,*r,idx;
1083   MatScalar       *aa=a->a,*v,*diag;
1084   PetscScalar     *x,*b,x0,x1,*t;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1088   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1089   t  = a->solve_work;
1090   /* printf("called MatSolve_SeqSBAIJ_2\n"); */
1091   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1092 
1093   /* solve U^T * D * y = perm(b) by forward substitution */
1094   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1095     idx = 2*r[k];
1096     t[k*2]   = b[idx];
1097     t[k*2+1] = b[idx+1];
1098   }
1099   for (k=0; k<mbs; k++){
1100     v  = aa + 4*ai[k];
1101     vj = aj + ai[k];
1102     k2 = k*2;
1103     x0 = t[k2]; x1 = t[k2+1];
1104     nz = ai[k+1] - ai[k];
1105     while (nz--) {
1106       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1107       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1108       vj++; v += 4;
1109     }
1110     diag = aa+k*4;  /* ptr to inv(Dk) */
1111     t[k2]   = diag[0]*x0 + diag[2]*x1;
1112     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1113   }
1114 
1115   /* solve U*x = y by back substitution */
1116   for (k=mbs-1; k>=0; k--){
1117     v  = aa + 4*ai[k];
1118     vj = aj + ai[k];
1119     k2 = k*2;
1120     x0 = t[k2]; x1 = t[k2+1];
1121     nz = ai[k+1] - ai[k];
1122     while (nz--) {
1123       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1124       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1125       vj++; v += 4;
1126     }
1127     t[k2]    = x0;
1128     t[k2+1]  = x1;
1129     idx      = 2*r[k];
1130     x[idx]   = x0;
1131     x[idx+1] = x1;
1132   }
1133 
1134   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1135   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1136   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1137   PetscLogFlops(4*(2*a->nz + mbs));
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 /*
1142    Special case where the matrix was factored in the natural ordering.
1143    This eliminates the need for the column and row permutation.
1144 */
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering"
1147 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1148 {
1149   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
1150   int             mbs=a->mbs,*ai=a->i,*aj=a->j;
1151   MatScalar       *aa=a->a,*v,*diag;
1152   PetscScalar     *x,*b,x0,x1;
1153   int             nz,*vj,k,k2,ierr;
1154 
1155   PetscFunctionBegin;
1156 
1157   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1158   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1159 
1160   /* solve U^T * D * y = b by forward substitution */
1161   ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1162   for (k=0; k<mbs; k++){
1163     v  = aa + 4*ai[k];
1164     vj = aj + ai[k];
1165     k2 = k*2;
1166     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1167     nz = ai[k+1] - ai[k];
1168 
1169     while (nz--) {
1170       /* x(:) += U(k,:)^T*(Dk*xk) */
1171       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1172       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1173       vj++; v += 4;
1174     }
1175     /* xk = inv(Dk)*(Dk*xk) */
1176     diag = aa+k*4;          /* ptr to inv(Dk) */
1177     x[k2]   = diag[0]*x0 + diag[2]*x1;
1178     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1179   }
1180 
1181   /* solve U*x = y by back substitution */
1182   for (k=mbs-1; k>=0; k--){
1183     v  = aa + 4*ai[k];
1184     vj = aj + ai[k];
1185     k2 = k*2;
1186     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1187     nz = ai[k+1] - ai[k];
1188     while (nz--) {
1189       /* xk += U(k,:)*x(:) */
1190       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1191       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1192       vj++; v += 4;
1193     }
1194     x[k2]     = x0;
1195     x[k2+1]   = x1;
1196   }
1197 
1198   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1200   PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "MatSolve_SeqSBAIJ_1"
1206 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1207 {
1208   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ *)A->data;
1209   IS              isrow=a->row;
1210   int             mbs=a->mbs,*ai=a->i,*aj=a->j,ierr,*rip;
1211   MatScalar       *aa=a->a,*v;
1212   PetscScalar     *x,*b,xk,*t;
1213   int             nz,*vj,k;
1214 
1215   PetscFunctionBegin;
1216   if (!mbs) PetscFunctionReturn(0);
1217 
1218   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1219   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1220   t    = a->solve_work;
1221 
1222   ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1223 
1224   /* solve U^T*D*y = perm(b) by forward substitution */
1225   for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1226   for (k=0; k<mbs; k++){
1227     v  = aa + ai[k];
1228     vj = aj + ai[k];
1229     xk = t[k];
1230     nz = ai[k+1] - ai[k];
1231     while (nz--) t[*vj++] += (*v++) * xk;
1232     t[k] = xk*aa[k];  /* note: aa[k] = 1/D(k) */
1233   }
1234 
1235   /* solve U*x = y by back substitution */
1236   for (k=mbs-1; k>=0; k--){
1237     v  = aa + ai[k];
1238     vj = aj + ai[k];
1239     xk = t[k];
1240     nz = ai[k+1] - ai[k];
1241     while (nz--) xk += (*v++) * t[*vj++];
1242     t[k]      = xk;
1243     x[rip[k]] = xk;
1244   }
1245 
1246   ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1247   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1248   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1249   PetscLogFlops(4*a->nz + A->m);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatSolves_SeqSBAIJ_1"
1255 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1256 {
1257   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   if (a->bs == 1) {
1262     ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr);
1263   } else {
1264     IS              isrow=a->row;
1265     int             mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i;
1266     MatScalar       *aa=a->a,*v;
1267     PetscScalar     *x,*b,*t;
1268     int             nz,*vj,k,n;
1269     if (bb->n > a->solves_work_n) {
1270       if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
1271       ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr);
1272       a->solves_work_n = bb->n;
1273     }
1274     n    = bb->n;
1275     ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr);
1276     ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr);
1277     t    = a->solves_work;
1278 
1279     ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr);
1280 
1281     /* solve U^T*D*y = perm(b) by forward substitution */
1282     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */
1283     for (k=0; k<mbs; k++){
1284       v  = aa + ai[k];
1285       vj = aj + ai[k];
1286       nz = ai[k+1] - ai[k];
1287       while (nz--) {
1288         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1289         v++;vj++;
1290       }
1291       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1292     }
1293 
1294     /* solve U*x = y by back substitution */
1295     for (k=mbs-1; k>=0; k--){
1296       v  = aa + ai[k];
1297       vj = aj + ai[k];
1298       nz = ai[k+1] - ai[k];
1299       while (nz--) {
1300         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1301         v++;vj++;
1302       }
1303       for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i];
1304     }
1305 
1306     ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr);
1307     ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr);
1308     ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr);
1309     PetscLogFlops(bb->n*(4*a->nz + A->m));
1310   }
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 /*
1315       Special case where the matrix was ILU(0) factored in the natural
1316    ordering. This eliminates the need for the column and row permutation.
1317 */
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering"
1320 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1321 {
1322   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ *)A->data;
1323   int             mbs=a->mbs,*ai=a->i,*aj=a->j,ierr;
1324   MatScalar       *aa=a->a,*v;
1325   PetscScalar     *x,*b,xk;
1326   int             nz,*vj,k;
1327 
1328   PetscFunctionBegin;
1329   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1330   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1331 
1332   /* solve U^T*D*y = b by forward substitution */
1333   ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1334   for (k=0; k<mbs; k++){
1335     v  = aa + ai[k] + 1;
1336     vj = aj + ai[k] + 1;
1337     xk = x[k];
1338     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1339     while (nz--) x[*vj++] += (*v++) * xk;
1340     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1341   }
1342 
1343   /* solve U*x = y by back substitution */
1344   for (k=mbs-2; k>=0; k--){
1345     v  = aa + ai[k] + 1;
1346     vj = aj + ai[k] + 1;
1347     xk = x[k];
1348     nz = ai[k+1] - ai[k] - 1;
1349     while (nz--) xk += (*v++) * x[*vj++];
1350     x[k] = xk;
1351   }
1352 
1353   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
1354   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1355   PetscLogFlops(4*a->nz + A->m);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ"
1362 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1363 {
1364   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
1365   int         *rip,ierr,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1366   int         *jutmp,bs = a->bs,bs2=a->bs2;
1367   int         m,realloc = 0,*levtmp;
1368   int         *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl;
1369   int         incrlev,*lev,shift,prow,nz;
1370   int         *il,ili,nextprow;
1371   PetscReal   f = info->fill,levels = info->levels;
1372   PetscTruth  perm_identity;
1373 
1374   PetscFunctionBegin;
1375   /* check whether perm is the identity mapping */
1376   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
1377 
1378   /* special case that simply copies fill pattern */
1379   if (!levels && perm_identity && bs==1) {
1380     ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr);
1381     (*B)->factor    = FACTOR_CHOLESKY;
1382     b               = (Mat_SeqSBAIJ*)(*B)->data;
1383     b->row          = perm;
1384     b->icol         = perm;
1385     b->factor_damping   = info->damping;
1386     b->factor_shift     = info->shift;
1387     b->factor_zeropivot = info->zeropivot;
1388     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1389     ierr         = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1390     ierr         = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1391     (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1392     (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1393     PetscFunctionReturn(0);
1394   }
1395 
1396   /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */
1397   if (levels > 0 && perm_identity && bs==1 ){
1398     if (!perm_identity) a->permute = PETSC_TRUE;
1399 
1400   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1401 
1402   if (perm_identity){ /* without permutation */
1403     ai = a->i; aj = a->j;
1404   } else {            /* non-trivial permutation */
1405     ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr);
1406     ai = a->inew; aj = a->jnew;
1407   }
1408 
1409   /* initialization */
1410   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1411   umax  = (int)(f*ai[mbs] + 1);
1412   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1413   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1414   iu[0] = 0;
1415   juidx = 0; /* index for ju */
1416   ierr  = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */
1417   q      = jl + mbs;   /* linked list for col index of active row */
1418   levtmp = q + mbs;
1419   il     = levtmp + mbs;
1420   for (i=0; i<mbs; i++){
1421     jl[i] = mbs;
1422     q[i]  = 0;
1423     il[i] = 0;
1424   }
1425 
1426   /* for each row k */
1427   for (k=0; k<mbs; k++){
1428     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
1429     q[k] = mbs;
1430     /* initialize nonzero structure of k-th row to row rip[k] of A */
1431     jmin = ai[rip[k]] +1; /* exclude diag[k] */
1432     jmax = ai[rip[k]+1];
1433     for (j=jmin; j<jmax; j++){
1434       vj = rip[aj[j]]; /* col. value */
1435       if(vj > k){
1436         qm = k;
1437         do {
1438           m  = qm; qm = q[m];
1439         } while(qm < vj);
1440         if (qm == vj) {
1441           SETERRQ(1," error: duplicate entry in A\n");
1442         }
1443         nzk++;
1444         q[m]  = vj;
1445         q[vj] = qm;
1446         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1447       } /* if(vj > k) */
1448     } /* for (j=jmin; j<jmax; j++) */
1449 
1450     /* modify nonzero structure of k-th row by computing fill-in
1451        for each row i to be merged in */
1452     prow = k;
1453     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
1454 
1455     while (prow < k){
1456       nextprow = jl[prow];
1457       /* merge row prow into k-th row */
1458       ili  = il[prow];
1459       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
1460       jmax = iu[prow+1];
1461       qm   = k;
1462       for (j=jmin; j<jmax; j++){
1463         vj = ju[j];
1464         incrlev = lev[j] + 1;
1465         if (incrlev > levels) continue;
1466         do {
1467           m = qm; qm = q[m];
1468         } while (qm < vj);
1469         if (qm != vj){  /* a fill */
1470           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1471           levtmp[vj]  = incrlev;
1472         } else {
1473           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1474         }
1475       }
1476       if (jmin < jmax){ /* update il and jl */
1477         il[prow] = jmin;
1478         j = ju[jmin];
1479         jl[prow] = jl[j]; jl[j] = prow;
1480       }
1481       prow = nextprow;
1482     }
1483 
1484     /* add the first nonzero element in U(k, k+1:mbs-1) to jl */
1485     if (nzk > 0){
1486       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1487       jl[k] = jl[i]; jl[i] = k;
1488       il[k] = iu[k] + 1;
1489     }
1490     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */
1491 
1492     /* allocate more space to ju if needed */
1493     if (iu[k+1] > umax) {
1494       /* estimate how much additional space we will need */
1495       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1496       /* just double the memory each time */
1497       maxadd = umax;
1498       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1499       umax += maxadd;
1500 
1501       /* allocate a longer ju */
1502       ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1503       ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1504       ierr = PetscFree(ju);CHKERRQ(ierr);
1505       ju   = jutmp;
1506 
1507       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1508       ierr     = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr);
1509       ierr     = PetscFree(lev);CHKERRQ(ierr);
1510       lev      = jutmp;
1511       realloc += 2; /* count how many times we realloc */
1512     }
1513 
1514     /* save nonzero structure of k-th row in ju */
1515     ju[juidx]  = k; /* diag[k] */
1516     lev[juidx] = 0;
1517     juidx++;
1518     i = k;
1519     while (nzk --) {
1520       i           = q[i];
1521       ju[juidx] = i;
1522       lev[juidx] = levtmp[i];
1523       juidx++;
1524     }
1525   } /* end of for (k=0; k<mbs; k++) */
1526 
1527   if (ai[mbs] != 0) {
1528     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1529     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1530     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
1531     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
1532     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
1533   } else {
1534      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1535   }
1536 
1537   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1538   ierr = PetscFree(jl);CHKERRQ(ierr);
1539   ierr = PetscFree(lev);CHKERRQ(ierr);
1540 
1541   /* put together the new matrix */
1542   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1543   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1544   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1545 
1546   /* PetscLogObjectParent(*B,iperm); */
1547   b = (Mat_SeqSBAIJ*)(*B)->data;
1548   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1549   b->singlemalloc = PETSC_FALSE;
1550   /* the next line frees the default space generated by the Create() */
1551   ierr = PetscFree(b->a);CHKERRQ(ierr);
1552   ierr = PetscFree(b->ilen);CHKERRQ(ierr);
1553   ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1554   b->j    = ju;
1555   b->i    = iu;
1556   b->diag = 0;
1557   b->ilen = 0;
1558   b->imax = 0;
1559   b->row  = perm;
1560   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1561   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1562   b->icol = perm;
1563   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1564   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1565   /* In b structure:  Free imax, ilen, old a, old j.
1566      Allocate idnew, solve_work, new a, new j */
1567   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1568   b->maxnz          = b->nz = iu[mbs];
1569   b->factor_damping   = info->damping;
1570   b->factor_shift     = info->shift;
1571   b->factor_zeropivot = info->zeropivot;
1572 
1573   (*B)->factor                 = FACTOR_CHOLESKY;
1574   (*B)->info.factor_mallocs    = realloc;
1575   (*B)->info.fill_ratio_given  = f;
1576   if (ai[mbs] != 0) {
1577     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1578   } else {
1579     (*B)->info.fill_ratio_needed = 0.0;
1580   }
1581 
1582 
1583   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1584   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1585   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
1586 
1587   PetscFunctionReturn(0);
1588   } /* end of if (levels > 0 && perm_identity && bs==1 ) */
1589 
1590   if (!perm_identity) a->permute = PETSC_TRUE;
1591   if (perm_identity){
1592     ai = a->i; aj = a->j;
1593   } else { /*  non-trivial permutation */
1594     ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr);
1595     ai = a->inew; aj = a->jnew;
1596   }
1597 
1598   /* initialization */
1599   ierr  = ISGetIndices(perm,&rip);CHKERRQ(ierr);
1600   umax  = (int)(f*ai[mbs] + 1);
1601   ierr  = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr);
1602   umax += mbs + 1;
1603   shift = mbs + 1;
1604   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr);
1605   ierr  = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr);
1606   iu[0] = mbs + 1;
1607   juidx = mbs + 1;
1608   /* prowl: linked list for pivot row */
1609   ierr    = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr);
1610   /* q: linked list for col index */
1611   q       = prowl + mbs;
1612   levtmp  = q     + mbs;
1613 
1614   for (i=0; i<mbs; i++){
1615     prowl[i] = mbs;
1616     q[i] = 0;
1617   }
1618 
1619   /* for each row k */
1620   for (k=0; k<mbs; k++){
1621     nzk  = 0;
1622     q[k] = mbs;
1623     /* copy current row into linked list */
1624     nz = ai[rip[k]+1] - ai[rip[k]];
1625     j = ai[rip[k]];
1626     while (nz--){
1627       vj = rip[aj[j++]];
1628       if (vj > k){
1629         qm = k;
1630         do {
1631           m  = qm; qm = q[m];
1632         } while(qm < vj);
1633         if (qm == vj) {
1634           SETERRQ(1," error: duplicate entry in A\n");
1635         }
1636         nzk++;
1637         q[m]       = vj;
1638         q[vj]      = qm;
1639         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1640       }
1641     }
1642 
1643     /* modify nonzero structure of k-th row by computing fill-in
1644        for each row prow to be merged in */
1645     prow = k;
1646     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1647 
1648     while (prow < k){
1649       /* merge row prow into k-th row */
1650       jmin = iu[prow] + 1;
1651       jmax = iu[prow+1];
1652       qm = k;
1653       for (j=jmin; j<jmax; j++){
1654         incrlev = lev[j-shift] + 1;
1655 	if (incrlev > levels) continue;
1656 
1657         vj      = ju[j];
1658         do {
1659           m = qm; qm = q[m];
1660         } while (qm < vj);
1661         if (qm != vj){      /* a fill */
1662           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1663           levtmp[vj] = incrlev;
1664         } else {
1665           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1666         }
1667       }
1668       prow = prowl[prow]; /* next pivot row */
1669     }
1670 
1671     /* add k to row list for first nonzero element in k-th row */
1672     if (nzk > 1){
1673       i = q[k]; /* col value of first nonzero element in k_th row of U */
1674       prowl[k] = prowl[i]; prowl[i] = k;
1675     }
1676     iu[k+1] = iu[k] + nzk;
1677 
1678     /* allocate more space to ju and lev if needed */
1679     if (iu[k+1] > umax) {
1680       /* estimate how much additional space we will need */
1681       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1682       /* just double the memory each time */
1683       maxadd = umax;
1684       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1685       umax += maxadd;
1686 
1687       /* allocate a longer ju */
1688       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1689       ierr     = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr);
1690       ierr     = PetscFree(ju);CHKERRQ(ierr);
1691       ju       = jutmp;
1692 
1693       ierr     = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr);
1694       ierr     = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr);
1695       ierr     = PetscFree(lev);CHKERRQ(ierr);
1696       lev      = jutmp;
1697       realloc += 2; /* count how many times we realloc */
1698     }
1699 
1700     /* save nonzero structure of k-th row in ju */
1701     i=k;
1702     while (nzk --) {
1703       i                = q[i];
1704       ju[juidx]        = i;
1705       lev[juidx-shift] = levtmp[i];
1706       juidx++;
1707     }
1708   }
1709 
1710   if (ai[mbs] != 0) {
1711     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1712     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1713     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af);
1714     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af);
1715     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n");
1716   } else {
1717     PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
1718   }
1719 
1720   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
1721   ierr = PetscFree(prowl);CHKERRQ(ierr);
1722   ierr = PetscFree(lev);CHKERRQ(ierr);
1723 
1724   /* put together the new matrix */
1725   ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr);
1726   ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr);
1727   ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr);
1728 
1729   /* PetscLogObjectParent(*B,iperm); */
1730   b    = (Mat_SeqSBAIJ*)(*B)->data;
1731   ierr = PetscFree(b->imax);CHKERRQ(ierr);
1732   b->singlemalloc = PETSC_FALSE;
1733   /* the next line frees the default space generated by the Create() */
1734   ierr    = PetscFree(b->a);CHKERRQ(ierr);
1735   ierr    = PetscFree(b->ilen);CHKERRQ(ierr);
1736   ierr    = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
1737   b->j    = ju;
1738   b->i    = iu;
1739   b->diag = 0;
1740   b->ilen = 0;
1741   b->imax = 0;
1742 
1743   if (b->row) {
1744     ierr = ISDestroy(b->row);CHKERRQ(ierr);
1745   }
1746   if (b->icol) {
1747     ierr = ISDestroy(b->icol);CHKERRQ(ierr);
1748   }
1749   b->row  = perm;
1750   b->icol = perm;
1751   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1752   ierr    = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
1753   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
1754   /* In b structure:  Free imax, ilen, old a, old j.
1755      Allocate idnew, solve_work, new a, new j */
1756   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1757   b->maxnz = b->nz = iu[mbs];
1758 
1759   (*B)->factor                 = FACTOR_CHOLESKY;
1760   (*B)->info.factor_mallocs    = realloc;
1761   (*B)->info.fill_ratio_given  = f;
1762   if (ai[mbs] != 0) {
1763     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1764   } else {
1765     (*B)->info.fill_ratio_needed = 0.0;
1766   }
1767 
1768   if (perm_identity){
1769     switch (bs) {
1770       case 1:
1771         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1772         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1773         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
1774         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n");
1775         break;
1776       case 2:
1777         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1778         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1779         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1780         break;
1781       case 3:
1782         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1783         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1784         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
1785         break;
1786       case 4:
1787         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1788         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1789         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1790         break;
1791       case 5:
1792         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1793         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1794         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1795         break;
1796       case 6:
1797         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1798         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1799         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1800         break;
1801       case 7:
1802         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1803         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1804         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1805       break;
1806       default:
1807         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1808         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1809         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
1810       break;
1811     }
1812   }
1813 
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 
1818 
1819