xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 077c4231e34fe2fc24e74260814973911a0e4bce)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,x1,s1;
133   const PetscScalar *b;
134   const MatScalar   *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode    ierr;
136   PetscInt          m = a->mbs,i,i2,nz,j;
137   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0]   = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v  = aa + ai[i];
162         vi = aj + ai[i];
163         nz = diag[i] - ai[i];
164         s1 = b[i2];
165         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
166         x[i2]  = idiag[0]*s1;
167         idiag += 1;
168         i2    += 1;
169       }
170       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
171       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
172     }
173     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
174         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
175       i2    = 0;
176       mdiag = a->idiag+a->mbs;
177       for (i=0; i<m; i++) {
178         x1     = x[i2];
179         x[i2]  = mdiag[0]*x1;
180         mdiag += 1;
181         i2    += 1;
182       }
183       ierr = PetscLogFlops(m);CHKERRQ(ierr);
184     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
185       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
186     }
187     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
188       idiag  = a->idiag+a->mbs - 1;
189       i2     = m - 1;
190       x1     = x[i2];
191       x[i2]  = idiag[0]*x1;
192       idiag -= 1;
193       i2    -= 1;
194       for (i=m-2; i>=0; i--) {
195         v  = aa + (diag[i]+1);
196         vi = aj + diag[i] + 1;
197         nz = ai[i+1] - diag[i] - 1;
198         s1 = x[i2];
199         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
200         x[i2]  = idiag[0]*s1;
201         idiag -= 1;
202         i2    -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215 {
216   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
217   PetscScalar       *x,x1,x2,s1,s2;
218   const PetscScalar *b;
219   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
220   PetscErrorCode    ierr;
221   PetscInt          m = a->mbs,i,i2,nz,idx,j,it;
222   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
223 
224   PetscFunctionBegin;
225   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
226   its = its*lits;
227   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
228   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
229   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
230   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
231   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
232 
233   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
234 
235   diag  = a->diag;
236   idiag = a->idiag;
237   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
238   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
239 
240   if (flag & SOR_ZERO_INITIAL_GUESS) {
241     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
242       x[0]   = b[0]*idiag[0] + b[1]*idiag[2];
243       x[1]   = b[0]*idiag[1] + b[1]*idiag[3];
244       i2     = 2;
245       idiag += 4;
246       for (i=1; i<m; i++) {
247         v  = aa + 4*ai[i];
248         vi = aj + ai[i];
249         nz = diag[i] - ai[i];
250         s1 = b[i2]; s2 = b[i2+1];
251         for (j=0; j<nz; j++) {
252           idx = 2*vi[j];
253           it  = 4*j;
254           x1  = x[idx]; x2 = x[1+idx];
255           s1 -= v[it]*x1 + v[it+2]*x2;
256           s2 -= v[it+1]*x1 + v[it+3]*x2;
257         }
258         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
259         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
260         idiag  += 4;
261         i2     += 2;
262       }
263       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
264       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
265     }
266     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
267         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
268       i2    = 0;
269       mdiag = a->idiag+4*a->mbs;
270       for (i=0; i<m; i++) {
271         x1      = x[i2]; x2 = x[i2+1];
272         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
273         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
274         mdiag  += 4;
275         i2     += 2;
276       }
277       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
278     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
279       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
280     }
281     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
282       idiag   = a->idiag+4*a->mbs - 4;
283       i2      = 2*m - 2;
284       x1      = x[i2]; x2 = x[i2+1];
285       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
286       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
287       idiag  -= 4;
288       i2     -= 2;
289       for (i=m-2; i>=0; i--) {
290         v  = aa + 4*(diag[i]+1);
291         vi = aj + diag[i] + 1;
292         nz = ai[i+1] - diag[i] - 1;
293         s1 = x[i2]; s2 = x[i2+1];
294         for (j=0; j<nz; j++) {
295           idx = 2*vi[j];
296           it  = 4*j;
297           x1  = x[idx]; x2 = x[1+idx];
298           s1 -= v[it]*x1 + v[it+2]*x2;
299           s2 -= v[it+1]*x1 + v[it+3]*x2;
300         }
301         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
302         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
303         idiag  -= 4;
304         i2     -= 2;
305       }
306       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
307     }
308   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
310   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
317 {
318   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
319   PetscScalar       *x,x1,x2,x3,s1,s2,s3;
320   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
321   const PetscScalar *b;
322   PetscErrorCode    ierr;
323   PetscInt          m = a->mbs,i,i2,nz,idx;
324   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
325 
326   PetscFunctionBegin;
327   its = its*lits;
328   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
329   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
334 
335   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
336 
337   diag  = a->diag;
338   idiag = a->idiag;
339   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
340   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
341 
342   if (flag & SOR_ZERO_INITIAL_GUESS) {
343     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
344       x[0]   = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345       x[1]   = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346       x[2]   = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347       i2     = 3;
348       idiag += 9;
349       for (i=1; i<m; i++) {
350         v  = aa + 9*ai[i];
351         vi = aj + ai[i];
352         nz = diag[i] - ai[i];
353         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354         while (nz--) {
355           idx = 3*(*vi++);
356           x1  = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360           v  += 9;
361         }
362         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365         idiag  += 9;
366         i2     += 3;
367       }
368       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
370     }
371     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373       i2    = 0;
374       mdiag = a->idiag+9*a->mbs;
375       for (i=0; i<m; i++) {
376         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380         mdiag  += 9;
381         i2     += 3;
382       }
383       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
384     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
386     }
387     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
388       idiag   = a->idiag+9*a->mbs - 9;
389       i2      = 3*m - 3;
390       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394       idiag  -= 9;
395       i2     -= 3;
396       for (i=m-2; i>=0; i--) {
397         v  = aa + 9*(diag[i]+1);
398         vi = aj + diag[i] + 1;
399         nz = ai[i+1] - diag[i] - 1;
400         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401         while (nz--) {
402           idx = 3*(*vi++);
403           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407           v  += 9;
408         }
409         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412         idiag  -= 9;
413         i2     -= 3;
414       }
415       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
416     }
417   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
418   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
419   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
426 {
427   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
428   PetscScalar       *x,x1,x2,x3,x4,s1,s2,s3,s4;
429   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
430   const PetscScalar *b;
431   PetscErrorCode    ierr;
432   PetscInt          m = a->mbs,i,i2,nz,idx;
433   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
434 
435   PetscFunctionBegin;
436   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
437   its = its*lits;
438   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
439   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
440   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
441   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
442   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
443 
444   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
445 
446   diag  = a->diag;
447   idiag = a->idiag;
448   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
449   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
450 
451   if (flag & SOR_ZERO_INITIAL_GUESS) {
452     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
453       x[0]   = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
454       x[1]   = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
455       x[2]   = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
456       x[3]   = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
457       i2     = 4;
458       idiag += 16;
459       for (i=1; i<m; i++) {
460         v  = aa + 16*ai[i];
461         vi = aj + ai[i];
462         nz = diag[i] - ai[i];
463         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
464         while (nz--) {
465           idx = 4*(*vi++);
466           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
467           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
468           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
469           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
470           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
471           v  += 16;
472         }
473         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
474         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
475         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
476         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
477         idiag  += 16;
478         i2     += 4;
479       }
480       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
481       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
482     }
483     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
484         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
485       i2    = 0;
486       mdiag = a->idiag+16*a->mbs;
487       for (i=0; i<m; i++) {
488         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
489         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
490         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
491         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
492         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
493         mdiag  += 16;
494         i2     += 4;
495       }
496       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
497     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
498       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
499     }
500     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
501       idiag   = a->idiag+16*a->mbs - 16;
502       i2      = 4*m - 4;
503       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
504       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
505       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
506       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
507       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
508       idiag  -= 16;
509       i2     -= 4;
510       for (i=m-2; i>=0; i--) {
511         v  = aa + 16*(diag[i]+1);
512         vi = aj + diag[i] + 1;
513         nz = ai[i+1] - diag[i] - 1;
514         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
515         while (nz--) {
516           idx = 4*(*vi++);
517           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
518           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
519           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
520           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
521           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
522           v  += 16;
523         }
524         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
525         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
526         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
527         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
528         idiag  -= 16;
529         i2     -= 4;
530       }
531       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
532     }
533   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
534   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
542 {
543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544   PetscScalar       *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
545   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
546   const PetscScalar *b;
547   PetscErrorCode    ierr;
548   PetscInt          m = a->mbs,i,i2,nz,idx;
549   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
550 
551   PetscFunctionBegin;
552   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
553   its = its*lits;
554   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
555   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
556   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
557   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
558   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
559 
560   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
561 
562   diag  = a->diag;
563   idiag = a->idiag;
564   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
565   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
566 
567   if (flag & SOR_ZERO_INITIAL_GUESS) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       x[0]   = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
570       x[1]   = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
571       x[2]   = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
572       x[3]   = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
573       x[4]   = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
574       i2     = 5;
575       idiag += 25;
576       for (i=1; i<m; i++) {
577         v  = aa + 25*ai[i];
578         vi = aj + ai[i];
579         nz = diag[i] - ai[i];
580         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
581         while (nz--) {
582           idx = 5*(*vi++);
583           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
584           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
585           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
586           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
587           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
588           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
589           v  += 25;
590         }
591         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
592         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
593         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
594         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
595         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
596         idiag  += 25;
597         i2     += 5;
598       }
599       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
600       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
601     }
602     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
603         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
604       i2    = 0;
605       mdiag = a->idiag+25*a->mbs;
606       for (i=0; i<m; i++) {
607         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
608         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
609         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
610         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
611         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
612         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
613         mdiag  += 25;
614         i2     += 5;
615       }
616       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
617     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
618       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
619     }
620     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
621       idiag   = a->idiag+25*a->mbs - 25;
622       i2      = 5*m - 5;
623       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
624       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
625       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
626       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
627       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
628       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
629       idiag  -= 25;
630       i2     -= 5;
631       for (i=m-2; i>=0; i--) {
632         v  = aa + 25*(diag[i]+1);
633         vi = aj + diag[i] + 1;
634         nz = ai[i+1] - diag[i] - 1;
635         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
636         while (nz--) {
637           idx = 5*(*vi++);
638           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
639           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
640           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
641           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
642           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
643           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
644           v  += 25;
645         }
646         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
647         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
648         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
649         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
650         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
651         idiag  -= 25;
652         i2     -= 5;
653       }
654       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
655     }
656   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
665 {
666   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
667   PetscScalar       *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
668   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
669   const PetscScalar *b;
670   PetscErrorCode    ierr;
671   PetscInt          m = a->mbs,i,i2,nz,idx;
672   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
673 
674   PetscFunctionBegin;
675   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
676   its = its*lits;
677   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
678   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
679   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
680   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
681   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
682 
683   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
684 
685   diag  = a->diag;
686   idiag = a->idiag;
687   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
688   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
689 
690   if (flag & SOR_ZERO_INITIAL_GUESS) {
691     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
692       x[0]   = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
693       x[1]   = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
694       x[2]   = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
695       x[3]   = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
696       x[4]   = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
697       x[5]   = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
698       i2     = 6;
699       idiag += 36;
700       for (i=1; i<m; i++) {
701         v  = aa + 36*ai[i];
702         vi = aj + ai[i];
703         nz = diag[i] - ai[i];
704         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
705         while (nz--) {
706           idx = 6*(*vi++);
707           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
708           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
709           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
710           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
711           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
712           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
713           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
714           v  += 36;
715         }
716         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
717         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
718         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
719         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
720         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
721         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
722         idiag  += 36;
723         i2     += 6;
724       }
725       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
726       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
727     }
728     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
729         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
730       i2    = 0;
731       mdiag = a->idiag+36*a->mbs;
732       for (i=0; i<m; i++) {
733         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
734         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
735         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
736         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
737         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
738         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
739         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
740         mdiag  += 36;
741         i2     += 6;
742       }
743       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
744     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
745       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
746     }
747     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748       idiag   = a->idiag+36*a->mbs - 36;
749       i2      = 6*m - 6;
750       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
751       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
752       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
753       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
754       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
755       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
756       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
757       idiag  -= 36;
758       i2     -= 6;
759       for (i=m-2; i>=0; i--) {
760         v  = aa + 36*(diag[i]+1);
761         vi = aj + diag[i] + 1;
762         nz = ai[i+1] - diag[i] - 1;
763         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
764         while (nz--) {
765           idx = 6*(*vi++);
766           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
767           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
768           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
769           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
770           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
771           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
772           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
773           v  += 36;
774         }
775         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
776         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
777         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
778         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
779         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
780         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
781         idiag  -= 36;
782         i2     -= 6;
783       }
784       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
785     }
786   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
787   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
788   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
795 {
796   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
797   PetscScalar       *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
798   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
799   const PetscScalar *b;
800   PetscErrorCode    ierr;
801   PetscInt          m = a->mbs,i,i2,nz,idx;
802   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
803 
804   PetscFunctionBegin;
805   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
806   its = its*lits;
807   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
808   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
809   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
810   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
811   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
812 
813   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
814 
815   diag  = a->diag;
816   idiag = a->idiag;
817   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
818   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
819 
820   if (flag & SOR_ZERO_INITIAL_GUESS) {
821     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
822       x[0]   = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
823       x[1]   = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
824       x[2]   = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
825       x[3]   = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
826       x[4]   = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
827       x[5]   = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
828       x[6]   = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
829       i2     = 7;
830       idiag += 49;
831       for (i=1; i<m; i++) {
832         v  = aa + 49*ai[i];
833         vi = aj + ai[i];
834         nz = diag[i] - ai[i];
835         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
836         while (nz--) {
837           idx = 7*(*vi++);
838           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
839           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
840           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
841           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
842           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
843           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
844           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
845           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
846           v  += 49;
847         }
848         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
849         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
850         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
851         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
852         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
853         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
854         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
855         idiag  += 49;
856         i2     += 7;
857       }
858       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
859       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
860     }
861     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
862         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
863       i2    = 0;
864       mdiag = a->idiag+49*a->mbs;
865       for (i=0; i<m; i++) {
866         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
867         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
868         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
869         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
870         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
871         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
872         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
873         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
874         mdiag  += 49;
875         i2     += 7;
876       }
877       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
878     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
879       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
880     }
881     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882       idiag   = a->idiag+49*a->mbs - 49;
883       i2      = 7*m - 7;
884       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
885       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
886       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
887       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
888       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
889       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
890       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
891       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
892       idiag  -= 49;
893       i2     -= 7;
894       for (i=m-2; i>=0; i--) {
895         v  = aa + 49*(diag[i]+1);
896         vi = aj + diag[i] + 1;
897         nz = ai[i+1] - diag[i] - 1;
898         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
899         while (nz--) {
900           idx = 7*(*vi++);
901           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
902           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
903           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
904           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
905           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
906           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
907           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
908           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
909           v  += 49;
910         }
911         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
912         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
913         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
914         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
915         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
916         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
917         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
918         idiag  -= 49;
919         i2     -= 7;
920       }
921       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
922     }
923   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
932 {
933   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
934   PetscScalar       *x,*work,*w,*workt;
935   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
936   const PetscScalar *b;
937   PetscErrorCode    ierr;
938   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
939   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
940 
941   PetscFunctionBegin;
942   its = its*lits;
943   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
944   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
945   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
946   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
947   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
948   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
949 
950   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
951 
952   diag  = a->diag;
953   idiag = a->idiag;
954   if (!a->mult_work) {
955     k    = PetscMax(A->rmap->n,A->cmap->n);
956     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
957   }
958   work = a->mult_work;
959   if (!a->sor_work) {
960     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
961   }
962   w = a->sor_work;
963 
964   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
966 
967   if (flag & SOR_ZERO_INITIAL_GUESS) {
968     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
969       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
970       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
971       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
972       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
973       i2     = bs;
974       idiag += bs2;
975       for (i=1; i<m; i++) {
976         v  = aa + bs2*ai[i];
977         vi = aj + ai[i];
978         nz = diag[i] - ai[i];
979 
980         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
981         /* copy all rows of x that are needed into contiguous space */
982         workt = work;
983         for (j=0; j<nz; j++) {
984           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
985           workt += bs;
986         }
987         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
988         /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
989          while (nz--) {
990            idx  = N*(*vi++);
991            x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
992            s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
993            s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
994            s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
995            v   += N2;
996            } */
997 
998         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
999         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1000         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1001         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1002 
1003         idiag += bs2;
1004         i2    += bs;
1005       }
1006       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1007       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1008     }
1009     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1010         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1011       i2    = 0;
1012       mdiag = a->idiag+bs2*a->mbs;
1013       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1014       for (i=0; i<m; i++) {
1015         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1016         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1017         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1018         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1019         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1020 
1021         mdiag += bs2;
1022         i2    += bs;
1023       }
1024       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1025     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1026       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1027     }
1028     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1029       idiag = a->idiag+bs2*a->mbs - bs2;
1030       i2    = bs*m - bs;
1031       ierr  = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1033       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1034       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1035       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1036       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1037       idiag -= bs2;
1038       i2    -= bs;
1039       for (i=m-2; i>=0; i--) {
1040         v  = aa + bs2*(diag[i]+1);
1041         vi = aj + diag[i] + 1;
1042         nz = ai[i+1] - diag[i] - 1;
1043 
1044         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1045         /* copy all rows of x that are needed into contiguous space */
1046         workt = work;
1047         for (j=0; j<nz; j++) {
1048           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1049           workt += bs;
1050         }
1051         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1052         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1053         while (nz--) {
1054           idx  = N*(*vi++);
1055           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1056           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1057           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1058           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1059           v   += N2;
1060           } */
1061 
1062         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1063         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1064         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1065         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1066         idiag -= bs2;
1067         i2    -= bs;
1068       }
1069       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1070     }
1071   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 /*
1078     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1079 */
1080 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1083 #define matsetvaluesblocked4_ matsetvaluesblocked4
1084 #endif
1085 
1086 EXTERN_C_BEGIN
1087 #undef __FUNCT__
1088 #define __FUNCT__ "matsetvaluesblocked4_"
1089 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1090 {
1091   Mat               A  = *AA;
1092   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1093   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1094   PetscInt          *ai    =a->i,*ailen=a->ilen;
1095   PetscInt          *aj    =a->j,stepval,lastcol = -1;
1096   const PetscScalar *value = v;
1097   MatScalar         *ap,*aa = a->a,*bap;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1101   stepval = (n-1)*4;
1102   for (k=0; k<m; k++) { /* loop over added rows */
1103     row  = im[k];
1104     rp   = aj + ai[row];
1105     ap   = aa + 16*ai[row];
1106     nrow = ailen[row];
1107     low  = 0;
1108     high = nrow;
1109     for (l=0; l<n; l++) { /* loop over added columns */
1110       col = in[l];
1111       if (col <= lastcol)  low = 0;
1112       else                high = nrow;
1113       lastcol = col;
1114       value   = v + k*(stepval+4 + l)*4;
1115       while (high-low > 7) {
1116         t = (low+high)/2;
1117         if (rp[t] > col) high = t;
1118         else             low  = t;
1119       }
1120       for (i=low; i<high; i++) {
1121         if (rp[i] > col) break;
1122         if (rp[i] == col) {
1123           bap = ap +  16*i;
1124           for (ii=0; ii<4; ii++,value+=stepval) {
1125             for (jj=ii; jj<16; jj+=4) {
1126               bap[jj] += *value++;
1127             }
1128           }
1129           goto noinsert2;
1130         }
1131       }
1132       N = nrow++ - 1;
1133       high++; /* added new column index thus must search to one higher than before */
1134       /* shift up all the later entries in this row */
1135       for (ii=N; ii>=i; ii--) {
1136         rp[ii+1] = rp[ii];
1137         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1138       }
1139       if (N >= i) {
1140         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1141       }
1142       rp[i] = col;
1143       bap   = ap +  16*i;
1144       for (ii=0; ii<4; ii++,value+=stepval) {
1145         for (jj=ii; jj<16; jj+=4) {
1146           bap[jj] = *value++;
1147         }
1148       }
1149       noinsert2:;
1150       low = i;
1151     }
1152     ailen[row] = nrow;
1153   }
1154   PetscFunctionReturnVoid();
1155 }
1156 EXTERN_C_END
1157 
1158 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1159 #define matsetvalues4_ MATSETVALUES4
1160 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1161 #define matsetvalues4_ matsetvalues4
1162 #endif
1163 
1164 EXTERN_C_BEGIN
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatSetValues4_"
1167 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1168 {
1169   Mat         A  = *AA;
1170   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1171   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1172   PetscInt    *ai=a->i,*ailen=a->ilen;
1173   PetscInt    *aj=a->j,brow,bcol;
1174   PetscInt    ridx,cidx,lastcol = -1;
1175   MatScalar   *ap,value,*aa=a->a,*bap;
1176 
1177   PetscFunctionBegin;
1178   for (k=0; k<m; k++) { /* loop over added rows */
1179     row  = im[k]; brow = row/4;
1180     rp   = aj + ai[brow];
1181     ap   = aa + 16*ai[brow];
1182     nrow = ailen[brow];
1183     low  = 0;
1184     high = nrow;
1185     for (l=0; l<n; l++) { /* loop over added columns */
1186       col   = in[l]; bcol = col/4;
1187       ridx  = row % 4; cidx = col % 4;
1188       value = v[l + k*n];
1189       if (col <= lastcol)  low = 0;
1190       else                high = nrow;
1191       lastcol = col;
1192       while (high-low > 7) {
1193         t = (low+high)/2;
1194         if (rp[t] > bcol) high = t;
1195         else              low  = t;
1196       }
1197       for (i=low; i<high; i++) {
1198         if (rp[i] > bcol) break;
1199         if (rp[i] == bcol) {
1200           bap   = ap +  16*i + 4*cidx + ridx;
1201           *bap += value;
1202           goto noinsert1;
1203         }
1204       }
1205       N = nrow++ - 1;
1206       high++; /* added new column thus must search to one higher than before */
1207       /* shift up all the later entries in this row */
1208       for (ii=N; ii>=i; ii--) {
1209         rp[ii+1] = rp[ii];
1210         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1211       }
1212       if (N>=i) {
1213         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1214       }
1215       rp[i]                    = bcol;
1216       ap[16*i + 4*cidx + ridx] = value;
1217 noinsert1:;
1218       low = i;
1219     }
1220     ailen[brow] = nrow;
1221   }
1222   PetscFunctionReturnVoid();
1223 }
1224 EXTERN_C_END
1225 
1226 /*
1227      Checks for missing diagonals
1228 */
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1231 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1232 {
1233   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1234   PetscErrorCode ierr;
1235   PetscInt       *diag,*jj = a->j,i;
1236 
1237   PetscFunctionBegin;
1238   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1239   *missing = PETSC_FALSE;
1240   if (A->rmap->n > 0 && !jj) {
1241     *missing = PETSC_TRUE;
1242     if (d) *d = 0;
1243     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1244   } else {
1245     diag = a->diag;
1246     for (i=0; i<a->mbs; i++) {
1247       if (jj[diag[i]] != i) {
1248         *missing = PETSC_TRUE;
1249         if (d) *d = i;
1250         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1251         break;
1252       }
1253     }
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1260 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1261 {
1262   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1263   PetscErrorCode ierr;
1264   PetscInt       i,j,m = a->mbs;
1265 
1266   PetscFunctionBegin;
1267   if (!a->diag) {
1268     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1269     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1270     a->free_diag = PETSC_TRUE;
1271   }
1272   for (i=0; i<m; i++) {
1273     a->diag[i] = a->i[i+1];
1274     for (j=a->i[i]; j<a->i[i+1]; j++) {
1275       if (a->j[j] == i) {
1276         a->diag[i] = j;
1277         break;
1278       }
1279     }
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1287 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1288 {
1289   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1290   PetscErrorCode ierr;
1291   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1292   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1293 
1294   PetscFunctionBegin;
1295   *nn = n;
1296   if (!ia) PetscFunctionReturn(0);
1297   if (symmetric) {
1298     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1299     nz   = tia[n];
1300   } else {
1301     tia = a->i; tja = a->j;
1302   }
1303 
1304   if (!blockcompressed && bs > 1) {
1305     (*nn) *= bs;
1306     /* malloc & create the natural set of indices */
1307     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1308     if (n) {
1309       (*ia)[0] = 0;
1310       for (j=1; j<bs; j++) {
1311         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1312       }
1313     }
1314 
1315     for (i=1; i<n; i++) {
1316       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1317       for (j=1; j<bs; j++) {
1318         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1319       }
1320     }
1321     if (n) {
1322       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1323     }
1324 
1325     if (inja) {
1326       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1327       cnt = 0;
1328       for (i=0; i<n; i++) {
1329         for (j=0; j<bs; j++) {
1330           for (k=tia[i]; k<tia[i+1]; k++) {
1331             for (l=0; l<bs; l++) {
1332               (*ja)[cnt++] = bs*tja[k] + l;
1333             }
1334           }
1335         }
1336       }
1337     }
1338 
1339     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1340       ierr = PetscFree(tia);CHKERRQ(ierr);
1341       ierr = PetscFree(tja);CHKERRQ(ierr);
1342     }
1343   } else if (oshift == 1) {
1344     if (symmetric) {
1345       nz = tia[A->rmap->n/bs];
1346       /*  add 1 to i and j indices */
1347       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1348       *ia = tia;
1349       if (ja) {
1350         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1351         *ja = tja;
1352       }
1353     } else {
1354       nz = a->i[A->rmap->n/bs];
1355       /* malloc space and  add 1 to i and j indices */
1356       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1357       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1358       if (ja) {
1359         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1360         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1361       }
1362     }
1363   } else {
1364     *ia = tia;
1365     if (ja) *ja = tja;
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1372 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1373 {
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   if (!ia) PetscFunctionReturn(0);
1378   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1379     ierr = PetscFree(*ia);CHKERRQ(ierr);
1380     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1387 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1388 {
1389   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1390   PetscErrorCode ierr;
1391 
1392   PetscFunctionBegin;
1393 #if defined(PETSC_USE_LOG)
1394   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1395 #endif
1396   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1397   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1398   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1399   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1400   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1401   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1402   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1403   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1404   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1405   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1406   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1407   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1408   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1409 
1410   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1411   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1412   ierr = PetscFree(A->data);CHKERRQ(ierr);
1413 
1414   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1415   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr);
1416   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1417   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr);
1424   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1430 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436   switch (op) {
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     break;
1440   case MAT_KEEP_NONZERO_PATTERN:
1441     a->keepnonzeropattern = flg;
1442     break;
1443   case MAT_NEW_NONZERO_LOCATIONS:
1444     a->nonew = (flg ? 0 : 1);
1445     break;
1446   case MAT_NEW_NONZERO_LOCATION_ERR:
1447     a->nonew = (flg ? -1 : 0);
1448     break;
1449   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1450     a->nonew = (flg ? -2 : 0);
1451     break;
1452   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1453     a->nounused = (flg ? -1 : 0);
1454     break;
1455   case MAT_CHECK_COMPRESSED_ROW:
1456     a->compressedrow.check = flg;
1457     break;
1458   case MAT_NEW_DIAGONALS:
1459   case MAT_IGNORE_OFF_PROC_ENTRIES:
1460   case MAT_USE_HASH_TABLE:
1461     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1462     break;
1463   case MAT_SPD:
1464   case MAT_SYMMETRIC:
1465   case MAT_STRUCTURALLY_SYMMETRIC:
1466   case MAT_HERMITIAN:
1467   case MAT_SYMMETRY_ETERNAL:
1468     /* These options are handled directly by MatSetOption() */
1469     break;
1470   default:
1471     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1478 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1479 {
1480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1481   PetscErrorCode ierr;
1482   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1483   MatScalar      *aa,*aa_i;
1484   PetscScalar    *v_i;
1485 
1486   PetscFunctionBegin;
1487   bs  = A->rmap->bs;
1488   ai  = a->i;
1489   aj  = a->j;
1490   aa  = a->a;
1491   bs2 = a->bs2;
1492 
1493   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1494 
1495   bn  = row/bs;   /* Block number */
1496   bp  = row % bs; /* Block Position */
1497   M   = ai[bn+1] - ai[bn];
1498   *nz = bs*M;
1499 
1500   if (v) {
1501     *v = 0;
1502     if (*nz) {
1503       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1504       for (i=0; i<M; i++) { /* for each block in the block row */
1505         v_i  = *v + i*bs;
1506         aa_i = aa + bs2*(ai[bn] + i);
1507         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1508       }
1509     }
1510   }
1511 
1512   if (idx) {
1513     *idx = 0;
1514     if (*nz) {
1515       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1516       for (i=0; i<M; i++) { /* for each block in the block row */
1517         idx_i = *idx + i*bs;
1518         itmp  = bs*aj[ai[bn] + i];
1519         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1520       }
1521     }
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1528 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1529 {
1530   PetscErrorCode ierr;
1531 
1532   PetscFunctionBegin;
1533   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1534   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1542 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1543 {
1544   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1545   Mat            C;
1546   PetscErrorCode ierr;
1547   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1548   PetscInt       *rows,*cols,bs2=a->bs2;
1549   MatScalar      *array;
1550 
1551   PetscFunctionBegin;
1552   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1553   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1554     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1555     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1556 
1557     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1558     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1559     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1560     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1561     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1562     ierr = PetscFree(col);CHKERRQ(ierr);
1563   } else {
1564     C = *B;
1565   }
1566 
1567   array = a->a;
1568   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1569   for (i=0; i<mbs; i++) {
1570     cols[0] = i*bs;
1571     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1572     len = ai[i+1] - ai[i];
1573     for (j=0; j<len; j++) {
1574       rows[0] = (*aj++)*bs;
1575       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1576       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1577       array += bs2;
1578     }
1579   }
1580   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1581 
1582   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1584 
1585   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1586     *B = C;
1587   } else {
1588     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 EXTERN_C_BEGIN
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1596 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1597 {
1598   PetscErrorCode ierr;
1599   Mat            Btrans;
1600 
1601   PetscFunctionBegin;
1602   *f   = PETSC_FALSE;
1603   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1604   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1605   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1606   PetscFunctionReturn(0);
1607 }
1608 EXTERN_C_END
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1612 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1613 {
1614   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1615   PetscErrorCode ierr;
1616   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1617   int            fd;
1618   PetscScalar    *aa;
1619   FILE           *file;
1620 
1621   PetscFunctionBegin;
1622   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1623   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1624   col_lens[0] = MAT_FILE_CLASSID;
1625 
1626   col_lens[1] = A->rmap->N;
1627   col_lens[2] = A->cmap->n;
1628   col_lens[3] = a->nz*bs2;
1629 
1630   /* store lengths of each row and write (including header) to file */
1631   count = 0;
1632   for (i=0; i<a->mbs; i++) {
1633     for (j=0; j<bs; j++) {
1634       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1635     }
1636   }
1637   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1638   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1639 
1640   /* store column indices (zero start index) */
1641   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1642   count = 0;
1643   for (i=0; i<a->mbs; i++) {
1644     for (j=0; j<bs; j++) {
1645       for (k=a->i[i]; k<a->i[i+1]; k++) {
1646         for (l=0; l<bs; l++) {
1647           jj[count++] = bs*a->j[k] + l;
1648         }
1649       }
1650     }
1651   }
1652   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1653   ierr = PetscFree(jj);CHKERRQ(ierr);
1654 
1655   /* store nonzero values */
1656   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1657   count = 0;
1658   for (i=0; i<a->mbs; i++) {
1659     for (j=0; j<bs; j++) {
1660       for (k=a->i[i]; k<a->i[i+1]; k++) {
1661         for (l=0; l<bs; l++) {
1662           aa[count++] = a->a[bs2*k + l*bs + j];
1663         }
1664       }
1665     }
1666   }
1667   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1668   ierr = PetscFree(aa);CHKERRQ(ierr);
1669 
1670   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1671   if (file) {
1672     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1673   }
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1679 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1680 {
1681   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1682   PetscErrorCode    ierr;
1683   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1684   PetscViewerFormat format;
1685 
1686   PetscFunctionBegin;
1687   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1688   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1689     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1690   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1691     Mat aij;
1692     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1693     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1694     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1695   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1696       PetscFunctionReturn(0);
1697   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1698     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1699     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1700     for (i=0; i<a->mbs; i++) {
1701       for (j=0; j<bs; j++) {
1702         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1703         for (k=a->i[i]; k<a->i[i+1]; k++) {
1704           for (l=0; l<bs; l++) {
1705 #if defined(PETSC_USE_COMPLEX)
1706             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1707               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1708                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1709             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1710               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1711                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1712             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1713               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1714             }
1715 #else
1716             if (a->a[bs2*k + l*bs + j] != 0.0) {
1717               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1718             }
1719 #endif
1720           }
1721         }
1722         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1723       }
1724     }
1725     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1726   } else {
1727     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1728     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1729     for (i=0; i<a->mbs; i++) {
1730       for (j=0; j<bs; j++) {
1731         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1732         for (k=a->i[i]; k<a->i[i+1]; k++) {
1733           for (l=0; l<bs; l++) {
1734 #if defined(PETSC_USE_COMPLEX)
1735             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1736               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1737                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1738             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1739               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1740                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1741             } else {
1742               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1743             }
1744 #else
1745             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1746 #endif
1747           }
1748         }
1749         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1750       }
1751     }
1752     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1753   }
1754   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #include <petscdraw.h>
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1761 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1762 {
1763   Mat               A = (Mat) Aa;
1764   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1765   PetscErrorCode    ierr;
1766   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1767   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1768   MatScalar         *aa;
1769   PetscViewer       viewer;
1770   PetscViewerFormat format;
1771 
1772   PetscFunctionBegin;
1773   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1774   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1775 
1776   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1777 
1778   /* loop over matrix elements drawing boxes */
1779 
1780   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1781     color = PETSC_DRAW_BLUE;
1782     for (i=0,row=0; i<mbs; i++,row+=bs) {
1783       for (j=a->i[i]; j<a->i[i+1]; j++) {
1784         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1785         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1786         aa  = a->a + j*bs2;
1787         for (k=0; k<bs; k++) {
1788           for (l=0; l<bs; l++) {
1789             if (PetscRealPart(*aa++) >=  0.) continue;
1790             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1791           }
1792         }
1793       }
1794     }
1795     color = PETSC_DRAW_CYAN;
1796     for (i=0,row=0; i<mbs; i++,row+=bs) {
1797       for (j=a->i[i]; j<a->i[i+1]; j++) {
1798         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1799         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1800         aa  = a->a + j*bs2;
1801         for (k=0; k<bs; k++) {
1802           for (l=0; l<bs; l++) {
1803             if (PetscRealPart(*aa++) != 0.) continue;
1804             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1805           }
1806         }
1807       }
1808     }
1809     color = PETSC_DRAW_RED;
1810     for (i=0,row=0; i<mbs; i++,row+=bs) {
1811       for (j=a->i[i]; j<a->i[i+1]; j++) {
1812         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1813         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1814         aa  = a->a + j*bs2;
1815         for (k=0; k<bs; k++) {
1816           for (l=0; l<bs; l++) {
1817             if (PetscRealPart(*aa++) <= 0.) continue;
1818             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1819           }
1820         }
1821       }
1822     }
1823   } else {
1824     /* use contour shading to indicate magnitude of values */
1825     /* first determine max of all nonzero values */
1826     PetscDraw popup;
1827     PetscReal scale,maxv = 0.0;
1828 
1829     for (i=0; i<a->nz*a->bs2; i++) {
1830       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1831     }
1832     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1833     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1834     if (popup) {
1835       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1836     }
1837     for (i=0,row=0; i<mbs; i++,row+=bs) {
1838       for (j=a->i[i]; j<a->i[i+1]; j++) {
1839         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1840         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1841         aa  = a->a + j*bs2;
1842         for (k=0; k<bs; k++) {
1843           for (l=0; l<bs; l++) {
1844             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1845             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1846           }
1847         }
1848       }
1849     }
1850   }
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1856 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1857 {
1858   PetscErrorCode ierr;
1859   PetscReal      xl,yl,xr,yr,w,h;
1860   PetscDraw      draw;
1861   PetscBool      isnull;
1862 
1863   PetscFunctionBegin;
1864   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1865   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1866 
1867   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1868   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1869   xr  += w;    yr += h;  xl = -w;     yl = -h;
1870   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1871   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1872   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatView_SeqBAIJ"
1878 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1879 {
1880   PetscErrorCode ierr;
1881   PetscBool      iascii,isbinary,isdraw;
1882 
1883   PetscFunctionBegin;
1884   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1885   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1886   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1887   if (iascii) {
1888     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1889   } else if (isbinary) {
1890     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1891   } else if (isdraw) {
1892     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1893   } else {
1894     Mat B;
1895     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1896     ierr = MatView(B,viewer);CHKERRQ(ierr);
1897     ierr = MatDestroy(&B);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1905 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1906 {
1907   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1908   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1909   PetscInt    *ai = a->i,*ailen = a->ilen;
1910   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1911   MatScalar   *ap,*aa = a->a;
1912 
1913   PetscFunctionBegin;
1914   for (k=0; k<m; k++) { /* loop over rows */
1915     row = im[k]; brow = row/bs;
1916     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1917     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1918     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1919     nrow = ailen[brow];
1920     for (l=0; l<n; l++) { /* loop over columns */
1921       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1922       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1923       col  = in[l];
1924       bcol = col/bs;
1925       cidx = col%bs;
1926       ridx = row%bs;
1927       high = nrow;
1928       low  = 0; /* assume unsorted */
1929       while (high-low > 5) {
1930         t = (low+high)/2;
1931         if (rp[t] > bcol) high = t;
1932         else             low  = t;
1933       }
1934       for (i=low; i<high; i++) {
1935         if (rp[i] > bcol) break;
1936         if (rp[i] == bcol) {
1937           *v++ = ap[bs2*i+bs*cidx+ridx];
1938           goto finished;
1939         }
1940       }
1941       *v++ = 0.0;
1942 finished:;
1943     }
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1950 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1951 {
1952   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1953   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1954   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1955   PetscErrorCode    ierr;
1956   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1957   PetscBool         roworiented=a->roworiented;
1958   const PetscScalar *value     = v;
1959   MatScalar         *ap,*aa = a->a,*bap;
1960 
1961   PetscFunctionBegin;
1962   if (roworiented) {
1963     stepval = (n-1)*bs;
1964   } else {
1965     stepval = (m-1)*bs;
1966   }
1967   for (k=0; k<m; k++) { /* loop over added rows */
1968     row = im[k];
1969     if (row < 0) continue;
1970 #if defined(PETSC_USE_DEBUG)
1971     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1972 #endif
1973     rp   = aj + ai[row];
1974     ap   = aa + bs2*ai[row];
1975     rmax = imax[row];
1976     nrow = ailen[row];
1977     low  = 0;
1978     high = nrow;
1979     for (l=0; l<n; l++) { /* loop over added columns */
1980       if (in[l] < 0) continue;
1981 #if defined(PETSC_USE_DEBUG)
1982       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1983 #endif
1984       col = in[l];
1985       if (roworiented) {
1986         value = v + (k*(stepval+bs) + l)*bs;
1987       } else {
1988         value = v + (l*(stepval+bs) + k)*bs;
1989       }
1990       if (col <= lastcol) low = 0;
1991       else high = nrow;
1992       lastcol = col;
1993       while (high-low > 7) {
1994         t = (low+high)/2;
1995         if (rp[t] > col) high = t;
1996         else             low  = t;
1997       }
1998       for (i=low; i<high; i++) {
1999         if (rp[i] > col) break;
2000         if (rp[i] == col) {
2001           bap = ap +  bs2*i;
2002           if (roworiented) {
2003             if (is == ADD_VALUES) {
2004               for (ii=0; ii<bs; ii++,value+=stepval) {
2005                 for (jj=ii; jj<bs2; jj+=bs) {
2006                   bap[jj] += *value++;
2007                 }
2008               }
2009             } else {
2010               for (ii=0; ii<bs; ii++,value+=stepval) {
2011                 for (jj=ii; jj<bs2; jj+=bs) {
2012                   bap[jj] = *value++;
2013                 }
2014               }
2015             }
2016           } else {
2017             if (is == ADD_VALUES) {
2018               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2019                 for (jj=0; jj<bs; jj++) {
2020                   bap[jj] += value[jj];
2021                 }
2022                 bap += bs;
2023               }
2024             } else {
2025               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2026                 for (jj=0; jj<bs; jj++) {
2027                   bap[jj]  = value[jj];
2028                 }
2029                 bap += bs;
2030               }
2031             }
2032           }
2033           goto noinsert2;
2034         }
2035       }
2036       if (nonew == 1) goto noinsert2;
2037       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2038       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2039       N = nrow++ - 1; high++;
2040       /* shift up all the later entries in this row */
2041       for (ii=N; ii>=i; ii--) {
2042         rp[ii+1] = rp[ii];
2043         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2044       }
2045       if (N >= i) {
2046         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2047       }
2048       rp[i] = col;
2049       bap   = ap +  bs2*i;
2050       if (roworiented) {
2051         for (ii=0; ii<bs; ii++,value+=stepval) {
2052           for (jj=ii; jj<bs2; jj+=bs) {
2053             bap[jj] = *value++;
2054           }
2055         }
2056       } else {
2057         for (ii=0; ii<bs; ii++,value+=stepval) {
2058           for (jj=0; jj<bs; jj++) {
2059             *bap++ = *value++;
2060           }
2061         }
2062       }
2063 noinsert2:;
2064       low = i;
2065     }
2066     ailen[row] = nrow;
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2073 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2074 {
2075   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
2076   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2077   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
2078   PetscErrorCode ierr;
2079   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
2080   MatScalar      *aa  = a->a,*ap;
2081   PetscReal      ratio=0.6;
2082 
2083   PetscFunctionBegin;
2084   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2085 
2086   if (m) rmax = ailen[0];
2087   for (i=1; i<mbs; i++) {
2088     /* move each row back by the amount of empty slots (fshift) before it*/
2089     fshift += imax[i-1] - ailen[i-1];
2090     rmax    = PetscMax(rmax,ailen[i]);
2091     if (fshift) {
2092       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2093       N  = ailen[i];
2094       for (j=0; j<N; j++) {
2095         ip[j-fshift] = ip[j];
2096 
2097         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2098       }
2099     }
2100     ai[i] = ai[i-1] + ailen[i-1];
2101   }
2102   if (mbs) {
2103     fshift += imax[mbs-1] - ailen[mbs-1];
2104     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2105   }
2106   /* reset ilen and imax for each row */
2107   for (i=0; i<mbs; i++) {
2108     ailen[i] = imax[i] = ai[i+1] - ai[i];
2109   }
2110   a->nz = ai[mbs];
2111 
2112   /* diagonals may have moved, so kill the diagonal pointers */
2113   a->idiagvalid = PETSC_FALSE;
2114   if (fshift && a->diag) {
2115     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2116     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2117     a->diag = 0;
2118   }
2119   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2120   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2121   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2122   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2123 
2124   A->info.mallocs    += a->reallocs;
2125   a->reallocs         = 0;
2126   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2127 
2128   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2129 
2130   A->same_nonzero = PETSC_TRUE;
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 /*
2135    This function returns an array of flags which indicate the locations of contiguous
2136    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2137    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2138    Assume: sizes should be long enough to hold all the values.
2139 */
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2142 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2143 {
2144   PetscInt  i,j,k,row;
2145   PetscBool flg;
2146 
2147   PetscFunctionBegin;
2148   for (i=0,j=0; i<n; j++) {
2149     row = idx[i];
2150     if (row%bs!=0) { /* Not the begining of a block */
2151       sizes[j] = 1;
2152       i++;
2153     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2154       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2155       i++;
2156     } else { /* Begining of the block, so check if the complete block exists */
2157       flg = PETSC_TRUE;
2158       for (k=1; k<bs; k++) {
2159         if (row+k != idx[i+k]) { /* break in the block */
2160           flg = PETSC_FALSE;
2161           break;
2162         }
2163       }
2164       if (flg) { /* No break in the bs */
2165         sizes[j] = bs;
2166         i       += bs;
2167       } else {
2168         sizes[j] = 1;
2169         i++;
2170       }
2171     }
2172   }
2173   *bs_max = j;
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2179 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2180 {
2181   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2182   PetscErrorCode    ierr;
2183   PetscInt          i,j,k,count,*rows;
2184   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2185   PetscScalar       zero = 0.0;
2186   MatScalar         *aa;
2187   const PetscScalar *xx;
2188   PetscScalar       *bb;
2189 
2190   PetscFunctionBegin;
2191   /* fix right hand side if needed */
2192   if (x && b) {
2193     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2194     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2195     for (i=0; i<is_n; i++) {
2196       bb[is_idx[i]] = diag*xx[is_idx[i]];
2197     }
2198     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2199     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2200   }
2201 
2202   /* Make a copy of the IS and  sort it */
2203   /* allocate memory for rows,sizes */
2204   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2205 
2206   /* copy IS values to rows, and sort them */
2207   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2208   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2209 
2210   if (baij->keepnonzeropattern) {
2211     for (i=0; i<is_n; i++) sizes[i] = 1;
2212     bs_max          = is_n;
2213     A->same_nonzero = PETSC_TRUE;
2214   } else {
2215     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2216 
2217     A->same_nonzero = PETSC_FALSE;
2218   }
2219 
2220   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2221     row = rows[j];
2222     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2223     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2224     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2225     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2226       if (diag != (PetscScalar)0.0) {
2227         if (baij->ilen[row/bs] > 0) {
2228           baij->ilen[row/bs]       = 1;
2229           baij->j[baij->i[row/bs]] = row/bs;
2230 
2231           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2232         }
2233         /* Now insert all the diagonal values for this bs */
2234         for (k=0; k<bs; k++) {
2235           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2236         }
2237       } else { /* (diag == 0.0) */
2238         baij->ilen[row/bs] = 0;
2239       } /* end (diag == 0.0) */
2240     } else { /* (sizes[i] != bs) */
2241 #if defined(PETSC_USE_DEBUG)
2242       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2243 #endif
2244       for (k=0; k<count; k++) {
2245         aa[0] =  zero;
2246         aa   += bs;
2247       }
2248       if (diag != (PetscScalar)0.0) {
2249         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2250       }
2251     }
2252   }
2253 
2254   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2255   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2261 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2262 {
2263   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2264   PetscErrorCode    ierr;
2265   PetscInt          i,j,k,count;
2266   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2267   PetscScalar       zero = 0.0;
2268   MatScalar         *aa;
2269   const PetscScalar *xx;
2270   PetscScalar       *bb;
2271   PetscBool         *zeroed,vecs = PETSC_FALSE;
2272 
2273   PetscFunctionBegin;
2274   /* fix right hand side if needed */
2275   if (x && b) {
2276     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2277     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2278     vecs = PETSC_TRUE;
2279   }
2280   A->same_nonzero = PETSC_TRUE;
2281 
2282   /* zero the columns */
2283   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2284   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2285   for (i=0; i<is_n; i++) {
2286     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2287     zeroed[is_idx[i]] = PETSC_TRUE;
2288   }
2289   for (i=0; i<A->rmap->N; i++) {
2290     if (!zeroed[i]) {
2291       row = i/bs;
2292       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2293         for (k=0; k<bs; k++) {
2294           col = bs*baij->j[j] + k;
2295           if (zeroed[col]) {
2296             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2297             if (vecs) bb[i] -= aa[0]*xx[col];
2298             aa[0] = 0.0;
2299           }
2300         }
2301       }
2302     } else if (vecs) bb[i] = diag*xx[i];
2303   }
2304   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2305   if (vecs) {
2306     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2307     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2308   }
2309 
2310   /* zero the rows */
2311   for (i=0; i<is_n; i++) {
2312     row   = is_idx[i];
2313     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2314     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2315     for (k=0; k<count; k++) {
2316       aa[0] =  zero;
2317       aa   += bs;
2318     }
2319     if (diag != (PetscScalar)0.0) {
2320       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2321     }
2322   }
2323   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 #undef __FUNCT__
2328 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2329 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2330 {
2331   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2332   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2333   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2334   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2335   PetscErrorCode ierr;
2336   PetscInt       ridx,cidx,bs2=a->bs2;
2337   PetscBool      roworiented=a->roworiented;
2338   MatScalar      *ap,value,*aa=a->a,*bap;
2339 
2340   PetscFunctionBegin;
2341   if (v) PetscValidScalarPointer(v,6);
2342   for (k=0; k<m; k++) { /* loop over added rows */
2343     row  = im[k];
2344     brow = row/bs;
2345     if (row < 0) continue;
2346 #if defined(PETSC_USE_DEBUG)
2347     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2348 #endif
2349     rp   = aj + ai[brow];
2350     ap   = aa + bs2*ai[brow];
2351     rmax = imax[brow];
2352     nrow = ailen[brow];
2353     low  = 0;
2354     high = nrow;
2355     for (l=0; l<n; l++) { /* loop over added columns */
2356       if (in[l] < 0) continue;
2357 #if defined(PETSC_USE_DEBUG)
2358       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2359 #endif
2360       col  = in[l]; bcol = col/bs;
2361       ridx = row % bs; cidx = col % bs;
2362       if (roworiented) {
2363         value = v[l + k*n];
2364       } else {
2365         value = v[k + l*m];
2366       }
2367       if (col <= lastcol) low = 0; else high = nrow;
2368       lastcol = col;
2369       while (high-low > 7) {
2370         t = (low+high)/2;
2371         if (rp[t] > bcol) high = t;
2372         else              low  = t;
2373       }
2374       for (i=low; i<high; i++) {
2375         if (rp[i] > bcol) break;
2376         if (rp[i] == bcol) {
2377           bap = ap +  bs2*i + bs*cidx + ridx;
2378           if (is == ADD_VALUES) *bap += value;
2379           else                  *bap  = value;
2380           goto noinsert1;
2381         }
2382       }
2383       if (nonew == 1) goto noinsert1;
2384       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2385       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2386       N = nrow++ - 1; high++;
2387       /* shift up all the later entries in this row */
2388       for (ii=N; ii>=i; ii--) {
2389         rp[ii+1] = rp[ii];
2390         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2391       }
2392       if (N>=i) {
2393         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2394       }
2395       rp[i]                      = bcol;
2396       ap[bs2*i + bs*cidx + ridx] = value;
2397       a->nz++;
2398 noinsert1:;
2399       low = i;
2400     }
2401     ailen[brow] = nrow;
2402   }
2403   A->same_nonzero = PETSC_FALSE;
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2409 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2410 {
2411   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2412   Mat            outA;
2413   PetscErrorCode ierr;
2414   PetscBool      row_identity,col_identity;
2415 
2416   PetscFunctionBegin;
2417   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2418   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2419   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2420   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2421 
2422   outA            = inA;
2423   inA->factortype = MAT_FACTOR_LU;
2424 
2425   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2426 
2427   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2428   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2429   a->row = row;
2430   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2431   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2432   a->col = col;
2433 
2434   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2435   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2436   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2437   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2438 
2439   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2440   if (!a->solve_work) {
2441     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2442     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2443   }
2444   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2445   PetscFunctionReturn(0);
2446 }
2447 
2448 EXTERN_C_BEGIN
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2451 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2452 {
2453   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2454   PetscInt    i,nz,mbs;
2455 
2456   PetscFunctionBegin;
2457   nz  = baij->maxnz;
2458   mbs = baij->mbs;
2459   for (i=0; i<nz; i++) {
2460     baij->j[i] = indices[i];
2461   }
2462   baij->nz = nz;
2463   for (i=0; i<mbs; i++) {
2464     baij->ilen[i] = baij->imax[i];
2465   }
2466   PetscFunctionReturn(0);
2467 }
2468 EXTERN_C_END
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2472 /*@
2473     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2474        in the matrix.
2475 
2476   Input Parameters:
2477 +  mat - the SeqBAIJ matrix
2478 -  indices - the column indices
2479 
2480   Level: advanced
2481 
2482   Notes:
2483     This can be called if you have precomputed the nonzero structure of the
2484   matrix and want to provide it to the matrix object to improve the performance
2485   of the MatSetValues() operation.
2486 
2487     You MUST have set the correct numbers of nonzeros per row in the call to
2488   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2489 
2490     MUST be called before any calls to MatSetValues();
2491 
2492 @*/
2493 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2494 {
2495   PetscErrorCode ierr;
2496 
2497   PetscFunctionBegin;
2498   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2499   PetscValidPointer(indices,2);
2500   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2506 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2507 {
2508   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2509   PetscErrorCode ierr;
2510   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2511   PetscReal      atmp;
2512   PetscScalar    *x,zero = 0.0;
2513   MatScalar      *aa;
2514   PetscInt       ncols,brow,krow,kcol;
2515 
2516   PetscFunctionBegin;
2517   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2518   bs  = A->rmap->bs;
2519   aa  = a->a;
2520   ai  = a->i;
2521   aj  = a->j;
2522   mbs = a->mbs;
2523 
2524   ierr = VecSet(v,zero);CHKERRQ(ierr);
2525   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2526   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2527   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2528   for (i=0; i<mbs; i++) {
2529     ncols = ai[1] - ai[0]; ai++;
2530     brow  = bs*i;
2531     for (j=0; j<ncols; j++) {
2532       for (kcol=0; kcol<bs; kcol++) {
2533         for (krow=0; krow<bs; krow++) {
2534           atmp = PetscAbsScalar(*aa);aa++;
2535           row  = brow + krow;   /* row index */
2536           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2537           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2538         }
2539       }
2540       aj++;
2541     }
2542   }
2543   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatCopy_SeqBAIJ"
2549 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2550 {
2551   PetscErrorCode ierr;
2552 
2553   PetscFunctionBegin;
2554   /* If the two matrices have the same copy implementation, use fast copy. */
2555   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2556     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2557     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2558     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2559 
2560     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2561     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2562     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2563   } else {
2564     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2565   }
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 #undef __FUNCT__
2570 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2571 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2572 {
2573   PetscErrorCode ierr;
2574 
2575   PetscFunctionBegin;
2576   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 #undef __FUNCT__
2581 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2582 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2583 {
2584   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2585 
2586   PetscFunctionBegin;
2587   *array = a->a;
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #undef __FUNCT__
2592 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2593 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2594 {
2595   PetscFunctionBegin;
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 #undef __FUNCT__
2600 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2601 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2602 {
2603   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2604   PetscErrorCode ierr;
2605   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2606   PetscBLASInt   one=1;
2607 
2608   PetscFunctionBegin;
2609   if (str == SAME_NONZERO_PATTERN) {
2610     PetscScalar  alpha = a;
2611     PetscBLASInt bnz;
2612     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2613     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2614   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2615     if (y->xtoy && y->XtoY != X) {
2616       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2617       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2618     }
2619     if (!y->xtoy) { /* get xtoy */
2620       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2621       y->XtoY = X;
2622       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2623     }
2624     for (i=0; i<x->nz; i++) {
2625       j = 0;
2626       while (j < bs2) {
2627         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2628         j++;
2629       }
2630     }
2631     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2632   } else {
2633     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2634   }
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 #undef __FUNCT__
2639 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2640 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2641 {
2642   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2643   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2644   MatScalar   *aa = a->a;
2645 
2646   PetscFunctionBegin;
2647   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2653 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2654 {
2655   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2656   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2657   MatScalar   *aa = a->a;
2658 
2659   PetscFunctionBegin;
2660   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2665 
2666 #undef __FUNCT__
2667 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2668 /*
2669     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2670 */
2671 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2672 {
2673   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2674   PetscErrorCode ierr;
2675   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2676   PetscInt       nz = a->i[m],row,*jj,mr,col;
2677 
2678   PetscFunctionBegin;
2679   *nn = n;
2680   if (!ia) PetscFunctionReturn(0);
2681   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2682   else {
2683     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2684     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2685     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2686     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2687     jj   = a->j;
2688     for (i=0; i<nz; i++) {
2689       collengths[jj[i]]++;
2690     }
2691     cia[0] = oshift;
2692     for (i=0; i<n; i++) {
2693       cia[i+1] = cia[i] + collengths[i];
2694     }
2695     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2696     jj   = a->j;
2697     for (row=0; row<m; row++) {
2698       mr = a->i[row+1] - a->i[row];
2699       for (i=0; i<mr; i++) {
2700         col = *jj++;
2701 
2702         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2703       }
2704     }
2705     ierr = PetscFree(collengths);CHKERRQ(ierr);
2706     *ia  = cia; *ja = cja;
2707   }
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2713 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2714 {
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   if (!ia) PetscFunctionReturn(0);
2719   ierr = PetscFree(*ia);CHKERRQ(ierr);
2720   ierr = PetscFree(*ja);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2726 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2727 {
2728   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2729   PetscErrorCode ierr;
2730   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2731   PetscScalar    dx,*y,*xx,*w3_array;
2732   PetscScalar    *vscale_array;
2733   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2734   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2735   void           *fctx   = coloring->fctx;
2736   PetscBool      flg     = PETSC_FALSE;
2737   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2738   Vec            x1_tmp;
2739 
2740   PetscFunctionBegin;
2741   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2742   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2743   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2744   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2745 
2746   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2747   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2748   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2749   if (flg) {
2750     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2751   } else {
2752     PetscBool assembled;
2753     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2754     if (assembled) {
2755       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2756     }
2757   }
2758 
2759   x1_tmp = x1;
2760   if (!coloring->vscale) {
2761     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2762   }
2763 
2764   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2765     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2766   }
2767   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2768 
2769   /* Set w1 = F(x1) */
2770   if (!coloring->fset) {
2771     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2772     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2773     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2774   } else {
2775     coloring->fset = PETSC_FALSE;
2776   }
2777 
2778   if (!coloring->w3) {
2779     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2780     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2781   }
2782   w3 = coloring->w3;
2783 
2784   CHKMEMQ;
2785   /* Compute all the local scale factors, including ghost points */
2786   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2787   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2788   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2789   if (ctype == IS_COLORING_GHOSTED) {
2790     col_start = 0; col_end = N;
2791   } else if (ctype == IS_COLORING_GLOBAL) {
2792     xx           = xx - start;
2793     vscale_array = vscale_array - start;
2794     col_start    = start; col_end = N + start;
2795   }
2796   CHKMEMQ;
2797   for (col=col_start; col<col_end; col++) {
2798     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2799     if (coloring->htype[0] == 'w') {
2800       dx = 1.0 + unorm;
2801     } else {
2802       dx = xx[col];
2803     }
2804     if (dx == (PetscScalar)0.0) dx = 1.0;
2805     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2806     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2807     dx               *= epsilon;
2808     vscale_array[col] = (PetscScalar)1.0/dx;
2809   }     CHKMEMQ;
2810   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2811   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2812   if (ctype == IS_COLORING_GLOBAL) {
2813     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2814     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2815   }
2816   CHKMEMQ;
2817   if (coloring->vscaleforrow) {
2818     vscaleforrow = coloring->vscaleforrow;
2819   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2820 
2821   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2822   /*
2823     Loop over each color
2824   */
2825   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2826   for (k=0; k<coloring->ncolors; k++) {
2827     coloring->currentcolor = k;
2828     for (i=0; i<bs; i++) {
2829       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2830       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2831       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2832       /*
2833         Loop over each column associated with color
2834         adding the perturbation to the vector w3.
2835       */
2836       for (l=0; l<coloring->ncolumns[k]; l++) {
2837         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2838         if (coloring->htype[0] == 'w') {
2839           dx = 1.0 + unorm;
2840         } else {
2841           dx = xx[col];
2842         }
2843         if (dx == (PetscScalar)0.0) dx = 1.0;
2844         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2845         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2846         dx *= epsilon;
2847         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2848         w3_array[col] += dx;
2849       }
2850       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2851       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2852 
2853       /*
2854         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2855         w2 = F(x1 + dx) - F(x1)
2856       */
2857       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2858       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2859       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2860       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2861 
2862       /*
2863         Loop over rows of vector, putting results into Jacobian matrix
2864       */
2865       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2866       for (l=0; l<coloring->nrows[k]; l++) {
2867         row = bs*coloring->rows[k][l];                /* local row index */
2868         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2869         for (j=0; j<bs; j++) {
2870           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2871           srows[j]  = row + start + j;
2872         }
2873         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2874       }
2875       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2876     }
2877   } /* endof for each color */
2878   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2879   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2880   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2881   ierr = PetscFree(srows);CHKERRQ(ierr);
2882 
2883   coloring->currentcolor = -1;
2884 
2885   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2886   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2887   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2888   PetscFunctionReturn(0);
2889 }
2890 
2891 /* -------------------------------------------------------------------*/
2892 static struct _MatOps MatOps_Values = {
2893         MatSetValues_SeqBAIJ,
2894         MatGetRow_SeqBAIJ,
2895         MatRestoreRow_SeqBAIJ,
2896         MatMult_SeqBAIJ_N,
2897 /* 4*/  MatMultAdd_SeqBAIJ_N,
2898         MatMultTranspose_SeqBAIJ,
2899         MatMultTransposeAdd_SeqBAIJ,
2900         0,
2901         0,
2902         0,
2903 /* 10*/ 0,
2904         MatLUFactor_SeqBAIJ,
2905         0,
2906         0,
2907         MatTranspose_SeqBAIJ,
2908 /* 15*/ MatGetInfo_SeqBAIJ,
2909         MatEqual_SeqBAIJ,
2910         MatGetDiagonal_SeqBAIJ,
2911         MatDiagonalScale_SeqBAIJ,
2912         MatNorm_SeqBAIJ,
2913 /* 20*/ 0,
2914         MatAssemblyEnd_SeqBAIJ,
2915         MatSetOption_SeqBAIJ,
2916         MatZeroEntries_SeqBAIJ,
2917 /* 24*/ MatZeroRows_SeqBAIJ,
2918         0,
2919         0,
2920         0,
2921         0,
2922 /* 29*/ MatSetUp_SeqBAIJ,
2923         0,
2924         0,
2925         0,
2926         0,
2927 /* 34*/ MatDuplicate_SeqBAIJ,
2928         0,
2929         0,
2930         MatILUFactor_SeqBAIJ,
2931         0,
2932 /* 39*/ MatAXPY_SeqBAIJ,
2933         MatGetSubMatrices_SeqBAIJ,
2934         MatIncreaseOverlap_SeqBAIJ,
2935         MatGetValues_SeqBAIJ,
2936         MatCopy_SeqBAIJ,
2937 /* 44*/ 0,
2938         MatScale_SeqBAIJ,
2939         0,
2940         0,
2941         MatZeroRowsColumns_SeqBAIJ,
2942 /* 49*/ 0,
2943         MatGetRowIJ_SeqBAIJ,
2944         MatRestoreRowIJ_SeqBAIJ,
2945         MatGetColumnIJ_SeqBAIJ,
2946         MatRestoreColumnIJ_SeqBAIJ,
2947 /* 54*/ MatFDColoringCreate_SeqAIJ,
2948         0,
2949         0,
2950         0,
2951         MatSetValuesBlocked_SeqBAIJ,
2952 /* 59*/ MatGetSubMatrix_SeqBAIJ,
2953         MatDestroy_SeqBAIJ,
2954         MatView_SeqBAIJ,
2955         0,
2956         0,
2957 /* 64*/ 0,
2958         0,
2959         0,
2960         0,
2961         0,
2962 /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2963         0,
2964         MatConvert_Basic,
2965         0,
2966         0,
2967 /* 74*/ 0,
2968         MatFDColoringApply_BAIJ,
2969         0,
2970         0,
2971         0,
2972 /* 79*/ 0,
2973         0,
2974         0,
2975         0,
2976         MatLoad_SeqBAIJ,
2977 /* 84*/ 0,
2978         0,
2979         0,
2980         0,
2981         0,
2982 /* 89*/ 0,
2983         0,
2984         0,
2985         0,
2986         0,
2987 /* 94*/ 0,
2988         0,
2989         0,
2990         0,
2991         0,
2992 /* 99*/ 0,
2993         0,
2994         0,
2995         0,
2996         0,
2997 /*104*/ 0,
2998         MatRealPart_SeqBAIJ,
2999         MatImaginaryPart_SeqBAIJ,
3000         0,
3001         0,
3002 /*109*/ 0,
3003         0,
3004         0,
3005         0,
3006         MatMissingDiagonal_SeqBAIJ,
3007 /*114*/ 0,
3008         0,
3009         0,
3010         0,
3011         0,
3012 /*119*/ 0,
3013         0,
3014         MatMultHermitianTranspose_SeqBAIJ,
3015         MatMultHermitianTransposeAdd_SeqBAIJ,
3016         0,
3017 /*124*/ 0,
3018         0,
3019         MatInvertBlockDiagonal_SeqBAIJ
3020 };
3021 
3022 EXTERN_C_BEGIN
3023 #undef __FUNCT__
3024 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3025 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3026 {
3027   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3028   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
3029   PetscErrorCode ierr;
3030 
3031   PetscFunctionBegin;
3032   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3033 
3034   /* allocate space for values if not already there */
3035   if (!aij->saved_values) {
3036     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3037     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3038   }
3039 
3040   /* copy values over */
3041   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3042   PetscFunctionReturn(0);
3043 }
3044 EXTERN_C_END
3045 
3046 EXTERN_C_BEGIN
3047 #undef __FUNCT__
3048 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3049 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3050 {
3051   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3052   PetscErrorCode ierr;
3053   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3054 
3055   PetscFunctionBegin;
3056   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3057   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3058 
3059   /* copy values over */
3060   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3061   PetscFunctionReturn(0);
3062 }
3063 EXTERN_C_END
3064 
3065 EXTERN_C_BEGIN
3066 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3067 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3068 EXTERN_C_END
3069 
3070 EXTERN_C_BEGIN
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3073 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3074 {
3075   Mat_SeqBAIJ    *b;
3076   PetscErrorCode ierr;
3077   PetscInt       i,mbs,nbs,bs2;
3078   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3079 
3080   PetscFunctionBegin;
3081   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3082   if (nz == MAT_SKIP_ALLOCATION) {
3083     skipallocation = PETSC_TRUE;
3084     nz             = 0;
3085   }
3086 
3087   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3088   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3089   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3090   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3091   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3092 
3093   B->preallocated = PETSC_TRUE;
3094 
3095   mbs = B->rmap->n/bs;
3096   nbs = B->cmap->n/bs;
3097   bs2 = bs*bs;
3098 
3099   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3100 
3101   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3102   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3103   if (nnz) {
3104     for (i=0; i<mbs; i++) {
3105       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3106       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3107     }
3108   }
3109 
3110   b    = (Mat_SeqBAIJ*)B->data;
3111   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3112   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3113   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3114 
3115   if (!flg) {
3116     switch (bs) {
3117     case 1:
3118       B->ops->mult    = MatMult_SeqBAIJ_1;
3119       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3120       B->ops->sor     = MatSOR_SeqBAIJ_1;
3121       break;
3122     case 2:
3123       B->ops->mult    = MatMult_SeqBAIJ_2;
3124       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3125       B->ops->sor     = MatSOR_SeqBAIJ_2;
3126       break;
3127     case 3:
3128       B->ops->mult    = MatMult_SeqBAIJ_3;
3129       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3130       B->ops->sor     = MatSOR_SeqBAIJ_3;
3131       break;
3132     case 4:
3133       B->ops->mult    = MatMult_SeqBAIJ_4;
3134       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3135       B->ops->sor     = MatSOR_SeqBAIJ_4;
3136       break;
3137     case 5:
3138       B->ops->mult    = MatMult_SeqBAIJ_5;
3139       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3140       B->ops->sor     = MatSOR_SeqBAIJ_5;
3141       break;
3142     case 6:
3143       B->ops->mult    = MatMult_SeqBAIJ_6;
3144       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3145       B->ops->sor     = MatSOR_SeqBAIJ_6;
3146       break;
3147     case 7:
3148       B->ops->mult    = MatMult_SeqBAIJ_7;
3149       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3150       B->ops->sor     = MatSOR_SeqBAIJ_7;
3151       break;
3152     case 15:
3153       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
3154       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3155       B->ops->sor     = MatSOR_SeqBAIJ_N;
3156       break;
3157     default:
3158       B->ops->mult    = MatMult_SeqBAIJ_N;
3159       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3160       B->ops->sor     = MatSOR_SeqBAIJ_N;
3161       break;
3162     }
3163   }
3164   b->mbs = mbs;
3165   b->nbs = nbs;
3166   if (!skipallocation) {
3167     if (!b->imax) {
3168       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3169       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3170 
3171       b->free_imax_ilen = PETSC_TRUE;
3172     }
3173     /* b->ilen will count nonzeros in each block row so far. */
3174     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3175     if (!nnz) {
3176       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3177       else if (nz < 0) nz = 1;
3178       for (i=0; i<mbs; i++) b->imax[i] = nz;
3179       nz = nz*mbs;
3180     } else {
3181       nz = 0;
3182       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3183     }
3184 
3185     /* allocate the matrix space */
3186     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3187     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3188     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3189     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3190     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3191 
3192     b->singlemalloc = PETSC_TRUE;
3193     b->i[0]         = 0;
3194     for (i=1; i<mbs+1; i++) {
3195       b->i[i] = b->i[i-1] + b->imax[i-1];
3196     }
3197     b->free_a  = PETSC_TRUE;
3198     b->free_ij = PETSC_TRUE;
3199   } else {
3200     b->free_a  = PETSC_FALSE;
3201     b->free_ij = PETSC_FALSE;
3202   }
3203 
3204   b->bs2              = bs2;
3205   b->mbs              = mbs;
3206   b->nz               = 0;
3207   b->maxnz            = nz;
3208   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3209   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3210   PetscFunctionReturn(0);
3211 }
3212 EXTERN_C_END
3213 
3214 EXTERN_C_BEGIN
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3217 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3218 {
3219   PetscInt       i,m,nz,nz_max=0,*nnz;
3220   PetscScalar    *values=0;
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3225   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3226   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3227   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3228   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3229   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3230   m    = B->rmap->n/bs;
3231 
3232   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3233   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3234   for (i=0; i<m; i++) {
3235     nz = ii[i+1]- ii[i];
3236     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3237     nz_max = PetscMax(nz_max, nz);
3238     nnz[i] = nz;
3239   }
3240   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3241   ierr = PetscFree(nnz);CHKERRQ(ierr);
3242 
3243   values = (PetscScalar*)V;
3244   if (!values) {
3245     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3246     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3247   }
3248   for (i=0; i<m; i++) {
3249     PetscInt          ncols  = ii[i+1] - ii[i];
3250     const PetscInt    *icols = jj + ii[i];
3251     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3252     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3253   }
3254   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3255   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3256   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3257   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3258   PetscFunctionReturn(0);
3259 }
3260 EXTERN_C_END
3261 
3262 
3263 EXTERN_C_BEGIN
3264 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3265 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3266 #if defined(PETSC_HAVE_MUMPS)
3267 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3268 #endif
3269 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3270 EXTERN_C_END
3271 
3272 /*MC
3273    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3274    block sparse compressed row format.
3275 
3276    Options Database Keys:
3277 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3278 
3279   Level: beginner
3280 
3281 .seealso: MatCreateSeqBAIJ()
3282 M*/
3283 
3284 EXTERN_C_BEGIN
3285 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3286 EXTERN_C_END
3287 
3288 EXTERN_C_BEGIN
3289 #undef __FUNCT__
3290 #define __FUNCT__ "MatCreate_SeqBAIJ"
3291 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3292 {
3293   PetscErrorCode ierr;
3294   PetscMPIInt    size;
3295   Mat_SeqBAIJ    *b;
3296 
3297   PetscFunctionBegin;
3298   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3299   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3300 
3301   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3302   B->data = (void*)b;
3303   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3304 
3305   b->row          = 0;
3306   b->col          = 0;
3307   b->icol         = 0;
3308   b->reallocs     = 0;
3309   b->saved_values = 0;
3310 
3311   b->roworiented        = PETSC_TRUE;
3312   b->nonew              = 0;
3313   b->diag               = 0;
3314   b->solve_work         = 0;
3315   b->mult_work          = 0;
3316   B->spptr              = 0;
3317   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3318   b->keepnonzeropattern = PETSC_FALSE;
3319   b->xtoy               = 0;
3320   b->XtoY               = 0;
3321   B->same_nonzero       = PETSC_FALSE;
3322 
3323   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3324                                            "MatGetFactorAvailable_seqbaij_petsc",
3325                                            MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3326   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3327                                            "MatGetFactor_seqbaij_petsc",
3328                                            MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3329   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3330                                            "MatGetFactor_seqbaij_bstrm",
3331                                            MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3332 #if defined(PETSC_HAVE_MUMPS)
3333   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3334 #endif
3335   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3336                                            "MatInvertBlockDiagonal_SeqBAIJ",
3337                                            MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3338   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3339                                            "MatStoreValues_SeqBAIJ",
3340                                            MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3341   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3342                                            "MatRetrieveValues_SeqBAIJ",
3343                                            MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3344   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3345                                            "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3346                                            MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3348                                            "MatConvert_SeqBAIJ_SeqAIJ",
3349                                            MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3350   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3351                                            "MatConvert_SeqBAIJ_SeqSBAIJ",
3352                                            MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3354                                            "MatSeqBAIJSetPreallocation_SeqBAIJ",
3355                                            MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3357                                            "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3358                                            MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3359   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3360                                            "MatConvert_SeqBAIJ_SeqBSTRM",
3361                                            MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3362   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3363                                            "MatIsTranspose_SeqBAIJ",
3364                                            MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3365   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3366   PetscFunctionReturn(0);
3367 }
3368 EXTERN_C_END
3369 
3370 #undef __FUNCT__
3371 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3372 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3373 {
3374   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3375   PetscErrorCode ierr;
3376   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3377 
3378   PetscFunctionBegin;
3379   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3380 
3381   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3382     c->imax           = a->imax;
3383     c->ilen           = a->ilen;
3384     c->free_imax_ilen = PETSC_FALSE;
3385   } else {
3386     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3387     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3388     for (i=0; i<mbs; i++) {
3389       c->imax[i] = a->imax[i];
3390       c->ilen[i] = a->ilen[i];
3391     }
3392     c->free_imax_ilen = PETSC_TRUE;
3393   }
3394 
3395   /* allocate the matrix space */
3396   if (mallocmatspace) {
3397     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3398       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3399       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3400       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3401 
3402       c->i            = a->i;
3403       c->j            = a->j;
3404       c->singlemalloc = PETSC_FALSE;
3405       c->free_a       = PETSC_TRUE;
3406       c->free_ij      = PETSC_FALSE;
3407       c->parent       = A;
3408       C->preallocated = PETSC_TRUE;
3409       C->assembled    = PETSC_TRUE;
3410 
3411       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3412       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3413       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3414     } else {
3415       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3416       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3417 
3418       c->singlemalloc = PETSC_TRUE;
3419       c->free_a       = PETSC_TRUE;
3420       c->free_ij      = PETSC_TRUE;
3421 
3422       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3423       if (mbs > 0) {
3424         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3425         if (cpvalues == MAT_COPY_VALUES) {
3426           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3427         } else {
3428           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3429         }
3430       }
3431       C->preallocated = PETSC_TRUE;
3432       C->assembled    = PETSC_TRUE;
3433     }
3434   }
3435 
3436   c->roworiented = a->roworiented;
3437   c->nonew       = a->nonew;
3438 
3439   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3440   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3441 
3442   c->bs2         = a->bs2;
3443   c->mbs         = a->mbs;
3444   c->nbs         = a->nbs;
3445 
3446   if (a->diag) {
3447     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3448       c->diag      = a->diag;
3449       c->free_diag = PETSC_FALSE;
3450     } else {
3451       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3452       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3453       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3454       c->free_diag = PETSC_TRUE;
3455     }
3456   } else c->diag = 0;
3457 
3458   c->nz         = a->nz;
3459   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3460   c->solve_work = 0;
3461   c->mult_work  = 0;
3462 
3463   c->compressedrow.use   = a->compressedrow.use;
3464   c->compressedrow.nrows = a->compressedrow.nrows;
3465   c->compressedrow.check = a->compressedrow.check;
3466   if (a->compressedrow.use) {
3467     i    = a->compressedrow.nrows;
3468     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3469     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3470     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3471     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3472   } else {
3473     c->compressedrow.use    = PETSC_FALSE;
3474     c->compressedrow.i      = NULL;
3475     c->compressedrow.rindex = NULL;
3476   }
3477   C->same_nonzero = A->same_nonzero;
3478 
3479   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3480   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3481   PetscFunctionReturn(0);
3482 }
3483 
3484 #undef __FUNCT__
3485 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3486 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3487 {
3488   PetscErrorCode ierr;
3489 
3490   PetscFunctionBegin;
3491   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3492   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3493   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3494   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatLoad_SeqBAIJ"
3500 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3501 {
3502   Mat_SeqBAIJ    *a;
3503   PetscErrorCode ierr;
3504   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3505   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3506   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3507   PetscInt       *masked,nmask,tmp,bs2,ishift;
3508   PetscMPIInt    size;
3509   int            fd;
3510   PetscScalar    *aa;
3511   MPI_Comm       comm;
3512 
3513   PetscFunctionBegin;
3514   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3515   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3516   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3517   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3518   bs2  = bs*bs;
3519 
3520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3521   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3522   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3523   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3524   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3525   M = header[1]; N = header[2]; nz = header[3];
3526 
3527   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3528   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3529 
3530   /*
3531      This code adds extra rows to make sure the number of rows is
3532     divisible by the blocksize
3533   */
3534   mbs        = M/bs;
3535   extra_rows = bs - M + bs*(mbs);
3536   if (extra_rows == bs) extra_rows = 0;
3537   else mbs++;
3538   if (extra_rows) {
3539     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3540   }
3541 
3542   /* Set global sizes if not already set */
3543   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3544     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3545   } else { /* Check if the matrix global sizes are correct */
3546     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3547     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3548       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3549     }
3550     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3551   }
3552 
3553   /* read in row lengths */
3554   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3555   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3556   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3557 
3558   /* read in column indices */
3559   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3560   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3561   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3562 
3563   /* loop over row lengths determining block row lengths */
3564   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3565   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3566   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3567   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3568   rowcount = 0;
3569   nzcount  = 0;
3570   for (i=0; i<mbs; i++) {
3571     nmask = 0;
3572     for (j=0; j<bs; j++) {
3573       kmax = rowlengths[rowcount];
3574       for (k=0; k<kmax; k++) {
3575         tmp = jj[nzcount++]/bs;
3576         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3577       }
3578       rowcount++;
3579     }
3580     browlengths[i] += nmask;
3581     /* zero out the mask elements we set */
3582     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3583   }
3584 
3585   /* Do preallocation  */
3586   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3587   a    = (Mat_SeqBAIJ*)newmat->data;
3588 
3589   /* set matrix "i" values */
3590   a->i[0] = 0;
3591   for (i=1; i<= mbs; i++) {
3592     a->i[i]      = a->i[i-1] + browlengths[i-1];
3593     a->ilen[i-1] = browlengths[i-1];
3594   }
3595   a->nz = 0;
3596   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3597 
3598   /* read in nonzero values */
3599   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3600   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3601   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3602 
3603   /* set "a" and "j" values into matrix */
3604   nzcount = 0; jcount = 0;
3605   for (i=0; i<mbs; i++) {
3606     nzcountb = nzcount;
3607     nmask    = 0;
3608     for (j=0; j<bs; j++) {
3609       kmax = rowlengths[i*bs+j];
3610       for (k=0; k<kmax; k++) {
3611         tmp = jj[nzcount++]/bs;
3612         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3613       }
3614     }
3615     /* sort the masked values */
3616     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3617 
3618     /* set "j" values into matrix */
3619     maskcount = 1;
3620     for (j=0; j<nmask; j++) {
3621       a->j[jcount++]  = masked[j];
3622       mask[masked[j]] = maskcount++;
3623     }
3624     /* set "a" values into matrix */
3625     ishift = bs2*a->i[i];
3626     for (j=0; j<bs; j++) {
3627       kmax = rowlengths[i*bs+j];
3628       for (k=0; k<kmax; k++) {
3629         tmp       = jj[nzcountb]/bs;
3630         block     = mask[tmp] - 1;
3631         point     = jj[nzcountb] - bs*tmp;
3632         idx       = ishift + bs2*block + j + bs*point;
3633         a->a[idx] = (MatScalar)aa[nzcountb++];
3634       }
3635     }
3636     /* zero out the mask elements we set */
3637     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3638   }
3639   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3640 
3641   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3642   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3643   ierr = PetscFree(aa);CHKERRQ(ierr);
3644   ierr = PetscFree(jj);CHKERRQ(ierr);
3645   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3646 
3647   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3648   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 #undef __FUNCT__
3653 #define __FUNCT__ "MatCreateSeqBAIJ"
3654 /*@C
3655    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3656    compressed row) format.  For good matrix assembly performance the
3657    user should preallocate the matrix storage by setting the parameter nz
3658    (or the array nnz).  By setting these parameters accurately, performance
3659    during matrix assembly can be increased by more than a factor of 50.
3660 
3661    Collective on MPI_Comm
3662 
3663    Input Parameters:
3664 +  comm - MPI communicator, set to PETSC_COMM_SELF
3665 .  bs - size of block
3666 .  m - number of rows
3667 .  n - number of columns
3668 .  nz - number of nonzero blocks  per block row (same for all rows)
3669 -  nnz - array containing the number of nonzero blocks in the various block rows
3670          (possibly different for each block row) or NULL
3671 
3672    Output Parameter:
3673 .  A - the matrix
3674 
3675    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3676    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3677    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3678 
3679    Options Database Keys:
3680 .   -mat_no_unroll - uses code that does not unroll the loops in the
3681                      block calculations (much slower)
3682 .    -mat_block_size - size of the blocks to use
3683 
3684    Level: intermediate
3685 
3686    Notes:
3687    The number of rows and columns must be divisible by blocksize.
3688 
3689    If the nnz parameter is given then the nz parameter is ignored
3690 
3691    A nonzero block is any block that as 1 or more nonzeros in it
3692 
3693    The block AIJ format is fully compatible with standard Fortran 77
3694    storage.  That is, the stored row and column indices can begin at
3695    either one (as in Fortran) or zero.  See the users' manual for details.
3696 
3697    Specify the preallocated storage with either nz or nnz (not both).
3698    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3699    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3700    matrices.
3701 
3702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3703 @*/
3704 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3705 {
3706   PetscErrorCode ierr;
3707 
3708   PetscFunctionBegin;
3709   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3710   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3711   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3712   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3713   PetscFunctionReturn(0);
3714 }
3715 
3716 #undef __FUNCT__
3717 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3718 /*@C
3719    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3720    per row in the matrix. For good matrix assembly performance the
3721    user should preallocate the matrix storage by setting the parameter nz
3722    (or the array nnz).  By setting these parameters accurately, performance
3723    during matrix assembly can be increased by more than a factor of 50.
3724 
3725    Collective on MPI_Comm
3726 
3727    Input Parameters:
3728 +  A - the matrix
3729 .  bs - size of block
3730 .  nz - number of block nonzeros per block row (same for all rows)
3731 -  nnz - array containing the number of block nonzeros in the various block rows
3732          (possibly different for each block row) or NULL
3733 
3734    Options Database Keys:
3735 .   -mat_no_unroll - uses code that does not unroll the loops in the
3736                      block calculations (much slower)
3737 .    -mat_block_size - size of the blocks to use
3738 
3739    Level: intermediate
3740 
3741    Notes:
3742    If the nnz parameter is given then the nz parameter is ignored
3743 
3744    You can call MatGetInfo() to get information on how effective the preallocation was;
3745    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3746    You can also run with the option -info and look for messages with the string
3747    malloc in them to see if additional memory allocation was needed.
3748 
3749    The block AIJ format is fully compatible with standard Fortran 77
3750    storage.  That is, the stored row and column indices can begin at
3751    either one (as in Fortran) or zero.  See the users' manual for details.
3752 
3753    Specify the preallocated storage with either nz or nnz (not both).
3754    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3755    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3756 
3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3758 @*/
3759 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3760 {
3761   PetscErrorCode ierr;
3762 
3763   PetscFunctionBegin;
3764   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3765   PetscValidType(B,1);
3766   PetscValidLogicalCollectiveInt(B,bs,2);
3767   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3773 /*@C
3774    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3775    (the default sequential PETSc format).
3776 
3777    Collective on MPI_Comm
3778 
3779    Input Parameters:
3780 +  A - the matrix
3781 .  i - the indices into j for the start of each local row (starts with zero)
3782 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3783 -  v - optional values in the matrix
3784 
3785    Level: developer
3786 
3787 .keywords: matrix, aij, compressed row, sparse
3788 
3789 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3790 @*/
3791 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3792 {
3793   PetscErrorCode ierr;
3794 
3795   PetscFunctionBegin;
3796   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3797   PetscValidType(B,1);
3798   PetscValidLogicalCollectiveInt(B,bs,2);
3799   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3800   PetscFunctionReturn(0);
3801 }
3802 
3803 
3804 #undef __FUNCT__
3805 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3806 /*@
3807      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3808 
3809      Collective on MPI_Comm
3810 
3811    Input Parameters:
3812 +  comm - must be an MPI communicator of size 1
3813 .  bs - size of block
3814 .  m - number of rows
3815 .  n - number of columns
3816 .  i - row indices
3817 .  j - column indices
3818 -  a - matrix values
3819 
3820    Output Parameter:
3821 .  mat - the matrix
3822 
3823    Level: advanced
3824 
3825    Notes:
3826        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3827     once the matrix is destroyed
3828 
3829        You cannot set new nonzero locations into this matrix, that will generate an error.
3830 
3831        The i and j indices are 0 based
3832 
3833        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3834 
3835 
3836 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3837 
3838 @*/
3839 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3840 {
3841   PetscErrorCode ierr;
3842   PetscInt       ii;
3843   Mat_SeqBAIJ    *baij;
3844 
3845   PetscFunctionBegin;
3846   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3847   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3848 
3849   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3850   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3851   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3852   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3853   baij = (Mat_SeqBAIJ*)(*mat)->data;
3854   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3855   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3856 
3857   baij->i = i;
3858   baij->j = j;
3859   baij->a = a;
3860 
3861   baij->singlemalloc = PETSC_FALSE;
3862   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3863   baij->free_a       = PETSC_FALSE;
3864   baij->free_ij      = PETSC_FALSE;
3865 
3866   for (ii=0; ii<m; ii++) {
3867     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3868 #if defined(PETSC_USE_DEBUG)
3869     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3870 #endif
3871   }
3872 #if defined(PETSC_USE_DEBUG)
3873   for (ii=0; ii<baij->i[m]; ii++) {
3874     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3875     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3876   }
3877 #endif
3878 
3879   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3880   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3881   PetscFunctionReturn(0);
3882 }
3883