xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8563dfcc547dae59d5e1dc6b8e602db41cb72b14)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,x1,s1;
133   const PetscScalar *b;
134   const MatScalar   *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode    ierr;
136   PetscInt          m = a->mbs,i,i2,nz,j;
137   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0]   = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v  = aa + ai[i];
162         vi = aj + ai[i];
163         nz = diag[i] - ai[i];
164         s1 = b[i2];
165         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
166         x[i2]  = idiag[0]*s1;
167         idiag += 1;
168         i2    += 1;
169       }
170       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
171       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
172     }
173     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
174         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
175       i2    = 0;
176       mdiag = a->idiag+a->mbs;
177       for (i=0; i<m; i++) {
178         x1     = x[i2];
179         x[i2]  = mdiag[0]*x1;
180         mdiag += 1;
181         i2    += 1;
182       }
183       ierr = PetscLogFlops(m);CHKERRQ(ierr);
184     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
185       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
186     }
187     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
188       idiag  = a->idiag+a->mbs - 1;
189       i2     = m - 1;
190       x1     = x[i2];
191       x[i2]  = idiag[0]*x1;
192       idiag -= 1;
193       i2    -= 1;
194       for (i=m-2; i>=0; i--) {
195         v  = aa + (diag[i]+1);
196         vi = aj + diag[i] + 1;
197         nz = ai[i+1] - diag[i] - 1;
198         s1 = x[i2];
199         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
200         x[i2]  = idiag[0]*s1;
201         idiag -= 1;
202         i2    -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215 {
216   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
217   PetscScalar       *x,x1,x2,s1,s2;
218   const PetscScalar *b;
219   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
220   PetscErrorCode    ierr;
221   PetscInt          m = a->mbs,i,i2,nz,idx,j,it;
222   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
223 
224   PetscFunctionBegin;
225   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
226   its = its*lits;
227   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
228   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
229   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
230   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
231   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
232 
233   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
234 
235   diag  = a->diag;
236   idiag = a->idiag;
237   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
238   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
239 
240   if (flag & SOR_ZERO_INITIAL_GUESS) {
241     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
242       x[0]   = b[0]*idiag[0] + b[1]*idiag[2];
243       x[1]   = b[0]*idiag[1] + b[1]*idiag[3];
244       i2     = 2;
245       idiag += 4;
246       for (i=1; i<m; i++) {
247         v  = aa + 4*ai[i];
248         vi = aj + ai[i];
249         nz = diag[i] - ai[i];
250         s1 = b[i2]; s2 = b[i2+1];
251         for (j=0; j<nz; j++) {
252           idx = 2*vi[j];
253           it  = 4*j;
254           x1  = x[idx]; x2 = x[1+idx];
255           s1 -= v[it]*x1 + v[it+2]*x2;
256           s2 -= v[it+1]*x1 + v[it+3]*x2;
257         }
258         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
259         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
260         idiag  += 4;
261         i2     += 2;
262       }
263       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
264       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
265     }
266     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
267         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
268       i2    = 0;
269       mdiag = a->idiag+4*a->mbs;
270       for (i=0; i<m; i++) {
271         x1      = x[i2]; x2 = x[i2+1];
272         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
273         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
274         mdiag  += 4;
275         i2     += 2;
276       }
277       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
278     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
279       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
280     }
281     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
282       idiag   = a->idiag+4*a->mbs - 4;
283       i2      = 2*m - 2;
284       x1      = x[i2]; x2 = x[i2+1];
285       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
286       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
287       idiag  -= 4;
288       i2     -= 2;
289       for (i=m-2; i>=0; i--) {
290         v  = aa + 4*(diag[i]+1);
291         vi = aj + diag[i] + 1;
292         nz = ai[i+1] - diag[i] - 1;
293         s1 = x[i2]; s2 = x[i2+1];
294         for (j=0; j<nz; j++) {
295           idx = 2*vi[j];
296           it  = 4*j;
297           x1  = x[idx]; x2 = x[1+idx];
298           s1 -= v[it]*x1 + v[it+2]*x2;
299           s2 -= v[it+1]*x1 + v[it+3]*x2;
300         }
301         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
302         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
303         idiag  -= 4;
304         i2     -= 2;
305       }
306       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
307     }
308   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
310   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
317 {
318   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
319   PetscScalar       *x,x1,x2,x3,s1,s2,s3;
320   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
321   const PetscScalar *b;
322   PetscErrorCode    ierr;
323   PetscInt          m = a->mbs,i,i2,nz,idx;
324   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
325 
326   PetscFunctionBegin;
327   its = its*lits;
328   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
329   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
334 
335   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
336 
337   diag  = a->diag;
338   idiag = a->idiag;
339   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
340   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
341 
342   if (flag & SOR_ZERO_INITIAL_GUESS) {
343     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
344       x[0]   = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345       x[1]   = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346       x[2]   = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347       i2     = 3;
348       idiag += 9;
349       for (i=1; i<m; i++) {
350         v  = aa + 9*ai[i];
351         vi = aj + ai[i];
352         nz = diag[i] - ai[i];
353         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354         while (nz--) {
355           idx = 3*(*vi++);
356           x1  = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360           v  += 9;
361         }
362         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365         idiag  += 9;
366         i2     += 3;
367       }
368       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
370     }
371     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373       i2    = 0;
374       mdiag = a->idiag+9*a->mbs;
375       for (i=0; i<m; i++) {
376         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380         mdiag  += 9;
381         i2     += 3;
382       }
383       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
384     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
386     }
387     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
388       idiag   = a->idiag+9*a->mbs - 9;
389       i2      = 3*m - 3;
390       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394       idiag  -= 9;
395       i2     -= 3;
396       for (i=m-2; i>=0; i--) {
397         v  = aa + 9*(diag[i]+1);
398         vi = aj + diag[i] + 1;
399         nz = ai[i+1] - diag[i] - 1;
400         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401         while (nz--) {
402           idx = 3*(*vi++);
403           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407           v  += 9;
408         }
409         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412         idiag  -= 9;
413         i2     -= 3;
414       }
415       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
416     }
417   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
418   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
419   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
426 {
427   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
428   PetscScalar       *x,x1,x2,x3,x4,s1,s2,s3,s4;
429   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
430   const PetscScalar *b;
431   PetscErrorCode    ierr;
432   PetscInt          m = a->mbs,i,i2,nz,idx;
433   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
434 
435   PetscFunctionBegin;
436   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
437   its = its*lits;
438   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
439   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
440   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
441   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
442   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
443 
444   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
445 
446   diag  = a->diag;
447   idiag = a->idiag;
448   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
449   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
450 
451   if (flag & SOR_ZERO_INITIAL_GUESS) {
452     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
453       x[0]   = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
454       x[1]   = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
455       x[2]   = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
456       x[3]   = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
457       i2     = 4;
458       idiag += 16;
459       for (i=1; i<m; i++) {
460         v  = aa + 16*ai[i];
461         vi = aj + ai[i];
462         nz = diag[i] - ai[i];
463         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
464         while (nz--) {
465           idx = 4*(*vi++);
466           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
467           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
468           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
469           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
470           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
471           v  += 16;
472         }
473         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
474         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
475         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
476         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
477         idiag  += 16;
478         i2     += 4;
479       }
480       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
481       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
482     }
483     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
484         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
485       i2    = 0;
486       mdiag = a->idiag+16*a->mbs;
487       for (i=0; i<m; i++) {
488         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
489         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
490         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
491         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
492         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
493         mdiag  += 16;
494         i2     += 4;
495       }
496       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
497     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
498       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
499     }
500     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
501       idiag   = a->idiag+16*a->mbs - 16;
502       i2      = 4*m - 4;
503       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
504       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
505       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
506       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
507       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
508       idiag  -= 16;
509       i2     -= 4;
510       for (i=m-2; i>=0; i--) {
511         v  = aa + 16*(diag[i]+1);
512         vi = aj + diag[i] + 1;
513         nz = ai[i+1] - diag[i] - 1;
514         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
515         while (nz--) {
516           idx = 4*(*vi++);
517           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
518           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
519           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
520           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
521           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
522           v  += 16;
523         }
524         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
525         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
526         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
527         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
528         idiag  -= 16;
529         i2     -= 4;
530       }
531       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
532     }
533   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
534   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
542 {
543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544   PetscScalar       *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
545   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
546   const PetscScalar *b;
547   PetscErrorCode    ierr;
548   PetscInt          m = a->mbs,i,i2,nz,idx;
549   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
550 
551   PetscFunctionBegin;
552   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
553   its = its*lits;
554   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
555   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
556   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
557   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
558   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
559 
560   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
561 
562   diag  = a->diag;
563   idiag = a->idiag;
564   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
565   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
566 
567   if (flag & SOR_ZERO_INITIAL_GUESS) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       x[0]   = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
570       x[1]   = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
571       x[2]   = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
572       x[3]   = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
573       x[4]   = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
574       i2     = 5;
575       idiag += 25;
576       for (i=1; i<m; i++) {
577         v  = aa + 25*ai[i];
578         vi = aj + ai[i];
579         nz = diag[i] - ai[i];
580         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
581         while (nz--) {
582           idx = 5*(*vi++);
583           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
584           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
585           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
586           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
587           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
588           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
589           v  += 25;
590         }
591         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
592         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
593         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
594         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
595         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
596         idiag  += 25;
597         i2     += 5;
598       }
599       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
600       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
601     }
602     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
603         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
604       i2    = 0;
605       mdiag = a->idiag+25*a->mbs;
606       for (i=0; i<m; i++) {
607         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
608         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
609         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
610         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
611         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
612         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
613         mdiag  += 25;
614         i2     += 5;
615       }
616       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
617     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
618       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
619     }
620     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
621       idiag   = a->idiag+25*a->mbs - 25;
622       i2      = 5*m - 5;
623       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
624       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
625       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
626       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
627       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
628       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
629       idiag  -= 25;
630       i2     -= 5;
631       for (i=m-2; i>=0; i--) {
632         v  = aa + 25*(diag[i]+1);
633         vi = aj + diag[i] + 1;
634         nz = ai[i+1] - diag[i] - 1;
635         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
636         while (nz--) {
637           idx = 5*(*vi++);
638           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
639           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
640           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
641           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
642           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
643           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
644           v  += 25;
645         }
646         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
647         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
648         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
649         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
650         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
651         idiag  -= 25;
652         i2     -= 5;
653       }
654       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
655     }
656   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
665 {
666   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
667   PetscScalar       *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
668   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
669   const PetscScalar *b;
670   PetscErrorCode    ierr;
671   PetscInt          m = a->mbs,i,i2,nz,idx;
672   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
673 
674   PetscFunctionBegin;
675   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
676   its = its*lits;
677   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
678   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
679   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
680   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
681   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
682 
683   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
684 
685   diag  = a->diag;
686   idiag = a->idiag;
687   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
688   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
689 
690   if (flag & SOR_ZERO_INITIAL_GUESS) {
691     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
692       x[0]   = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
693       x[1]   = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
694       x[2]   = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
695       x[3]   = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
696       x[4]   = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
697       x[5]   = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
698       i2     = 6;
699       idiag += 36;
700       for (i=1; i<m; i++) {
701         v  = aa + 36*ai[i];
702         vi = aj + ai[i];
703         nz = diag[i] - ai[i];
704         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
705         while (nz--) {
706           idx = 6*(*vi++);
707           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
708           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
709           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
710           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
711           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
712           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
713           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
714           v  += 36;
715         }
716         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
717         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
718         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
719         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
720         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
721         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
722         idiag  += 36;
723         i2     += 6;
724       }
725       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
726       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
727     }
728     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
729         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
730       i2    = 0;
731       mdiag = a->idiag+36*a->mbs;
732       for (i=0; i<m; i++) {
733         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
734         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
735         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
736         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
737         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
738         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
739         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
740         mdiag  += 36;
741         i2     += 6;
742       }
743       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
744     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
745       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
746     }
747     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748       idiag   = a->idiag+36*a->mbs - 36;
749       i2      = 6*m - 6;
750       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
751       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
752       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
753       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
754       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
755       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
756       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
757       idiag  -= 36;
758       i2     -= 6;
759       for (i=m-2; i>=0; i--) {
760         v  = aa + 36*(diag[i]+1);
761         vi = aj + diag[i] + 1;
762         nz = ai[i+1] - diag[i] - 1;
763         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
764         while (nz--) {
765           idx = 6*(*vi++);
766           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
767           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
768           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
769           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
770           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
771           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
772           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
773           v  += 36;
774         }
775         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
776         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
777         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
778         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
779         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
780         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
781         idiag  -= 36;
782         i2     -= 6;
783       }
784       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
785     }
786   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
787   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
788   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
795 {
796   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
797   PetscScalar       *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
798   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
799   const PetscScalar *b;
800   PetscErrorCode    ierr;
801   PetscInt          m = a->mbs,i,i2,nz,idx;
802   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
803 
804   PetscFunctionBegin;
805   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
806   its = its*lits;
807   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
808   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
809   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
810   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
811   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
812 
813   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
814 
815   diag  = a->diag;
816   idiag = a->idiag;
817   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
818   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
819 
820   if (flag & SOR_ZERO_INITIAL_GUESS) {
821     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
822       x[0]   = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
823       x[1]   = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
824       x[2]   = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
825       x[3]   = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
826       x[4]   = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
827       x[5]   = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
828       x[6]   = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
829       i2     = 7;
830       idiag += 49;
831       for (i=1; i<m; i++) {
832         v  = aa + 49*ai[i];
833         vi = aj + ai[i];
834         nz = diag[i] - ai[i];
835         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
836         while (nz--) {
837           idx = 7*(*vi++);
838           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
839           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
840           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
841           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
842           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
843           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
844           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
845           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
846           v  += 49;
847         }
848         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
849         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
850         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
851         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
852         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
853         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
854         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
855         idiag  += 49;
856         i2     += 7;
857       }
858       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
859       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
860     }
861     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
862         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
863       i2    = 0;
864       mdiag = a->idiag+49*a->mbs;
865       for (i=0; i<m; i++) {
866         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
867         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
868         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
869         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
870         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
871         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
872         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
873         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
874         mdiag  += 49;
875         i2     += 7;
876       }
877       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
878     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
879       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
880     }
881     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882       idiag   = a->idiag+49*a->mbs - 49;
883       i2      = 7*m - 7;
884       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
885       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
886       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
887       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
888       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
889       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
890       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
891       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
892       idiag  -= 49;
893       i2     -= 7;
894       for (i=m-2; i>=0; i--) {
895         v  = aa + 49*(diag[i]+1);
896         vi = aj + diag[i] + 1;
897         nz = ai[i+1] - diag[i] - 1;
898         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
899         while (nz--) {
900           idx = 7*(*vi++);
901           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
902           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
903           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
904           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
905           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
906           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
907           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
908           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
909           v  += 49;
910         }
911         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
912         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
913         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
914         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
915         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
916         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
917         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
918         idiag  -= 49;
919         i2     -= 7;
920       }
921       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
922     }
923   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
932 {
933   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
934   PetscScalar       *x,*work,*w,*workt;
935   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
936   const PetscScalar *b;
937   PetscErrorCode    ierr;
938   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
939   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
940 
941   PetscFunctionBegin;
942   its = its*lits;
943   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
944   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
945   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
946   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
947   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
948   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
949 
950   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
951 
952   diag  = a->diag;
953   idiag = a->idiag;
954   if (!a->mult_work) {
955     k    = PetscMax(A->rmap->n,A->cmap->n);
956     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
957   }
958   work = a->mult_work;
959   if (!a->sor_work) {
960     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
961   }
962   w = a->sor_work;
963 
964   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
966 
967   if (flag & SOR_ZERO_INITIAL_GUESS) {
968     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
969       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
970       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
971       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
972       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
973       i2     = bs;
974       idiag += bs2;
975       for (i=1; i<m; i++) {
976         v  = aa + bs2*ai[i];
977         vi = aj + ai[i];
978         nz = diag[i] - ai[i];
979 
980         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
981         /* copy all rows of x that are needed into contiguous space */
982         workt = work;
983         for (j=0; j<nz; j++) {
984           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
985           workt += bs;
986         }
987         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
988         /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
989          while (nz--) {
990            idx  = N*(*vi++);
991            x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
992            s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
993            s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
994            s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
995            v   += N2;
996            } */
997 
998         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
999         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1000         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1001         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1002 
1003         idiag += bs2;
1004         i2    += bs;
1005       }
1006       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1007       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1008     }
1009     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1010         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1011       i2    = 0;
1012       mdiag = a->idiag+bs2*a->mbs;
1013       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1014       for (i=0; i<m; i++) {
1015         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1016         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1017         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1018         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1019         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1020 
1021         mdiag += bs2;
1022         i2    += bs;
1023       }
1024       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1025     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1026       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1027     }
1028     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1029       idiag = a->idiag+bs2*a->mbs - bs2;
1030       i2    = bs*m - bs;
1031       ierr  = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1033       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1034       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1035       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1036       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1037       idiag -= bs2;
1038       i2    -= bs;
1039       for (i=m-2; i>=0; i--) {
1040         v  = aa + bs2*(diag[i]+1);
1041         vi = aj + diag[i] + 1;
1042         nz = ai[i+1] - diag[i] - 1;
1043 
1044         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1045         /* copy all rows of x that are needed into contiguous space */
1046         workt = work;
1047         for (j=0; j<nz; j++) {
1048           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1049           workt += bs;
1050         }
1051         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1052         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1053         while (nz--) {
1054           idx  = N*(*vi++);
1055           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1056           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1057           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1058           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1059           v   += N2;
1060           } */
1061 
1062         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1063         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1064         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1065         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1066         idiag -= bs2;
1067         i2    -= bs;
1068       }
1069       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1070     }
1071   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 /*
1078     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1079 */
1080 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1083 #define matsetvaluesblocked4_ matsetvaluesblocked4
1084 #endif
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "matsetvaluesblocked4_"
1088 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1089 {
1090   Mat               A  = *AA;
1091   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1092   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1093   PetscInt          *ai    =a->i,*ailen=a->ilen;
1094   PetscInt          *aj    =a->j,stepval,lastcol = -1;
1095   const PetscScalar *value = v;
1096   MatScalar         *ap,*aa = a->a,*bap;
1097 
1098   PetscFunctionBegin;
1099   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1100   stepval = (n-1)*4;
1101   for (k=0; k<m; k++) { /* loop over added rows */
1102     row  = im[k];
1103     rp   = aj + ai[row];
1104     ap   = aa + 16*ai[row];
1105     nrow = ailen[row];
1106     low  = 0;
1107     high = nrow;
1108     for (l=0; l<n; l++) { /* loop over added columns */
1109       col = in[l];
1110       if (col <= lastcol)  low = 0;
1111       else                high = nrow;
1112       lastcol = col;
1113       value   = v + k*(stepval+4 + l)*4;
1114       while (high-low > 7) {
1115         t = (low+high)/2;
1116         if (rp[t] > col) high = t;
1117         else             low  = t;
1118       }
1119       for (i=low; i<high; i++) {
1120         if (rp[i] > col) break;
1121         if (rp[i] == col) {
1122           bap = ap +  16*i;
1123           for (ii=0; ii<4; ii++,value+=stepval) {
1124             for (jj=ii; jj<16; jj+=4) {
1125               bap[jj] += *value++;
1126             }
1127           }
1128           goto noinsert2;
1129         }
1130       }
1131       N = nrow++ - 1;
1132       high++; /* added new column index thus must search to one higher than before */
1133       /* shift up all the later entries in this row */
1134       for (ii=N; ii>=i; ii--) {
1135         rp[ii+1] = rp[ii];
1136         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1137       }
1138       if (N >= i) {
1139         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1140       }
1141       rp[i] = col;
1142       bap   = ap +  16*i;
1143       for (ii=0; ii<4; ii++,value+=stepval) {
1144         for (jj=ii; jj<16; jj+=4) {
1145           bap[jj] = *value++;
1146         }
1147       }
1148       noinsert2:;
1149       low = i;
1150     }
1151     ailen[row] = nrow;
1152   }
1153   PetscFunctionReturnVoid();
1154 }
1155 
1156 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1157 #define matsetvalues4_ MATSETVALUES4
1158 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1159 #define matsetvalues4_ matsetvalues4
1160 #endif
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatSetValues4_"
1164 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1165 {
1166   Mat         A  = *AA;
1167   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1168   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1169   PetscInt    *ai=a->i,*ailen=a->ilen;
1170   PetscInt    *aj=a->j,brow,bcol;
1171   PetscInt    ridx,cidx,lastcol = -1;
1172   MatScalar   *ap,value,*aa=a->a,*bap;
1173 
1174   PetscFunctionBegin;
1175   for (k=0; k<m; k++) { /* loop over added rows */
1176     row  = im[k]; brow = row/4;
1177     rp   = aj + ai[brow];
1178     ap   = aa + 16*ai[brow];
1179     nrow = ailen[brow];
1180     low  = 0;
1181     high = nrow;
1182     for (l=0; l<n; l++) { /* loop over added columns */
1183       col   = in[l]; bcol = col/4;
1184       ridx  = row % 4; cidx = col % 4;
1185       value = v[l + k*n];
1186       if (col <= lastcol)  low = 0;
1187       else                high = nrow;
1188       lastcol = col;
1189       while (high-low > 7) {
1190         t = (low+high)/2;
1191         if (rp[t] > bcol) high = t;
1192         else              low  = t;
1193       }
1194       for (i=low; i<high; i++) {
1195         if (rp[i] > bcol) break;
1196         if (rp[i] == bcol) {
1197           bap   = ap +  16*i + 4*cidx + ridx;
1198           *bap += value;
1199           goto noinsert1;
1200         }
1201       }
1202       N = nrow++ - 1;
1203       high++; /* added new column thus must search to one higher than before */
1204       /* shift up all the later entries in this row */
1205       for (ii=N; ii>=i; ii--) {
1206         rp[ii+1] = rp[ii];
1207         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1208       }
1209       if (N>=i) {
1210         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1211       }
1212       rp[i]                    = bcol;
1213       ap[16*i + 4*cidx + ridx] = value;
1214 noinsert1:;
1215       low = i;
1216     }
1217     ailen[brow] = nrow;
1218   }
1219   PetscFunctionReturnVoid();
1220 }
1221 
1222 /*
1223      Checks for missing diagonals
1224 */
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1227 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1228 {
1229   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1230   PetscErrorCode ierr;
1231   PetscInt       *diag,*jj = a->j,i;
1232 
1233   PetscFunctionBegin;
1234   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1235   *missing = PETSC_FALSE;
1236   if (A->rmap->n > 0 && !jj) {
1237     *missing = PETSC_TRUE;
1238     if (d) *d = 0;
1239     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1240   } else {
1241     diag = a->diag;
1242     for (i=0; i<a->mbs; i++) {
1243       if (jj[diag[i]] != i) {
1244         *missing = PETSC_TRUE;
1245         if (d) *d = i;
1246         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1247         break;
1248       }
1249     }
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1256 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1257 {
1258   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1259   PetscErrorCode ierr;
1260   PetscInt       i,j,m = a->mbs;
1261 
1262   PetscFunctionBegin;
1263   if (!a->diag) {
1264     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1265     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1266     a->free_diag = PETSC_TRUE;
1267   }
1268   for (i=0; i<m; i++) {
1269     a->diag[i] = a->i[i+1];
1270     for (j=a->i[i]; j<a->i[i+1]; j++) {
1271       if (a->j[j] == i) {
1272         a->diag[i] = j;
1273         break;
1274       }
1275     }
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1283 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1284 {
1285   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1286   PetscErrorCode ierr;
1287   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1288   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1289 
1290   PetscFunctionBegin;
1291   *nn = n;
1292   if (!ia) PetscFunctionReturn(0);
1293   if (symmetric) {
1294     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1295     nz   = tia[n];
1296   } else {
1297     tia = a->i; tja = a->j;
1298   }
1299 
1300   if (!blockcompressed && bs > 1) {
1301     (*nn) *= bs;
1302     /* malloc & create the natural set of indices */
1303     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1304     if (n) {
1305       (*ia)[0] = 0;
1306       for (j=1; j<bs; j++) {
1307         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1308       }
1309     }
1310 
1311     for (i=1; i<n; i++) {
1312       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1313       for (j=1; j<bs; j++) {
1314         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1315       }
1316     }
1317     if (n) {
1318       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1319     }
1320 
1321     if (inja) {
1322       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1323       cnt = 0;
1324       for (i=0; i<n; i++) {
1325         for (j=0; j<bs; j++) {
1326           for (k=tia[i]; k<tia[i+1]; k++) {
1327             for (l=0; l<bs; l++) {
1328               (*ja)[cnt++] = bs*tja[k] + l;
1329             }
1330           }
1331         }
1332       }
1333     }
1334 
1335     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1336       ierr = PetscFree(tia);CHKERRQ(ierr);
1337       ierr = PetscFree(tja);CHKERRQ(ierr);
1338     }
1339   } else if (oshift == 1) {
1340     if (symmetric) {
1341       nz = tia[A->rmap->n/bs];
1342       /*  add 1 to i and j indices */
1343       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1344       *ia = tia;
1345       if (ja) {
1346         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1347         *ja = tja;
1348       }
1349     } else {
1350       nz = a->i[A->rmap->n/bs];
1351       /* malloc space and  add 1 to i and j indices */
1352       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1353       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1354       if (ja) {
1355         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1356         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1357       }
1358     }
1359   } else {
1360     *ia = tia;
1361     if (ja) *ja = tja;
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1368 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1369 {
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   if (!ia) PetscFunctionReturn(0);
1374   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1375     ierr = PetscFree(*ia);CHKERRQ(ierr);
1376     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1377   }
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1383 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1384 {
1385   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1386   PetscErrorCode ierr;
1387 
1388   PetscFunctionBegin;
1389 #if defined(PETSC_USE_LOG)
1390   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1391 #endif
1392   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1393   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1394   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1395   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1396   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1397   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1398   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1399   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1400   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1401   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1402   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1403   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1404   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1405 
1406   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1407   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1408   ierr = PetscFree(A->data);CHKERRQ(ierr);
1409 
1410   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1411   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr);
1412   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1413   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1414   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr);
1415   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr);
1416   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr);
1417   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1426 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1427 {
1428   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1429   PetscErrorCode ierr;
1430 
1431   PetscFunctionBegin;
1432   switch (op) {
1433   case MAT_ROW_ORIENTED:
1434     a->roworiented = flg;
1435     break;
1436   case MAT_KEEP_NONZERO_PATTERN:
1437     a->keepnonzeropattern = flg;
1438     break;
1439   case MAT_NEW_NONZERO_LOCATIONS:
1440     a->nonew = (flg ? 0 : 1);
1441     break;
1442   case MAT_NEW_NONZERO_LOCATION_ERR:
1443     a->nonew = (flg ? -1 : 0);
1444     break;
1445   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1446     a->nonew = (flg ? -2 : 0);
1447     break;
1448   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1449     a->nounused = (flg ? -1 : 0);
1450     break;
1451   case MAT_CHECK_COMPRESSED_ROW:
1452     a->compressedrow.check = flg;
1453     break;
1454   case MAT_NEW_DIAGONALS:
1455   case MAT_IGNORE_OFF_PROC_ENTRIES:
1456   case MAT_USE_HASH_TABLE:
1457     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1458     break;
1459   case MAT_SPD:
1460   case MAT_SYMMETRIC:
1461   case MAT_STRUCTURALLY_SYMMETRIC:
1462   case MAT_HERMITIAN:
1463   case MAT_SYMMETRY_ETERNAL:
1464     /* These options are handled directly by MatSetOption() */
1465     break;
1466   default:
1467     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1468   }
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1474 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1475 {
1476   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1477   PetscErrorCode ierr;
1478   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1479   MatScalar      *aa,*aa_i;
1480   PetscScalar    *v_i;
1481 
1482   PetscFunctionBegin;
1483   bs  = A->rmap->bs;
1484   ai  = a->i;
1485   aj  = a->j;
1486   aa  = a->a;
1487   bs2 = a->bs2;
1488 
1489   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1490 
1491   bn  = row/bs;   /* Block number */
1492   bp  = row % bs; /* Block Position */
1493   M   = ai[bn+1] - ai[bn];
1494   *nz = bs*M;
1495 
1496   if (v) {
1497     *v = 0;
1498     if (*nz) {
1499       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1500       for (i=0; i<M; i++) { /* for each block in the block row */
1501         v_i  = *v + i*bs;
1502         aa_i = aa + bs2*(ai[bn] + i);
1503         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1504       }
1505     }
1506   }
1507 
1508   if (idx) {
1509     *idx = 0;
1510     if (*nz) {
1511       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1512       for (i=0; i<M; i++) { /* for each block in the block row */
1513         idx_i = *idx + i*bs;
1514         itmp  = bs*aj[ai[bn] + i];
1515         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1516       }
1517     }
1518   }
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1524 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1525 {
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1530   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1538 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1539 {
1540   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1541   Mat            C;
1542   PetscErrorCode ierr;
1543   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1544   PetscInt       *rows,*cols,bs2=a->bs2;
1545   MatScalar      *array;
1546 
1547   PetscFunctionBegin;
1548   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1549   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1550     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1551     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1552 
1553     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1554     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1555     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1556     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1557     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1558     ierr = PetscFree(col);CHKERRQ(ierr);
1559   } else {
1560     C = *B;
1561   }
1562 
1563   array = a->a;
1564   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1565   for (i=0; i<mbs; i++) {
1566     cols[0] = i*bs;
1567     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1568     len = ai[i+1] - ai[i];
1569     for (j=0; j<len; j++) {
1570       rows[0] = (*aj++)*bs;
1571       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1572       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1573       array += bs2;
1574     }
1575   }
1576   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1577 
1578   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1579   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1580 
1581   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1582     *B = C;
1583   } else {
1584     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1591 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1592 {
1593   PetscErrorCode ierr;
1594   Mat            Btrans;
1595 
1596   PetscFunctionBegin;
1597   *f   = PETSC_FALSE;
1598   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1599   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1600   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1606 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1607 {
1608   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1609   PetscErrorCode ierr;
1610   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1611   int            fd;
1612   PetscScalar    *aa;
1613   FILE           *file;
1614 
1615   PetscFunctionBegin;
1616   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1617   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1618   col_lens[0] = MAT_FILE_CLASSID;
1619 
1620   col_lens[1] = A->rmap->N;
1621   col_lens[2] = A->cmap->n;
1622   col_lens[3] = a->nz*bs2;
1623 
1624   /* store lengths of each row and write (including header) to file */
1625   count = 0;
1626   for (i=0; i<a->mbs; i++) {
1627     for (j=0; j<bs; j++) {
1628       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1629     }
1630   }
1631   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1632   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1633 
1634   /* store column indices (zero start index) */
1635   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1636   count = 0;
1637   for (i=0; i<a->mbs; i++) {
1638     for (j=0; j<bs; j++) {
1639       for (k=a->i[i]; k<a->i[i+1]; k++) {
1640         for (l=0; l<bs; l++) {
1641           jj[count++] = bs*a->j[k] + l;
1642         }
1643       }
1644     }
1645   }
1646   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1647   ierr = PetscFree(jj);CHKERRQ(ierr);
1648 
1649   /* store nonzero values */
1650   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1651   count = 0;
1652   for (i=0; i<a->mbs; i++) {
1653     for (j=0; j<bs; j++) {
1654       for (k=a->i[i]; k<a->i[i+1]; k++) {
1655         for (l=0; l<bs; l++) {
1656           aa[count++] = a->a[bs2*k + l*bs + j];
1657         }
1658       }
1659     }
1660   }
1661   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1662   ierr = PetscFree(aa);CHKERRQ(ierr);
1663 
1664   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1665   if (file) {
1666     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1667   }
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1673 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1674 {
1675   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1676   PetscErrorCode    ierr;
1677   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1678   PetscViewerFormat format;
1679 
1680   PetscFunctionBegin;
1681   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1682   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1683     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1684   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1685     Mat aij;
1686     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1687     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1688     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1689   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1690       PetscFunctionReturn(0);
1691   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1692     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1693     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1694     for (i=0; i<a->mbs; i++) {
1695       for (j=0; j<bs; j++) {
1696         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1697         for (k=a->i[i]; k<a->i[i+1]; k++) {
1698           for (l=0; l<bs; l++) {
1699 #if defined(PETSC_USE_COMPLEX)
1700             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1701               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1702                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1703             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1704               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1705                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1706             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1707               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1708             }
1709 #else
1710             if (a->a[bs2*k + l*bs + j] != 0.0) {
1711               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1712             }
1713 #endif
1714           }
1715         }
1716         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1717       }
1718     }
1719     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1720   } else {
1721     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1722     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1723     for (i=0; i<a->mbs; i++) {
1724       for (j=0; j<bs; j++) {
1725         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1726         for (k=a->i[i]; k<a->i[i+1]; k++) {
1727           for (l=0; l<bs; l++) {
1728 #if defined(PETSC_USE_COMPLEX)
1729             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1730               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1731                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1733               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1734                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1735             } else {
1736               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1737             }
1738 #else
1739             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1740 #endif
1741           }
1742         }
1743         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1744       }
1745     }
1746     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1747   }
1748   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 #include <petscdraw.h>
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1755 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1756 {
1757   Mat               A = (Mat) Aa;
1758   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1759   PetscErrorCode    ierr;
1760   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1761   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1762   MatScalar         *aa;
1763   PetscViewer       viewer;
1764   PetscViewerFormat format;
1765 
1766   PetscFunctionBegin;
1767   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1768   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1769 
1770   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1771 
1772   /* loop over matrix elements drawing boxes */
1773 
1774   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1775     color = PETSC_DRAW_BLUE;
1776     for (i=0,row=0; i<mbs; i++,row+=bs) {
1777       for (j=a->i[i]; j<a->i[i+1]; j++) {
1778         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1779         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1780         aa  = a->a + j*bs2;
1781         for (k=0; k<bs; k++) {
1782           for (l=0; l<bs; l++) {
1783             if (PetscRealPart(*aa++) >=  0.) continue;
1784             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1785           }
1786         }
1787       }
1788     }
1789     color = PETSC_DRAW_CYAN;
1790     for (i=0,row=0; i<mbs; i++,row+=bs) {
1791       for (j=a->i[i]; j<a->i[i+1]; j++) {
1792         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1793         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1794         aa  = a->a + j*bs2;
1795         for (k=0; k<bs; k++) {
1796           for (l=0; l<bs; l++) {
1797             if (PetscRealPart(*aa++) != 0.) continue;
1798             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1799           }
1800         }
1801       }
1802     }
1803     color = PETSC_DRAW_RED;
1804     for (i=0,row=0; i<mbs; i++,row+=bs) {
1805       for (j=a->i[i]; j<a->i[i+1]; j++) {
1806         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1807         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1808         aa  = a->a + j*bs2;
1809         for (k=0; k<bs; k++) {
1810           for (l=0; l<bs; l++) {
1811             if (PetscRealPart(*aa++) <= 0.) continue;
1812             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1813           }
1814         }
1815       }
1816     }
1817   } else {
1818     /* use contour shading to indicate magnitude of values */
1819     /* first determine max of all nonzero values */
1820     PetscDraw popup;
1821     PetscReal scale,maxv = 0.0;
1822 
1823     for (i=0; i<a->nz*a->bs2; i++) {
1824       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1825     }
1826     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1827     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1828     if (popup) {
1829       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1830     }
1831     for (i=0,row=0; i<mbs; i++,row+=bs) {
1832       for (j=a->i[i]; j<a->i[i+1]; j++) {
1833         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835         aa  = a->a + j*bs2;
1836         for (k=0; k<bs; k++) {
1837           for (l=0; l<bs; l++) {
1838             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1839             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1840           }
1841         }
1842       }
1843     }
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1850 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1851 {
1852   PetscErrorCode ierr;
1853   PetscReal      xl,yl,xr,yr,w,h;
1854   PetscDraw      draw;
1855   PetscBool      isnull;
1856 
1857   PetscFunctionBegin;
1858   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1859   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1860 
1861   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1862   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1863   xr  += w;    yr += h;  xl = -w;     yl = -h;
1864   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1865   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1866   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatView_SeqBAIJ"
1872 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1873 {
1874   PetscErrorCode ierr;
1875   PetscBool      iascii,isbinary,isdraw;
1876 
1877   PetscFunctionBegin;
1878   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1879   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1880   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1881   if (iascii) {
1882     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1883   } else if (isbinary) {
1884     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1885   } else if (isdraw) {
1886     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1887   } else {
1888     Mat B;
1889     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1890     ierr = MatView(B,viewer);CHKERRQ(ierr);
1891     ierr = MatDestroy(&B);CHKERRQ(ierr);
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1899 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1900 {
1901   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1902   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1903   PetscInt    *ai = a->i,*ailen = a->ilen;
1904   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1905   MatScalar   *ap,*aa = a->a;
1906 
1907   PetscFunctionBegin;
1908   for (k=0; k<m; k++) { /* loop over rows */
1909     row = im[k]; brow = row/bs;
1910     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1911     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1912     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1913     nrow = ailen[brow];
1914     for (l=0; l<n; l++) { /* loop over columns */
1915       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1916       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1917       col  = in[l];
1918       bcol = col/bs;
1919       cidx = col%bs;
1920       ridx = row%bs;
1921       high = nrow;
1922       low  = 0; /* assume unsorted */
1923       while (high-low > 5) {
1924         t = (low+high)/2;
1925         if (rp[t] > bcol) high = t;
1926         else             low  = t;
1927       }
1928       for (i=low; i<high; i++) {
1929         if (rp[i] > bcol) break;
1930         if (rp[i] == bcol) {
1931           *v++ = ap[bs2*i+bs*cidx+ridx];
1932           goto finished;
1933         }
1934       }
1935       *v++ = 0.0;
1936 finished:;
1937     }
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1944 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1945 {
1946   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1947   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1948   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1949   PetscErrorCode    ierr;
1950   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1951   PetscBool         roworiented=a->roworiented;
1952   const PetscScalar *value     = v;
1953   MatScalar         *ap,*aa = a->a,*bap;
1954 
1955   PetscFunctionBegin;
1956   if (roworiented) {
1957     stepval = (n-1)*bs;
1958   } else {
1959     stepval = (m-1)*bs;
1960   }
1961   for (k=0; k<m; k++) { /* loop over added rows */
1962     row = im[k];
1963     if (row < 0) continue;
1964 #if defined(PETSC_USE_DEBUG)
1965     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1966 #endif
1967     rp   = aj + ai[row];
1968     ap   = aa + bs2*ai[row];
1969     rmax = imax[row];
1970     nrow = ailen[row];
1971     low  = 0;
1972     high = nrow;
1973     for (l=0; l<n; l++) { /* loop over added columns */
1974       if (in[l] < 0) continue;
1975 #if defined(PETSC_USE_DEBUG)
1976       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1977 #endif
1978       col = in[l];
1979       if (roworiented) {
1980         value = v + (k*(stepval+bs) + l)*bs;
1981       } else {
1982         value = v + (l*(stepval+bs) + k)*bs;
1983       }
1984       if (col <= lastcol) low = 0;
1985       else high = nrow;
1986       lastcol = col;
1987       while (high-low > 7) {
1988         t = (low+high)/2;
1989         if (rp[t] > col) high = t;
1990         else             low  = t;
1991       }
1992       for (i=low; i<high; i++) {
1993         if (rp[i] > col) break;
1994         if (rp[i] == col) {
1995           bap = ap +  bs2*i;
1996           if (roworiented) {
1997             if (is == ADD_VALUES) {
1998               for (ii=0; ii<bs; ii++,value+=stepval) {
1999                 for (jj=ii; jj<bs2; jj+=bs) {
2000                   bap[jj] += *value++;
2001                 }
2002               }
2003             } else {
2004               for (ii=0; ii<bs; ii++,value+=stepval) {
2005                 for (jj=ii; jj<bs2; jj+=bs) {
2006                   bap[jj] = *value++;
2007                 }
2008               }
2009             }
2010           } else {
2011             if (is == ADD_VALUES) {
2012               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2013                 for (jj=0; jj<bs; jj++) {
2014                   bap[jj] += value[jj];
2015                 }
2016                 bap += bs;
2017               }
2018             } else {
2019               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2020                 for (jj=0; jj<bs; jj++) {
2021                   bap[jj]  = value[jj];
2022                 }
2023                 bap += bs;
2024               }
2025             }
2026           }
2027           goto noinsert2;
2028         }
2029       }
2030       if (nonew == 1) goto noinsert2;
2031       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2032       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2033       N = nrow++ - 1; high++;
2034       /* shift up all the later entries in this row */
2035       for (ii=N; ii>=i; ii--) {
2036         rp[ii+1] = rp[ii];
2037         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2038       }
2039       if (N >= i) {
2040         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2041       }
2042       rp[i] = col;
2043       bap   = ap +  bs2*i;
2044       if (roworiented) {
2045         for (ii=0; ii<bs; ii++,value+=stepval) {
2046           for (jj=ii; jj<bs2; jj+=bs) {
2047             bap[jj] = *value++;
2048           }
2049         }
2050       } else {
2051         for (ii=0; ii<bs; ii++,value+=stepval) {
2052           for (jj=0; jj<bs; jj++) {
2053             *bap++ = *value++;
2054           }
2055         }
2056       }
2057 noinsert2:;
2058       low = i;
2059     }
2060     ailen[row] = nrow;
2061   }
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2067 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2068 {
2069   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
2070   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2071   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
2072   PetscErrorCode ierr;
2073   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
2074   MatScalar      *aa  = a->a,*ap;
2075   PetscReal      ratio=0.6;
2076 
2077   PetscFunctionBegin;
2078   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2079 
2080   if (m) rmax = ailen[0];
2081   for (i=1; i<mbs; i++) {
2082     /* move each row back by the amount of empty slots (fshift) before it*/
2083     fshift += imax[i-1] - ailen[i-1];
2084     rmax    = PetscMax(rmax,ailen[i]);
2085     if (fshift) {
2086       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2087       N  = ailen[i];
2088       for (j=0; j<N; j++) {
2089         ip[j-fshift] = ip[j];
2090 
2091         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2092       }
2093     }
2094     ai[i] = ai[i-1] + ailen[i-1];
2095   }
2096   if (mbs) {
2097     fshift += imax[mbs-1] - ailen[mbs-1];
2098     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2099   }
2100   /* reset ilen and imax for each row */
2101   for (i=0; i<mbs; i++) {
2102     ailen[i] = imax[i] = ai[i+1] - ai[i];
2103   }
2104   a->nz = ai[mbs];
2105 
2106   /* diagonals may have moved, so kill the diagonal pointers */
2107   a->idiagvalid = PETSC_FALSE;
2108   if (fshift && a->diag) {
2109     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2110     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2111     a->diag = 0;
2112   }
2113   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2114   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2115   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2116   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2117 
2118   A->info.mallocs    += a->reallocs;
2119   a->reallocs         = 0;
2120   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2121 
2122   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2123 
2124   A->same_nonzero = PETSC_TRUE;
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 /*
2129    This function returns an array of flags which indicate the locations of contiguous
2130    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2131    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2132    Assume: sizes should be long enough to hold all the values.
2133 */
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2136 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2137 {
2138   PetscInt  i,j,k,row;
2139   PetscBool flg;
2140 
2141   PetscFunctionBegin;
2142   for (i=0,j=0; i<n; j++) {
2143     row = idx[i];
2144     if (row%bs!=0) { /* Not the begining of a block */
2145       sizes[j] = 1;
2146       i++;
2147     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2148       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2149       i++;
2150     } else { /* Begining of the block, so check if the complete block exists */
2151       flg = PETSC_TRUE;
2152       for (k=1; k<bs; k++) {
2153         if (row+k != idx[i+k]) { /* break in the block */
2154           flg = PETSC_FALSE;
2155           break;
2156         }
2157       }
2158       if (flg) { /* No break in the bs */
2159         sizes[j] = bs;
2160         i       += bs;
2161       } else {
2162         sizes[j] = 1;
2163         i++;
2164       }
2165     }
2166   }
2167   *bs_max = j;
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2173 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2174 {
2175   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2176   PetscErrorCode    ierr;
2177   PetscInt          i,j,k,count,*rows;
2178   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2179   PetscScalar       zero = 0.0;
2180   MatScalar         *aa;
2181   const PetscScalar *xx;
2182   PetscScalar       *bb;
2183 
2184   PetscFunctionBegin;
2185   /* fix right hand side if needed */
2186   if (x && b) {
2187     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2188     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2189     for (i=0; i<is_n; i++) {
2190       bb[is_idx[i]] = diag*xx[is_idx[i]];
2191     }
2192     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2193     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2194   }
2195 
2196   /* Make a copy of the IS and  sort it */
2197   /* allocate memory for rows,sizes */
2198   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2199 
2200   /* copy IS values to rows, and sort them */
2201   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2202   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2203 
2204   if (baij->keepnonzeropattern) {
2205     for (i=0; i<is_n; i++) sizes[i] = 1;
2206     bs_max          = is_n;
2207     A->same_nonzero = PETSC_TRUE;
2208   } else {
2209     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2210 
2211     A->same_nonzero = PETSC_FALSE;
2212   }
2213 
2214   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2215     row = rows[j];
2216     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2217     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2218     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2219     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2220       if (diag != (PetscScalar)0.0) {
2221         if (baij->ilen[row/bs] > 0) {
2222           baij->ilen[row/bs]       = 1;
2223           baij->j[baij->i[row/bs]] = row/bs;
2224 
2225           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2226         }
2227         /* Now insert all the diagonal values for this bs */
2228         for (k=0; k<bs; k++) {
2229           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2230         }
2231       } else { /* (diag == 0.0) */
2232         baij->ilen[row/bs] = 0;
2233       } /* end (diag == 0.0) */
2234     } else { /* (sizes[i] != bs) */
2235 #if defined(PETSC_USE_DEBUG)
2236       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2237 #endif
2238       for (k=0; k<count; k++) {
2239         aa[0] =  zero;
2240         aa   += bs;
2241       }
2242       if (diag != (PetscScalar)0.0) {
2243         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2244       }
2245     }
2246   }
2247 
2248   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2249   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2255 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2256 {
2257   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2258   PetscErrorCode    ierr;
2259   PetscInt          i,j,k,count;
2260   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2261   PetscScalar       zero = 0.0;
2262   MatScalar         *aa;
2263   const PetscScalar *xx;
2264   PetscScalar       *bb;
2265   PetscBool         *zeroed,vecs = PETSC_FALSE;
2266 
2267   PetscFunctionBegin;
2268   /* fix right hand side if needed */
2269   if (x && b) {
2270     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2271     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2272     vecs = PETSC_TRUE;
2273   }
2274   A->same_nonzero = PETSC_TRUE;
2275 
2276   /* zero the columns */
2277   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2278   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2279   for (i=0; i<is_n; i++) {
2280     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2281     zeroed[is_idx[i]] = PETSC_TRUE;
2282   }
2283   for (i=0; i<A->rmap->N; i++) {
2284     if (!zeroed[i]) {
2285       row = i/bs;
2286       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2287         for (k=0; k<bs; k++) {
2288           col = bs*baij->j[j] + k;
2289           if (zeroed[col]) {
2290             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2291             if (vecs) bb[i] -= aa[0]*xx[col];
2292             aa[0] = 0.0;
2293           }
2294         }
2295       }
2296     } else if (vecs) bb[i] = diag*xx[i];
2297   }
2298   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2299   if (vecs) {
2300     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2301     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2302   }
2303 
2304   /* zero the rows */
2305   for (i=0; i<is_n; i++) {
2306     row   = is_idx[i];
2307     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2308     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2309     for (k=0; k<count; k++) {
2310       aa[0] =  zero;
2311       aa   += bs;
2312     }
2313     if (diag != (PetscScalar)0.0) {
2314       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2315     }
2316   }
2317   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2323 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2324 {
2325   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2326   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2327   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2328   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2329   PetscErrorCode ierr;
2330   PetscInt       ridx,cidx,bs2=a->bs2;
2331   PetscBool      roworiented=a->roworiented;
2332   MatScalar      *ap,value,*aa=a->a,*bap;
2333 
2334   PetscFunctionBegin;
2335   if (v) PetscValidScalarPointer(v,6);
2336   for (k=0; k<m; k++) { /* loop over added rows */
2337     row  = im[k];
2338     brow = row/bs;
2339     if (row < 0) continue;
2340 #if defined(PETSC_USE_DEBUG)
2341     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2342 #endif
2343     rp   = aj + ai[brow];
2344     ap   = aa + bs2*ai[brow];
2345     rmax = imax[brow];
2346     nrow = ailen[brow];
2347     low  = 0;
2348     high = nrow;
2349     for (l=0; l<n; l++) { /* loop over added columns */
2350       if (in[l] < 0) continue;
2351 #if defined(PETSC_USE_DEBUG)
2352       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2353 #endif
2354       col  = in[l]; bcol = col/bs;
2355       ridx = row % bs; cidx = col % bs;
2356       if (roworiented) {
2357         value = v[l + k*n];
2358       } else {
2359         value = v[k + l*m];
2360       }
2361       if (col <= lastcol) low = 0; else high = nrow;
2362       lastcol = col;
2363       while (high-low > 7) {
2364         t = (low+high)/2;
2365         if (rp[t] > bcol) high = t;
2366         else              low  = t;
2367       }
2368       for (i=low; i<high; i++) {
2369         if (rp[i] > bcol) break;
2370         if (rp[i] == bcol) {
2371           bap = ap +  bs2*i + bs*cidx + ridx;
2372           if (is == ADD_VALUES) *bap += value;
2373           else                  *bap  = value;
2374           goto noinsert1;
2375         }
2376       }
2377       if (nonew == 1) goto noinsert1;
2378       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2379       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2380       N = nrow++ - 1; high++;
2381       /* shift up all the later entries in this row */
2382       for (ii=N; ii>=i; ii--) {
2383         rp[ii+1] = rp[ii];
2384         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2385       }
2386       if (N>=i) {
2387         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2388       }
2389       rp[i]                      = bcol;
2390       ap[bs2*i + bs*cidx + ridx] = value;
2391       a->nz++;
2392 noinsert1:;
2393       low = i;
2394     }
2395     ailen[brow] = nrow;
2396   }
2397   A->same_nonzero = PETSC_FALSE;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2403 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2404 {
2405   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2406   Mat            outA;
2407   PetscErrorCode ierr;
2408   PetscBool      row_identity,col_identity;
2409 
2410   PetscFunctionBegin;
2411   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2412   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2413   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2414   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2415 
2416   outA            = inA;
2417   inA->factortype = MAT_FACTOR_LU;
2418 
2419   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2420 
2421   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2422   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2423   a->row = row;
2424   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2425   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2426   a->col = col;
2427 
2428   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2429   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2430   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2431   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2432 
2433   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2434   if (!a->solve_work) {
2435     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2436     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2437   }
2438   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2444 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2445 {
2446   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2447   PetscInt    i,nz,mbs;
2448 
2449   PetscFunctionBegin;
2450   nz  = baij->maxnz;
2451   mbs = baij->mbs;
2452   for (i=0; i<nz; i++) {
2453     baij->j[i] = indices[i];
2454   }
2455   baij->nz = nz;
2456   for (i=0; i<mbs; i++) {
2457     baij->ilen[i] = baij->imax[i];
2458   }
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2464 /*@
2465     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2466        in the matrix.
2467 
2468   Input Parameters:
2469 +  mat - the SeqBAIJ matrix
2470 -  indices - the column indices
2471 
2472   Level: advanced
2473 
2474   Notes:
2475     This can be called if you have precomputed the nonzero structure of the
2476   matrix and want to provide it to the matrix object to improve the performance
2477   of the MatSetValues() operation.
2478 
2479     You MUST have set the correct numbers of nonzeros per row in the call to
2480   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2481 
2482     MUST be called before any calls to MatSetValues();
2483 
2484 @*/
2485 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2486 {
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
2490   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2491   PetscValidPointer(indices,2);
2492   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2498 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2499 {
2500   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2501   PetscErrorCode ierr;
2502   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2503   PetscReal      atmp;
2504   PetscScalar    *x,zero = 0.0;
2505   MatScalar      *aa;
2506   PetscInt       ncols,brow,krow,kcol;
2507 
2508   PetscFunctionBegin;
2509   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2510   bs  = A->rmap->bs;
2511   aa  = a->a;
2512   ai  = a->i;
2513   aj  = a->j;
2514   mbs = a->mbs;
2515 
2516   ierr = VecSet(v,zero);CHKERRQ(ierr);
2517   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2518   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2519   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2520   for (i=0; i<mbs; i++) {
2521     ncols = ai[1] - ai[0]; ai++;
2522     brow  = bs*i;
2523     for (j=0; j<ncols; j++) {
2524       for (kcol=0; kcol<bs; kcol++) {
2525         for (krow=0; krow<bs; krow++) {
2526           atmp = PetscAbsScalar(*aa);aa++;
2527           row  = brow + krow;   /* row index */
2528           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2529           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2530         }
2531       }
2532       aj++;
2533     }
2534   }
2535   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatCopy_SeqBAIJ"
2541 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2542 {
2543   PetscErrorCode ierr;
2544 
2545   PetscFunctionBegin;
2546   /* If the two matrices have the same copy implementation, use fast copy. */
2547   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2548     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2549     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2550     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2551 
2552     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2553     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2554     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2555   } else {
2556     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2563 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2564 {
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2574 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2575 {
2576   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2577 
2578   PetscFunctionBegin;
2579   *array = a->a;
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 #undef __FUNCT__
2584 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2585 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2586 {
2587   PetscFunctionBegin;
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #undef __FUNCT__
2592 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2593 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2594 {
2595   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2596   PetscErrorCode ierr;
2597   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2598   PetscBLASInt   one=1;
2599 
2600   PetscFunctionBegin;
2601   if (str == SAME_NONZERO_PATTERN) {
2602     PetscScalar  alpha = a;
2603     PetscBLASInt bnz;
2604     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2605     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2606   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2607     if (y->xtoy && y->XtoY != X) {
2608       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2609       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2610     }
2611     if (!y->xtoy) { /* get xtoy */
2612       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2613       y->XtoY = X;
2614       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2615     }
2616     for (i=0; i<x->nz; i++) {
2617       j = 0;
2618       while (j < bs2) {
2619         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2620         j++;
2621       }
2622     }
2623     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2624   } else {
2625     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2626   }
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 #undef __FUNCT__
2631 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2632 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2633 {
2634   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2635   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2636   MatScalar   *aa = a->a;
2637 
2638   PetscFunctionBegin;
2639   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2640   PetscFunctionReturn(0);
2641 }
2642 
2643 #undef __FUNCT__
2644 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2645 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2646 {
2647   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2648   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2649   MatScalar   *aa = a->a;
2650 
2651   PetscFunctionBegin;
2652   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2657 
2658 #undef __FUNCT__
2659 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2660 /*
2661     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2662 */
2663 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2664 {
2665   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2666   PetscErrorCode ierr;
2667   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2668   PetscInt       nz = a->i[m],row,*jj,mr,col;
2669 
2670   PetscFunctionBegin;
2671   *nn = n;
2672   if (!ia) PetscFunctionReturn(0);
2673   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2674   else {
2675     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2676     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2677     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2678     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2679     jj   = a->j;
2680     for (i=0; i<nz; i++) {
2681       collengths[jj[i]]++;
2682     }
2683     cia[0] = oshift;
2684     for (i=0; i<n; i++) {
2685       cia[i+1] = cia[i] + collengths[i];
2686     }
2687     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2688     jj   = a->j;
2689     for (row=0; row<m; row++) {
2690       mr = a->i[row+1] - a->i[row];
2691       for (i=0; i<mr; i++) {
2692         col = *jj++;
2693 
2694         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2695       }
2696     }
2697     ierr = PetscFree(collengths);CHKERRQ(ierr);
2698     *ia  = cia; *ja = cja;
2699   }
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2705 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2706 {
2707   PetscErrorCode ierr;
2708 
2709   PetscFunctionBegin;
2710   if (!ia) PetscFunctionReturn(0);
2711   ierr = PetscFree(*ia);CHKERRQ(ierr);
2712   ierr = PetscFree(*ja);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2718 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2719 {
2720   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2721   PetscErrorCode ierr;
2722   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2723   PetscScalar    dx,*y,*xx,*w3_array;
2724   PetscScalar    *vscale_array;
2725   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2726   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2727   void           *fctx   = coloring->fctx;
2728   PetscBool      flg     = PETSC_FALSE;
2729   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2730   Vec            x1_tmp;
2731 
2732   PetscFunctionBegin;
2733   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2734   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2735   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2736   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2737 
2738   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2739   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2740   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2741   if (flg) {
2742     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2743   } else {
2744     PetscBool assembled;
2745     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2746     if (assembled) {
2747       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2748     }
2749   }
2750 
2751   x1_tmp = x1;
2752   if (!coloring->vscale) {
2753     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2754   }
2755 
2756   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2757     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2758   }
2759   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2760 
2761   /* Set w1 = F(x1) */
2762   if (!coloring->fset) {
2763     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2764     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2765     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2766   } else {
2767     coloring->fset = PETSC_FALSE;
2768   }
2769 
2770   if (!coloring->w3) {
2771     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2772     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2773   }
2774   w3 = coloring->w3;
2775 
2776   CHKMEMQ;
2777   /* Compute all the local scale factors, including ghost points */
2778   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2779   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2780   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2781   if (ctype == IS_COLORING_GHOSTED) {
2782     col_start = 0; col_end = N;
2783   } else if (ctype == IS_COLORING_GLOBAL) {
2784     xx           = xx - start;
2785     vscale_array = vscale_array - start;
2786     col_start    = start; col_end = N + start;
2787   }
2788   CHKMEMQ;
2789   for (col=col_start; col<col_end; col++) {
2790     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2791     if (coloring->htype[0] == 'w') {
2792       dx = 1.0 + unorm;
2793     } else {
2794       dx = xx[col];
2795     }
2796     if (dx == (PetscScalar)0.0) dx = 1.0;
2797     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2798     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2799     dx               *= epsilon;
2800     vscale_array[col] = (PetscScalar)1.0/dx;
2801   }     CHKMEMQ;
2802   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2803   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2804   if (ctype == IS_COLORING_GLOBAL) {
2805     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2806     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2807   }
2808   CHKMEMQ;
2809   if (coloring->vscaleforrow) {
2810     vscaleforrow = coloring->vscaleforrow;
2811   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2812 
2813   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2814   /*
2815     Loop over each color
2816   */
2817   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2818   for (k=0; k<coloring->ncolors; k++) {
2819     coloring->currentcolor = k;
2820     for (i=0; i<bs; i++) {
2821       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2822       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2823       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2824       /*
2825         Loop over each column associated with color
2826         adding the perturbation to the vector w3.
2827       */
2828       for (l=0; l<coloring->ncolumns[k]; l++) {
2829         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2830         if (coloring->htype[0] == 'w') {
2831           dx = 1.0 + unorm;
2832         } else {
2833           dx = xx[col];
2834         }
2835         if (dx == (PetscScalar)0.0) dx = 1.0;
2836         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2837         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2838         dx *= epsilon;
2839         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2840         w3_array[col] += dx;
2841       }
2842       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2843       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2844 
2845       /*
2846         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2847         w2 = F(x1 + dx) - F(x1)
2848       */
2849       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2850       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2851       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2852       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2853 
2854       /*
2855         Loop over rows of vector, putting results into Jacobian matrix
2856       */
2857       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2858       for (l=0; l<coloring->nrows[k]; l++) {
2859         row = bs*coloring->rows[k][l];                /* local row index */
2860         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2861         for (j=0; j<bs; j++) {
2862           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2863           srows[j]  = row + start + j;
2864         }
2865         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2866       }
2867       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2868     }
2869   } /* endof for each color */
2870   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2871   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2872   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2873   ierr = PetscFree(srows);CHKERRQ(ierr);
2874 
2875   coloring->currentcolor = -1;
2876 
2877   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2878   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2880   PetscFunctionReturn(0);
2881 }
2882 
2883 /* -------------------------------------------------------------------*/
2884 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2885                                        MatGetRow_SeqBAIJ,
2886                                        MatRestoreRow_SeqBAIJ,
2887                                        MatMult_SeqBAIJ_N,
2888                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2889                                        MatMultTranspose_SeqBAIJ,
2890                                        MatMultTransposeAdd_SeqBAIJ,
2891                                        0,
2892                                        0,
2893                                        0,
2894                                /* 10*/ 0,
2895                                        MatLUFactor_SeqBAIJ,
2896                                        0,
2897                                        0,
2898                                        MatTranspose_SeqBAIJ,
2899                                /* 15*/ MatGetInfo_SeqBAIJ,
2900                                        MatEqual_SeqBAIJ,
2901                                        MatGetDiagonal_SeqBAIJ,
2902                                        MatDiagonalScale_SeqBAIJ,
2903                                        MatNorm_SeqBAIJ,
2904                                /* 20*/ 0,
2905                                        MatAssemblyEnd_SeqBAIJ,
2906                                        MatSetOption_SeqBAIJ,
2907                                        MatZeroEntries_SeqBAIJ,
2908                                /* 24*/ MatZeroRows_SeqBAIJ,
2909                                        0,
2910                                        0,
2911                                        0,
2912                                        0,
2913                                /* 29*/ MatSetUp_SeqBAIJ,
2914                                        0,
2915                                        0,
2916                                        0,
2917                                        0,
2918                                /* 34*/ MatDuplicate_SeqBAIJ,
2919                                        0,
2920                                        0,
2921                                        MatILUFactor_SeqBAIJ,
2922                                        0,
2923                                /* 39*/ MatAXPY_SeqBAIJ,
2924                                        MatGetSubMatrices_SeqBAIJ,
2925                                        MatIncreaseOverlap_SeqBAIJ,
2926                                        MatGetValues_SeqBAIJ,
2927                                        MatCopy_SeqBAIJ,
2928                                /* 44*/ 0,
2929                                        MatScale_SeqBAIJ,
2930                                        0,
2931                                        0,
2932                                        MatZeroRowsColumns_SeqBAIJ,
2933                                /* 49*/ 0,
2934                                        MatGetRowIJ_SeqBAIJ,
2935                                        MatRestoreRowIJ_SeqBAIJ,
2936                                        MatGetColumnIJ_SeqBAIJ,
2937                                        MatRestoreColumnIJ_SeqBAIJ,
2938                                /* 54*/ MatFDColoringCreate_SeqAIJ,
2939                                        0,
2940                                        0,
2941                                        0,
2942                                        MatSetValuesBlocked_SeqBAIJ,
2943                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2944                                        MatDestroy_SeqBAIJ,
2945                                        MatView_SeqBAIJ,
2946                                        0,
2947                                        0,
2948                                /* 64*/ 0,
2949                                        0,
2950                                        0,
2951                                        0,
2952                                        0,
2953                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2954                                        0,
2955                                        MatConvert_Basic,
2956                                        0,
2957                                        0,
2958                                /* 74*/ 0,
2959                                        MatFDColoringApply_BAIJ,
2960                                        0,
2961                                        0,
2962                                        0,
2963                                /* 79*/ 0,
2964                                        0,
2965                                        0,
2966                                        0,
2967                                        MatLoad_SeqBAIJ,
2968                                /* 84*/ 0,
2969                                        0,
2970                                        0,
2971                                        0,
2972                                        0,
2973                                /* 89*/ 0,
2974                                        0,
2975                                        0,
2976                                        0,
2977                                        0,
2978                                /* 94*/ 0,
2979                                        0,
2980                                        0,
2981                                        0,
2982                                        0,
2983                                /* 99*/ 0,
2984                                        0,
2985                                        0,
2986                                        0,
2987                                        0,
2988                                /*104*/ 0,
2989                                        MatRealPart_SeqBAIJ,
2990                                        MatImaginaryPart_SeqBAIJ,
2991                                        0,
2992                                        0,
2993                                /*109*/ 0,
2994                                        0,
2995                                        0,
2996                                        0,
2997                                        MatMissingDiagonal_SeqBAIJ,
2998                                /*114*/ 0,
2999                                        0,
3000                                        0,
3001                                        0,
3002                                        0,
3003                                /*119*/ 0,
3004                                        0,
3005                                        MatMultHermitianTranspose_SeqBAIJ,
3006                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3007                                        0,
3008                                /*124*/ 0,
3009                                        0,
3010                                        MatInvertBlockDiagonal_SeqBAIJ,
3011                                        0,
3012                                        0,
3013                                /*129*/ 0,
3014                                        0,
3015                                        0,
3016                                        0,
3017                                        0,
3018                                /*134*/ 0,
3019                                        0,
3020                                        0,
3021                                        0,
3022                                        0,
3023                                /*139*/ 0,
3024                                        0
3025 };
3026 
3027 #undef __FUNCT__
3028 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3029 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3030 {
3031   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3032   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
3033   PetscErrorCode ierr;
3034 
3035   PetscFunctionBegin;
3036   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3037 
3038   /* allocate space for values if not already there */
3039   if (!aij->saved_values) {
3040     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3041     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3042   }
3043 
3044   /* copy values over */
3045   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3046   PetscFunctionReturn(0);
3047 }
3048 
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3051 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3052 {
3053   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3054   PetscErrorCode ierr;
3055   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3056 
3057   PetscFunctionBegin;
3058   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3059   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3060 
3061   /* copy values over */
3062   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 
3066 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3067 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3071 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3072 {
3073   Mat_SeqBAIJ    *b;
3074   PetscErrorCode ierr;
3075   PetscInt       i,mbs,nbs,bs2;
3076   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3077 
3078   PetscFunctionBegin;
3079   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3080   if (nz == MAT_SKIP_ALLOCATION) {
3081     skipallocation = PETSC_TRUE;
3082     nz             = 0;
3083   }
3084 
3085   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3086   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3087   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3088   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3089   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3090 
3091   B->preallocated = PETSC_TRUE;
3092 
3093   mbs = B->rmap->n/bs;
3094   nbs = B->cmap->n/bs;
3095   bs2 = bs*bs;
3096 
3097   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3098 
3099   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3100   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3101   if (nnz) {
3102     for (i=0; i<mbs; i++) {
3103       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3104       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3105     }
3106   }
3107 
3108   b    = (Mat_SeqBAIJ*)B->data;
3109   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3110   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3111   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3112 
3113   if (!flg) {
3114     switch (bs) {
3115     case 1:
3116       B->ops->mult    = MatMult_SeqBAIJ_1;
3117       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3118       B->ops->sor     = MatSOR_SeqBAIJ_1;
3119       break;
3120     case 2:
3121       B->ops->mult    = MatMult_SeqBAIJ_2;
3122       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3123       B->ops->sor     = MatSOR_SeqBAIJ_2;
3124       break;
3125     case 3:
3126       B->ops->mult    = MatMult_SeqBAIJ_3;
3127       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3128       B->ops->sor     = MatSOR_SeqBAIJ_3;
3129       break;
3130     case 4:
3131       B->ops->mult    = MatMult_SeqBAIJ_4;
3132       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3133       B->ops->sor     = MatSOR_SeqBAIJ_4;
3134       break;
3135     case 5:
3136       B->ops->mult    = MatMult_SeqBAIJ_5;
3137       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3138       B->ops->sor     = MatSOR_SeqBAIJ_5;
3139       break;
3140     case 6:
3141       B->ops->mult    = MatMult_SeqBAIJ_6;
3142       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3143       B->ops->sor     = MatSOR_SeqBAIJ_6;
3144       break;
3145     case 7:
3146       B->ops->mult    = MatMult_SeqBAIJ_7;
3147       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3148       B->ops->sor     = MatSOR_SeqBAIJ_7;
3149       break;
3150     case 15:
3151       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
3152       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3153       B->ops->sor     = MatSOR_SeqBAIJ_N;
3154       break;
3155     default:
3156       B->ops->mult    = MatMult_SeqBAIJ_N;
3157       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3158       B->ops->sor     = MatSOR_SeqBAIJ_N;
3159       break;
3160     }
3161   }
3162   b->mbs = mbs;
3163   b->nbs = nbs;
3164   if (!skipallocation) {
3165     if (!b->imax) {
3166       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3167       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3168 
3169       b->free_imax_ilen = PETSC_TRUE;
3170     }
3171     /* b->ilen will count nonzeros in each block row so far. */
3172     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3173     if (!nnz) {
3174       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3175       else if (nz < 0) nz = 1;
3176       for (i=0; i<mbs; i++) b->imax[i] = nz;
3177       nz = nz*mbs;
3178     } else {
3179       nz = 0;
3180       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3181     }
3182 
3183     /* allocate the matrix space */
3184     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3185     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3186     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3187     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3188     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3189 
3190     b->singlemalloc = PETSC_TRUE;
3191     b->i[0]         = 0;
3192     for (i=1; i<mbs+1; i++) {
3193       b->i[i] = b->i[i-1] + b->imax[i-1];
3194     }
3195     b->free_a  = PETSC_TRUE;
3196     b->free_ij = PETSC_TRUE;
3197   } else {
3198     b->free_a  = PETSC_FALSE;
3199     b->free_ij = PETSC_FALSE;
3200   }
3201 
3202   b->bs2              = bs2;
3203   b->mbs              = mbs;
3204   b->nz               = 0;
3205   b->maxnz            = nz;
3206   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3207   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 #undef __FUNCT__
3212 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3213 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3214 {
3215   PetscInt       i,m,nz,nz_max=0,*nnz;
3216   PetscScalar    *values=0;
3217   PetscErrorCode ierr;
3218 
3219   PetscFunctionBegin;
3220   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3221   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3222   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3223   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3224   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3225   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3226   m    = B->rmap->n/bs;
3227 
3228   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3229   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3230   for (i=0; i<m; i++) {
3231     nz = ii[i+1]- ii[i];
3232     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3233     nz_max = PetscMax(nz_max, nz);
3234     nnz[i] = nz;
3235   }
3236   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3237   ierr = PetscFree(nnz);CHKERRQ(ierr);
3238 
3239   values = (PetscScalar*)V;
3240   if (!values) {
3241     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3242     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3243   }
3244   for (i=0; i<m; i++) {
3245     PetscInt          ncols  = ii[i+1] - ii[i];
3246     const PetscInt    *icols = jj + ii[i];
3247     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3248     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3249   }
3250   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3251   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3252   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3253   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3254   PetscFunctionReturn(0);
3255 }
3256 
3257 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3258 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3259 #if defined(PETSC_HAVE_MUMPS)
3260 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3261 #endif
3262 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3263 
3264 /*MC
3265    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3266    block sparse compressed row format.
3267 
3268    Options Database Keys:
3269 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3270 
3271   Level: beginner
3272 
3273 .seealso: MatCreateSeqBAIJ()
3274 M*/
3275 
3276 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3277 
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatCreate_SeqBAIJ"
3280 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3281 {
3282   PetscErrorCode ierr;
3283   PetscMPIInt    size;
3284   Mat_SeqBAIJ    *b;
3285 
3286   PetscFunctionBegin;
3287   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3288   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3289 
3290   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3291   B->data = (void*)b;
3292   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3293 
3294   b->row          = 0;
3295   b->col          = 0;
3296   b->icol         = 0;
3297   b->reallocs     = 0;
3298   b->saved_values = 0;
3299 
3300   b->roworiented        = PETSC_TRUE;
3301   b->nonew              = 0;
3302   b->diag               = 0;
3303   b->solve_work         = 0;
3304   b->mult_work          = 0;
3305   B->spptr              = 0;
3306   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3307   b->keepnonzeropattern = PETSC_FALSE;
3308   b->xtoy               = 0;
3309   b->XtoY               = 0;
3310   B->same_nonzero       = PETSC_FALSE;
3311 
3312   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3313   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3314   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3315 #if defined(PETSC_HAVE_MUMPS)
3316   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3317 #endif
3318   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3319   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3320   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3321   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3322   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3323   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3324   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3325   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3326   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3327   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3328   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3329   PetscFunctionReturn(0);
3330 }
3331 
3332 #undef __FUNCT__
3333 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3334 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3335 {
3336   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3337   PetscErrorCode ierr;
3338   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3339 
3340   PetscFunctionBegin;
3341   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3342 
3343   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3344     c->imax           = a->imax;
3345     c->ilen           = a->ilen;
3346     c->free_imax_ilen = PETSC_FALSE;
3347   } else {
3348     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3349     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3350     for (i=0; i<mbs; i++) {
3351       c->imax[i] = a->imax[i];
3352       c->ilen[i] = a->ilen[i];
3353     }
3354     c->free_imax_ilen = PETSC_TRUE;
3355   }
3356 
3357   /* allocate the matrix space */
3358   if (mallocmatspace) {
3359     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3360       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3361       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3362       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3363 
3364       c->i            = a->i;
3365       c->j            = a->j;
3366       c->singlemalloc = PETSC_FALSE;
3367       c->free_a       = PETSC_TRUE;
3368       c->free_ij      = PETSC_FALSE;
3369       c->parent       = A;
3370       C->preallocated = PETSC_TRUE;
3371       C->assembled    = PETSC_TRUE;
3372 
3373       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3374       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3375       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3376     } else {
3377       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3378       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3379 
3380       c->singlemalloc = PETSC_TRUE;
3381       c->free_a       = PETSC_TRUE;
3382       c->free_ij      = PETSC_TRUE;
3383 
3384       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3385       if (mbs > 0) {
3386         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3387         if (cpvalues == MAT_COPY_VALUES) {
3388           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3389         } else {
3390           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3391         }
3392       }
3393       C->preallocated = PETSC_TRUE;
3394       C->assembled    = PETSC_TRUE;
3395     }
3396   }
3397 
3398   c->roworiented = a->roworiented;
3399   c->nonew       = a->nonew;
3400 
3401   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3402   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3403 
3404   c->bs2         = a->bs2;
3405   c->mbs         = a->mbs;
3406   c->nbs         = a->nbs;
3407 
3408   if (a->diag) {
3409     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3410       c->diag      = a->diag;
3411       c->free_diag = PETSC_FALSE;
3412     } else {
3413       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3414       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3415       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3416       c->free_diag = PETSC_TRUE;
3417     }
3418   } else c->diag = 0;
3419 
3420   c->nz         = a->nz;
3421   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3422   c->solve_work = 0;
3423   c->mult_work  = 0;
3424 
3425   c->compressedrow.use   = a->compressedrow.use;
3426   c->compressedrow.nrows = a->compressedrow.nrows;
3427   c->compressedrow.check = a->compressedrow.check;
3428   if (a->compressedrow.use) {
3429     i    = a->compressedrow.nrows;
3430     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3431     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3432     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3433     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3434   } else {
3435     c->compressedrow.use    = PETSC_FALSE;
3436     c->compressedrow.i      = NULL;
3437     c->compressedrow.rindex = NULL;
3438   }
3439   C->same_nonzero = A->same_nonzero;
3440 
3441   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3442   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3443   PetscFunctionReturn(0);
3444 }
3445 
3446 #undef __FUNCT__
3447 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3448 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3449 {
3450   PetscErrorCode ierr;
3451 
3452   PetscFunctionBegin;
3453   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3454   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3455   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3456   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3457   PetscFunctionReturn(0);
3458 }
3459 
3460 #undef __FUNCT__
3461 #define __FUNCT__ "MatLoad_SeqBAIJ"
3462 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3463 {
3464   Mat_SeqBAIJ    *a;
3465   PetscErrorCode ierr;
3466   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3467   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3468   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3469   PetscInt       *masked,nmask,tmp,bs2,ishift;
3470   PetscMPIInt    size;
3471   int            fd;
3472   PetscScalar    *aa;
3473   MPI_Comm       comm;
3474 
3475   PetscFunctionBegin;
3476   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3477   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3478   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3479   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3480   bs2  = bs*bs;
3481 
3482   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3483   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3484   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3485   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3486   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3487   M = header[1]; N = header[2]; nz = header[3];
3488 
3489   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3490   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3491 
3492   /*
3493      This code adds extra rows to make sure the number of rows is
3494     divisible by the blocksize
3495   */
3496   mbs        = M/bs;
3497   extra_rows = bs - M + bs*(mbs);
3498   if (extra_rows == bs) extra_rows = 0;
3499   else mbs++;
3500   if (extra_rows) {
3501     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3502   }
3503 
3504   /* Set global sizes if not already set */
3505   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3506     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3507   } else { /* Check if the matrix global sizes are correct */
3508     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3509     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3510       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3511     }
3512     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3513   }
3514 
3515   /* read in row lengths */
3516   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3517   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3518   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3519 
3520   /* read in column indices */
3521   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3522   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3523   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3524 
3525   /* loop over row lengths determining block row lengths */
3526   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3527   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3528   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3529   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3530   rowcount = 0;
3531   nzcount  = 0;
3532   for (i=0; i<mbs; i++) {
3533     nmask = 0;
3534     for (j=0; j<bs; j++) {
3535       kmax = rowlengths[rowcount];
3536       for (k=0; k<kmax; k++) {
3537         tmp = jj[nzcount++]/bs;
3538         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3539       }
3540       rowcount++;
3541     }
3542     browlengths[i] += nmask;
3543     /* zero out the mask elements we set */
3544     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3545   }
3546 
3547   /* Do preallocation  */
3548   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3549   a    = (Mat_SeqBAIJ*)newmat->data;
3550 
3551   /* set matrix "i" values */
3552   a->i[0] = 0;
3553   for (i=1; i<= mbs; i++) {
3554     a->i[i]      = a->i[i-1] + browlengths[i-1];
3555     a->ilen[i-1] = browlengths[i-1];
3556   }
3557   a->nz = 0;
3558   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3559 
3560   /* read in nonzero values */
3561   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3562   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3563   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3564 
3565   /* set "a" and "j" values into matrix */
3566   nzcount = 0; jcount = 0;
3567   for (i=0; i<mbs; i++) {
3568     nzcountb = nzcount;
3569     nmask    = 0;
3570     for (j=0; j<bs; j++) {
3571       kmax = rowlengths[i*bs+j];
3572       for (k=0; k<kmax; k++) {
3573         tmp = jj[nzcount++]/bs;
3574         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3575       }
3576     }
3577     /* sort the masked values */
3578     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3579 
3580     /* set "j" values into matrix */
3581     maskcount = 1;
3582     for (j=0; j<nmask; j++) {
3583       a->j[jcount++]  = masked[j];
3584       mask[masked[j]] = maskcount++;
3585     }
3586     /* set "a" values into matrix */
3587     ishift = bs2*a->i[i];
3588     for (j=0; j<bs; j++) {
3589       kmax = rowlengths[i*bs+j];
3590       for (k=0; k<kmax; k++) {
3591         tmp       = jj[nzcountb]/bs;
3592         block     = mask[tmp] - 1;
3593         point     = jj[nzcountb] - bs*tmp;
3594         idx       = ishift + bs2*block + j + bs*point;
3595         a->a[idx] = (MatScalar)aa[nzcountb++];
3596       }
3597     }
3598     /* zero out the mask elements we set */
3599     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3600   }
3601   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3602 
3603   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3604   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3605   ierr = PetscFree(aa);CHKERRQ(ierr);
3606   ierr = PetscFree(jj);CHKERRQ(ierr);
3607   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3608 
3609   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   PetscFunctionReturn(0);
3612 }
3613 
3614 #undef __FUNCT__
3615 #define __FUNCT__ "MatCreateSeqBAIJ"
3616 /*@C
3617    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3618    compressed row) format.  For good matrix assembly performance the
3619    user should preallocate the matrix storage by setting the parameter nz
3620    (or the array nnz).  By setting these parameters accurately, performance
3621    during matrix assembly can be increased by more than a factor of 50.
3622 
3623    Collective on MPI_Comm
3624 
3625    Input Parameters:
3626 +  comm - MPI communicator, set to PETSC_COMM_SELF
3627 .  bs - size of block
3628 .  m - number of rows
3629 .  n - number of columns
3630 .  nz - number of nonzero blocks  per block row (same for all rows)
3631 -  nnz - array containing the number of nonzero blocks in the various block rows
3632          (possibly different for each block row) or NULL
3633 
3634    Output Parameter:
3635 .  A - the matrix
3636 
3637    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3638    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3639    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3640 
3641    Options Database Keys:
3642 .   -mat_no_unroll - uses code that does not unroll the loops in the
3643                      block calculations (much slower)
3644 .    -mat_block_size - size of the blocks to use
3645 
3646    Level: intermediate
3647 
3648    Notes:
3649    The number of rows and columns must be divisible by blocksize.
3650 
3651    If the nnz parameter is given then the nz parameter is ignored
3652 
3653    A nonzero block is any block that as 1 or more nonzeros in it
3654 
3655    The block AIJ format is fully compatible with standard Fortran 77
3656    storage.  That is, the stored row and column indices can begin at
3657    either one (as in Fortran) or zero.  See the users' manual for details.
3658 
3659    Specify the preallocated storage with either nz or nnz (not both).
3660    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3661    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3662    matrices.
3663 
3664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3665 @*/
3666 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3667 {
3668   PetscErrorCode ierr;
3669 
3670   PetscFunctionBegin;
3671   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3672   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3673   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3674   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3675   PetscFunctionReturn(0);
3676 }
3677 
3678 #undef __FUNCT__
3679 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3680 /*@C
3681    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3682    per row in the matrix. For good matrix assembly performance the
3683    user should preallocate the matrix storage by setting the parameter nz
3684    (or the array nnz).  By setting these parameters accurately, performance
3685    during matrix assembly can be increased by more than a factor of 50.
3686 
3687    Collective on MPI_Comm
3688 
3689    Input Parameters:
3690 +  A - the matrix
3691 .  bs - size of block
3692 .  nz - number of block nonzeros per block row (same for all rows)
3693 -  nnz - array containing the number of block nonzeros in the various block rows
3694          (possibly different for each block row) or NULL
3695 
3696    Options Database Keys:
3697 .   -mat_no_unroll - uses code that does not unroll the loops in the
3698                      block calculations (much slower)
3699 .    -mat_block_size - size of the blocks to use
3700 
3701    Level: intermediate
3702 
3703    Notes:
3704    If the nnz parameter is given then the nz parameter is ignored
3705 
3706    You can call MatGetInfo() to get information on how effective the preallocation was;
3707    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3708    You can also run with the option -info and look for messages with the string
3709    malloc in them to see if additional memory allocation was needed.
3710 
3711    The block AIJ format is fully compatible with standard Fortran 77
3712    storage.  That is, the stored row and column indices can begin at
3713    either one (as in Fortran) or zero.  See the users' manual for details.
3714 
3715    Specify the preallocated storage with either nz or nnz (not both).
3716    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3717    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3718 
3719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3720 @*/
3721 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3722 {
3723   PetscErrorCode ierr;
3724 
3725   PetscFunctionBegin;
3726   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3727   PetscValidType(B,1);
3728   PetscValidLogicalCollectiveInt(B,bs,2);
3729   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3730   PetscFunctionReturn(0);
3731 }
3732 
3733 #undef __FUNCT__
3734 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3735 /*@C
3736    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3737    (the default sequential PETSc format).
3738 
3739    Collective on MPI_Comm
3740 
3741    Input Parameters:
3742 +  A - the matrix
3743 .  i - the indices into j for the start of each local row (starts with zero)
3744 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3745 -  v - optional values in the matrix
3746 
3747    Level: developer
3748 
3749 .keywords: matrix, aij, compressed row, sparse
3750 
3751 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3752 @*/
3753 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3754 {
3755   PetscErrorCode ierr;
3756 
3757   PetscFunctionBegin;
3758   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3759   PetscValidType(B,1);
3760   PetscValidLogicalCollectiveInt(B,bs,2);
3761   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3762   PetscFunctionReturn(0);
3763 }
3764 
3765 
3766 #undef __FUNCT__
3767 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3768 /*@
3769      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3770 
3771      Collective on MPI_Comm
3772 
3773    Input Parameters:
3774 +  comm - must be an MPI communicator of size 1
3775 .  bs - size of block
3776 .  m - number of rows
3777 .  n - number of columns
3778 .  i - row indices
3779 .  j - column indices
3780 -  a - matrix values
3781 
3782    Output Parameter:
3783 .  mat - the matrix
3784 
3785    Level: advanced
3786 
3787    Notes:
3788        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3789     once the matrix is destroyed
3790 
3791        You cannot set new nonzero locations into this matrix, that will generate an error.
3792 
3793        The i and j indices are 0 based
3794 
3795        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3796 
3797 
3798 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3799 
3800 @*/
3801 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3802 {
3803   PetscErrorCode ierr;
3804   PetscInt       ii;
3805   Mat_SeqBAIJ    *baij;
3806 
3807   PetscFunctionBegin;
3808   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3809   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3810 
3811   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3812   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3813   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3814   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3815   baij = (Mat_SeqBAIJ*)(*mat)->data;
3816   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3817   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3818 
3819   baij->i = i;
3820   baij->j = j;
3821   baij->a = a;
3822 
3823   baij->singlemalloc = PETSC_FALSE;
3824   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3825   baij->free_a       = PETSC_FALSE;
3826   baij->free_ij      = PETSC_FALSE;
3827 
3828   for (ii=0; ii<m; ii++) {
3829     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3830 #if defined(PETSC_USE_DEBUG)
3831     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3832 #endif
3833   }
3834 #if defined(PETSC_USE_DEBUG)
3835   for (ii=0; ii<baij->i[m]; ii++) {
3836     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3837     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3838   }
3839 #endif
3840 
3841   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3842   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843   PetscFunctionReturn(0);
3844 }
3845