xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ee8a2423ec58a476834342a06fa6cfdfc0d5f5d2)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
28   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30   ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr);
31   PetscFunctionReturn(0);
32 }
33 
34 EXTERN_C_BEGIN
35 #undef __FUNCT__
36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
37 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
40   PetscErrorCode ierr;
41   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
42   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
43   PetscReal      shift = 0.0;
44 
45   PetscFunctionBegin;
46   if (a->idiagvalid) PetscFunctionReturn(0);
47   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
48   diag_offset = a->diag;
49   if (!a->idiag) {
50     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
51     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
52   }
53   diag  = a->idiag;
54   mdiag = a->idiag+bs2*mbs;
55   /* factor and invert each block */
56   switch (bs){
57     case 1:
58       for (i=0; i<mbs; i++) {
59         odiag = v + 1*diag_offset[i];
60         diag[0]  = odiag[0];
61         mdiag[0] = odiag[0];
62         diag[0]  = 1.0 / (diag[0] + shift);
63         diag    += 1;
64         mdiag   += 1;
65       }
66       break;
67     case 2:
68       for (i=0; i<mbs; i++) {
69         odiag   = v + 4*diag_offset[i];
70         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
71 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
72 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
73 	diag    += 4;
74 	mdiag   += 4;
75       }
76       break;
77     case 3:
78       for (i=0; i<mbs; i++) {
79         odiag    = v + 9*diag_offset[i];
80         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
81         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
82         diag[8]  = odiag[8];
83         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
84         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
85         mdiag[8] = odiag[8];
86 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
87 	diag    += 9;
88 	mdiag   += 9;
89       }
90       break;
91     case 4:
92       for (i=0; i<mbs; i++) {
93         odiag  = v + 16*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
97 	diag  += 16;
98 	mdiag += 16;
99       }
100       break;
101     case 5:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 25*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
107 	diag  += 25;
108 	mdiag += 25;
109       }
110       break;
111     case 6:
112       for (i=0; i<mbs; i++) {
113         odiag = v + 36*diag_offset[i];
114         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
117 	diag  += 36;
118 	mdiag += 36;
119       }
120       break;
121     case 7:
122       for (i=0; i<mbs; i++) {
123         odiag = v + 49*diag_offset[i];
124         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
127 	diag  += 49;
128 	mdiag += 49;
129       }
130       break;
131     default:
132       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
133       for (i=0; i<mbs; i++) {
134         odiag = v + bs2*diag_offset[i];
135         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
136         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
138 	diag  += bs2;
139 	mdiag += bs2;
140       }
141       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
142   }
143   a->idiagvalid = PETSC_TRUE;
144   PetscFunctionReturn(0);
145 }
146 EXTERN_C_END
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
151 {
152   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
153   PetscScalar        *x,x1,s1;
154   const PetscScalar  *b;
155   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
156   PetscErrorCode     ierr;
157   PetscInt           m = a->mbs,i,i2,nz,j;
158   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   its = its*lits;
163   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
164   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
165   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
166   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
167   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
168 
169   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
170 
171   diag  = a->diag;
172   idiag = a->idiag;
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
178       x[0] = b[0]*idiag[0];
179       i2     = 1;
180       idiag += 1;
181       for (i=1; i<m; i++) {
182         v     = aa + ai[i];
183         vi    = aj + ai[i];
184         nz    = diag[i] - ai[i];
185         s1    = b[i2];
186         for (j=0; j<nz; j++) {
187           s1 -= v[j]*x[vi[j]];
188         }
189         x[i2]   = idiag[0]*s1;
190         idiag   += 1;
191         i2      += 1;
192       }
193       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
194       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
195     }
196     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
197         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
198       i2    = 0;
199       mdiag = a->idiag+a->mbs;
200       for (i=0; i<m; i++) {
201         x1      = x[i2];
202         x[i2]   = mdiag[0]*x1;
203         mdiag  += 1;
204         i2     += 1;
205       }
206       ierr = PetscLogFlops(m);CHKERRQ(ierr);
207     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
208       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
209     }
210     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
211       idiag   = a->idiag+a->mbs - 1;
212       i2      = m - 1;
213       x1      = x[i2];
214       x[i2]   = idiag[0]*x1;
215       idiag -= 1;
216       i2    -= 1;
217       for (i=m-2; i>=0; i--) {
218         v     = aa + (diag[i]+1);
219         vi    = aj + diag[i] + 1;
220         nz    = ai[i+1] - diag[i] - 1;
221         s1    = x[i2];
222         for (j=0; j<nz; j++) {
223           s1 -= v[j]*x[vi[j]];
224         }
225         x[i2]   = idiag[0]*s1;
226         idiag   -= 1;
227         i2      -= 1;
228       }
229       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
230     }
231   } else {
232     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
233   }
234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
242 {
243   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
244   PetscScalar        *x,x1,x2,s1,s2;
245   const PetscScalar  *b;
246   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
247   PetscErrorCode     ierr;
248   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
249   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
250 
251   PetscFunctionBegin;
252   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
253   its = its*lits;
254   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
255   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
256   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
257   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
258   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
259 
260   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
261 
262   diag  = a->diag;
263   idiag = a->idiag;
264   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
265   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
266 
267   if (flag & SOR_ZERO_INITIAL_GUESS) {
268     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
269       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
270       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
271       i2     = 2;
272       idiag += 4;
273       for (i=1; i<m; i++) {
274 	v     = aa + 4*ai[i];
275 	vi    = aj + ai[i];
276 	nz    = diag[i] - ai[i];
277 	s1    = b[i2]; s2 = b[i2+1];
278         for (j=0; j<nz; j++) {
279 	  idx  = 2*vi[j];
280           it   = 4*j;
281 	  x1   = x[idx]; x2 = x[1+idx];
282 	  s1  -= v[it]*x1 + v[it+2]*x2;
283 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
284 	}
285 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
286 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
287         idiag   += 4;
288         i2      += 2;
289       }
290       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
291       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
292     }
293     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
294         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
295       i2    = 0;
296       mdiag = a->idiag+4*a->mbs;
297       for (i=0; i<m; i++) {
298         x1      = x[i2]; x2 = x[i2+1];
299         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
300         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
301         mdiag  += 4;
302         i2     += 2;
303       }
304       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
305     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
306       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
307     }
308     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
309       idiag   = a->idiag+4*a->mbs - 4;
310       i2      = 2*m - 2;
311       x1      = x[i2]; x2 = x[i2+1];
312       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
313       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
314       idiag -= 4;
315       i2    -= 2;
316       for (i=m-2; i>=0; i--) {
317 	v     = aa + 4*(diag[i]+1);
318 	vi    = aj + diag[i] + 1;
319 	nz    = ai[i+1] - diag[i] - 1;
320 	s1    = x[i2]; s2 = x[i2+1];
321         for (j=0; j<nz; j++) {
322  	  idx  = 2*vi[j];
323           it   = 4*j;
324 	  x1   = x[idx]; x2 = x[1+idx];
325 	  s1  -= v[it]*x1 + v[it+2]*x2;
326 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
327 	}
328 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
329 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
330         idiag   -= 4;
331         i2      -= 2;
332       }
333       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
334     }
335   } else {
336     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
337   }
338   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
339   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
346 {
347   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
348   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
349   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
350   const PetscScalar  *b;
351   PetscErrorCode     ierr;
352   PetscInt           m = a->mbs,i,i2,nz,idx;
353   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
354 
355   PetscFunctionBegin;
356   its = its*lits;
357   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
358   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
359   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
360   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
361   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
362   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
363 
364   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
365 
366   diag  = a->diag;
367   idiag = a->idiag;
368   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
369   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
370 
371   if (flag & SOR_ZERO_INITIAL_GUESS) {
372     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
373       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
374       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
375       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
376       i2     = 3;
377       idiag += 9;
378       for (i=1; i<m; i++) {
379         v     = aa + 9*ai[i];
380         vi    = aj + ai[i];
381         nz    = diag[i] - ai[i];
382         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
383         while (nz--) {
384           idx  = 3*(*vi++);
385           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
386           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
387           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
388           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
389           v   += 9;
390         }
391         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
392         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
393         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
394         idiag   += 9;
395         i2      += 3;
396       }
397       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
398       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
399     }
400     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
401         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
402       i2    = 0;
403       mdiag = a->idiag+9*a->mbs;
404       for (i=0; i<m; i++) {
405         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
406         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
407         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
408         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
409         mdiag  += 9;
410         i2     += 3;
411       }
412       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
413     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
414       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
415     }
416     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
417       idiag   = a->idiag+9*a->mbs - 9;
418       i2      = 3*m - 3;
419       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
420       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
421       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
422       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
423       idiag -= 9;
424       i2    -= 3;
425       for (i=m-2; i>=0; i--) {
426         v     = aa + 9*(diag[i]+1);
427         vi    = aj + diag[i] + 1;
428         nz    = ai[i+1] - diag[i] - 1;
429         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
430         while (nz--) {
431           idx  = 3*(*vi++);
432           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
433           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
434           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
435           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
436           v   += 9;
437         }
438         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
439         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
440         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
441         idiag   -= 9;
442         i2      -= 3;
443       }
444       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
445     }
446   } else {
447     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
448   }
449   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
450   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
457 {
458   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
459   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
460   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
461   const PetscScalar  *b;
462   PetscErrorCode     ierr;
463   PetscInt           m = a->mbs,i,i2,nz,idx;
464   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
465 
466   PetscFunctionBegin;
467   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
468   its = its*lits;
469   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
470   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
471   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
472   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
473   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
474 
475   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
476 
477   diag  = a->diag;
478   idiag = a->idiag;
479   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
480   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
481 
482   if (flag & SOR_ZERO_INITIAL_GUESS) {
483     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
484       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
485       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
486       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
487       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
488       i2     = 4;
489       idiag += 16;
490       for (i=1; i<m; i++) {
491 	v     = aa + 16*ai[i];
492 	vi    = aj + ai[i];
493 	nz    = diag[i] - ai[i];
494 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
495 	while (nz--) {
496 	  idx  = 4*(*vi++);
497 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
498 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
499 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
500 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
501 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
502 	  v   += 16;
503 	}
504 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
505 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
506 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
507 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
508         idiag   += 16;
509         i2      += 4;
510       }
511       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
512       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
513     }
514     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
515         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
516       i2    = 0;
517       mdiag = a->idiag+16*a->mbs;
518       for (i=0; i<m; i++) {
519         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
520         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
521         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
522         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
523         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
524         mdiag  += 16;
525         i2     += 4;
526       }
527       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
528     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
529       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
530     }
531     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
532       idiag   = a->idiag+16*a->mbs - 16;
533       i2      = 4*m - 4;
534       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
535       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
536       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
537       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
538       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
539       idiag -= 16;
540       i2    -= 4;
541       for (i=m-2; i>=0; i--) {
542 	v     = aa + 16*(diag[i]+1);
543 	vi    = aj + diag[i] + 1;
544 	nz    = ai[i+1] - diag[i] - 1;
545 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
546 	while (nz--) {
547 	  idx  = 4*(*vi++);
548 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
549 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
550 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
551 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
552 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
553 	  v   += 16;
554 	}
555 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
556 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
557 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
558 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
559         idiag   -= 16;
560         i2      -= 4;
561       }
562       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
563     }
564   } else {
565     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
566   }
567   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
568   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
575 {
576   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
577   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
578   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
579   const PetscScalar  *b;
580   PetscErrorCode     ierr;
581   PetscInt           m = a->mbs,i,i2,nz,idx;
582   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
583 
584   PetscFunctionBegin;
585   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
586   its = its*lits;
587   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
588   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
589   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
590   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
591   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
592 
593   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
594 
595   diag  = a->diag;
596   idiag = a->idiag;
597   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
598   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
599 
600   if (flag & SOR_ZERO_INITIAL_GUESS) {
601     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
602       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
603       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
604       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
605       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
606       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
607       i2     = 5;
608       idiag += 25;
609       for (i=1; i<m; i++) {
610 	v     = aa + 25*ai[i];
611 	vi    = aj + ai[i];
612 	nz    = diag[i] - ai[i];
613 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
614 	while (nz--) {
615 	  idx  = 5*(*vi++);
616 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
617 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
618 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
619 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
620 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
621 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
622 	  v   += 25;
623 	}
624 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
625 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
626 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
627 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
628 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
629         idiag   += 25;
630         i2      += 5;
631       }
632       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
633       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
634     }
635     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
636         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
637       i2    = 0;
638       mdiag = a->idiag+25*a->mbs;
639       for (i=0; i<m; i++) {
640         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
641         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
642         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
643         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
644         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
645         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
646         mdiag  += 25;
647         i2     += 5;
648       }
649       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
650     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
651       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
652     }
653     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
654       idiag   = a->idiag+25*a->mbs - 25;
655       i2      = 5*m - 5;
656       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
657       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
658       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
659       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
660       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
661       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
662       idiag -= 25;
663       i2    -= 5;
664       for (i=m-2; i>=0; i--) {
665 	v     = aa + 25*(diag[i]+1);
666 	vi    = aj + diag[i] + 1;
667 	nz    = ai[i+1] - diag[i] - 1;
668 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
669 	while (nz--) {
670 	  idx  = 5*(*vi++);
671 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
672 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
673 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
674 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
675 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
676 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
677 	  v   += 25;
678 	}
679 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
680 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
681 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
682 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
683 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
684         idiag   -= 25;
685         i2      -= 5;
686       }
687       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
688     }
689   } else {
690     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
691   }
692   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
693   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
700 {
701   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
702   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
703   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
704   const PetscScalar  *b;
705   PetscErrorCode     ierr;
706   PetscInt           m = a->mbs,i,i2,nz,idx;
707   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
708 
709   PetscFunctionBegin;
710   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
711   its = its*lits;
712   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
713   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
714   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
715   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
716   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
717 
718   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
719 
720   diag  = a->diag;
721   idiag = a->idiag;
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
724 
725   if (flag & SOR_ZERO_INITIAL_GUESS) {
726     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
727       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
728       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
729       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
730       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
731       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
732       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
733       i2     = 6;
734       idiag += 36;
735       for (i=1; i<m; i++) {
736         v     = aa + 36*ai[i];
737         vi    = aj + ai[i];
738         nz    = diag[i] - ai[i];
739         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
740         while (nz--) {
741           idx  = 6*(*vi++);
742           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
743           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
744           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
745           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
746           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
747           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
748           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
749           v   += 36;
750         }
751         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
752         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
753         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
754         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
755         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
756         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
757         idiag   += 36;
758         i2      += 6;
759       }
760       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
761       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
762     }
763     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
764         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
765       i2    = 0;
766       mdiag = a->idiag+36*a->mbs;
767       for (i=0; i<m; i++) {
768         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
769         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
770         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
771         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
772         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
773         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
774         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
775         mdiag  += 36;
776         i2     += 6;
777       }
778       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
779     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
780       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
781     }
782     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
783       idiag   = a->idiag+36*a->mbs - 36;
784       i2      = 6*m - 6;
785       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
786       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
787       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
788       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
789       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
790       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
791       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
792       idiag -= 36;
793       i2    -= 6;
794       for (i=m-2; i>=0; i--) {
795         v     = aa + 36*(diag[i]+1);
796         vi    = aj + diag[i] + 1;
797         nz    = ai[i+1] - diag[i] - 1;
798         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
799         while (nz--) {
800           idx  = 6*(*vi++);
801           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
802           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
803           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
804           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
805           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
806           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
807           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
808           v   += 36;
809         }
810         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
811         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
812         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
813         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
814         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
815         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
816         idiag   -= 36;
817         i2      -= 6;
818       }
819       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
820     }
821   } else {
822     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
823   }
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
832 {
833   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
834   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
835   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
836   const PetscScalar  *b;
837   PetscErrorCode     ierr;
838   PetscInt           m = a->mbs,i,i2,nz,idx;
839   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
840 
841   PetscFunctionBegin;
842   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
843   its = its*lits;
844   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
845   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
846   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
847   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
848   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
849 
850   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
851 
852   diag  = a->diag;
853   idiag = a->idiag;
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
856 
857   if (flag & SOR_ZERO_INITIAL_GUESS) {
858     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
859       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
860       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
861       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
862       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
863       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
864       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
865       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
866       i2     = 7;
867       idiag += 49;
868       for (i=1; i<m; i++) {
869         v     = aa + 49*ai[i];
870         vi    = aj + ai[i];
871         nz    = diag[i] - ai[i];
872         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
873         while (nz--) {
874           idx  = 7*(*vi++);
875           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
876           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
877           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
878           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
879           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
880           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
881           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
882           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
883           v   += 49;
884         }
885         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
886         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
887         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
888         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
889         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
890         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
891         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
892         idiag   += 49;
893         i2      += 7;
894       }
895       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
896       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
897     }
898     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
899         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
900       i2    = 0;
901       mdiag = a->idiag+49*a->mbs;
902       for (i=0; i<m; i++) {
903         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
904         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
905         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
906         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
907         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
908         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
909         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
910         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
911         mdiag  += 49;
912         i2     += 7;
913       }
914       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
915     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
916       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
917     }
918     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
919       idiag   = a->idiag+49*a->mbs - 49;
920       i2      = 7*m - 7;
921       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
922       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
923       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
924       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
925       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
926       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
927       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
928       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
929       idiag -= 49;
930       i2    -= 7;
931       for (i=m-2; i>=0; i--) {
932         v     = aa + 49*(diag[i]+1);
933         vi    = aj + diag[i] + 1;
934         nz    = ai[i+1] - diag[i] - 1;
935         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
936         while (nz--) {
937           idx  = 7*(*vi++);
938           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
939           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
940           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
941           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
942           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
943           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
944           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
945           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
946           v   += 49;
947         }
948         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
949         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
950         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
951         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
952         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
953         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
954         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
955         idiag   -= 49;
956         i2      -= 7;
957       }
958       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
959     }
960   } else {
961     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
962   }
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
971 {
972   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
973   PetscScalar        *x,*work,*w,*workt;
974   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
975   const PetscScalar  *b;
976   PetscErrorCode     ierr;
977   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
978   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
979 
980   PetscFunctionBegin;
981   its = its*lits;
982   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
983   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
984   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
985   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
986   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
987   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
988 
989   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
990 
991   diag  = a->diag;
992   idiag = a->idiag;
993   if (!a->mult_work) {
994     k    = PetscMax(A->rmap->n,A->cmap->n);
995     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
996   }
997   work = a->mult_work;
998   if (!a->sor_work) {
999     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1000   }
1001   w = a->sor_work;
1002 
1003   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1004   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1005 
1006   if (flag & SOR_ZERO_INITIAL_GUESS) {
1007     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1008       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1009       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1010       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1011       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1012       i2     = bs;
1013       idiag += bs2;
1014       for (i=1; i<m; i++) {
1015         v     = aa + bs2*ai[i];
1016         vi    = aj + ai[i];
1017         nz    = diag[i] - ai[i];
1018 
1019         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1020 	/* copy all rows of x that are needed into contiguous space */
1021         workt = work;
1022         for (j=0; j<nz; j++) {
1023           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1024           workt += bs;
1025         }
1026         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1027        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1028         while (nz--) {
1029           idx  = N*(*vi++);
1030           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1031           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1032           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1033           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1034           v   += N2;
1035 	  } */
1036 
1037         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1038         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1039         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1040         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1041 
1042         idiag   += bs2;
1043         i2      += bs;
1044       }
1045       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1046       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1047     }
1048     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1049         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1050       i2    = 0;
1051       mdiag = a->idiag+bs2*a->mbs;
1052       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1053       for (i=0; i<m; i++) {
1054         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1055         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1056         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1057         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1058         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1059 
1060         mdiag  += bs2;
1061         i2     += bs;
1062       }
1063       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1064     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1065       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1066     }
1067     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1068       idiag   = a->idiag+bs2*a->mbs - bs2;
1069       i2      = bs*m - bs;
1070       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1071       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1072       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1073       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1074       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1075       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1076       idiag -= bs2;
1077       i2    -= bs;
1078       for (i=m-2; i>=0; i--) {
1079         v     = aa + bs2*(diag[i]+1);
1080         vi    = aj + diag[i] + 1;
1081         nz    = ai[i+1] - diag[i] - 1;
1082 
1083         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1084 	/* copy all rows of x that are needed into contiguous space */
1085         workt = work;
1086         for (j=0; j<nz; j++) {
1087           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1088           workt += bs;
1089         }
1090         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1091         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1092         while (nz--) {
1093           idx  = N*(*vi++);
1094           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1095           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1096           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1097           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1098           v   += N2;
1099 	  } */
1100 
1101         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1102         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1103         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1104         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1105         idiag   -= bs2;
1106         i2      -= bs;
1107       }
1108       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1109     }
1110   } else {
1111     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1112   }
1113   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1114   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1120 */
1121 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1124 #define matsetvaluesblocked4_ matsetvaluesblocked4
1125 #endif
1126 
1127 EXTERN_C_BEGIN
1128 #undef __FUNCT__
1129 #define __FUNCT__ "matsetvaluesblocked4_"
1130 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1131 {
1132   Mat               A = *AA;
1133   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1134   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1135   PetscInt          *ai=a->i,*ailen=a->ilen;
1136   PetscInt          *aj=a->j,stepval,lastcol = -1;
1137   const PetscScalar *value = v;
1138   MatScalar         *ap,*aa = a->a,*bap;
1139 
1140   PetscFunctionBegin;
1141   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1142   stepval = (n-1)*4;
1143   for (k=0; k<m; k++) { /* loop over added rows */
1144     row  = im[k];
1145     rp   = aj + ai[row];
1146     ap   = aa + 16*ai[row];
1147     nrow = ailen[row];
1148     low  = 0;
1149     high = nrow;
1150     for (l=0; l<n; l++) { /* loop over added columns */
1151       col = in[l];
1152       if (col <= lastcol) low = 0; else high = nrow;
1153       lastcol = col;
1154       value = v + k*(stepval+4 + l)*4;
1155       while (high-low > 7) {
1156         t = (low+high)/2;
1157         if (rp[t] > col) high = t;
1158         else             low  = t;
1159       }
1160       for (i=low; i<high; i++) {
1161         if (rp[i] > col) break;
1162         if (rp[i] == col) {
1163           bap  = ap +  16*i;
1164           for (ii=0; ii<4; ii++,value+=stepval) {
1165             for (jj=ii; jj<16; jj+=4) {
1166               bap[jj] += *value++;
1167             }
1168           }
1169           goto noinsert2;
1170         }
1171       }
1172       N = nrow++ - 1;
1173       high++; /* added new column index thus must search to one higher than before */
1174       /* shift up all the later entries in this row */
1175       for (ii=N; ii>=i; ii--) {
1176         rp[ii+1] = rp[ii];
1177         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1178       }
1179       if (N >= i) {
1180         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1181       }
1182       rp[i] = col;
1183       bap   = ap +  16*i;
1184       for (ii=0; ii<4; ii++,value+=stepval) {
1185         for (jj=ii; jj<16; jj+=4) {
1186           bap[jj] = *value++;
1187         }
1188       }
1189       noinsert2:;
1190       low = i;
1191     }
1192     ailen[row] = nrow;
1193   }
1194   PetscFunctionReturnVoid();
1195 }
1196 EXTERN_C_END
1197 
1198 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1199 #define matsetvalues4_ MATSETVALUES4
1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1201 #define matsetvalues4_ matsetvalues4
1202 #endif
1203 
1204 EXTERN_C_BEGIN
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSetValues4_"
1207 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1208 {
1209   Mat         A = *AA;
1210   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1211   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1212   PetscInt    *ai=a->i,*ailen=a->ilen;
1213   PetscInt    *aj=a->j,brow,bcol;
1214   PetscInt    ridx,cidx,lastcol = -1;
1215   MatScalar   *ap,value,*aa=a->a,*bap;
1216 
1217   PetscFunctionBegin;
1218   for (k=0; k<m; k++) { /* loop over added rows */
1219     row  = im[k]; brow = row/4;
1220     rp   = aj + ai[brow];
1221     ap   = aa + 16*ai[brow];
1222     nrow = ailen[brow];
1223     low  = 0;
1224     high = nrow;
1225     for (l=0; l<n; l++) { /* loop over added columns */
1226       col = in[l]; bcol = col/4;
1227       ridx = row % 4; cidx = col % 4;
1228       value = v[l + k*n];
1229       if (col <= lastcol) low = 0; else high = nrow;
1230       lastcol = col;
1231       while (high-low > 7) {
1232         t = (low+high)/2;
1233         if (rp[t] > bcol) high = t;
1234         else              low  = t;
1235       }
1236       for (i=low; i<high; i++) {
1237         if (rp[i] > bcol) break;
1238         if (rp[i] == bcol) {
1239           bap  = ap +  16*i + 4*cidx + ridx;
1240           *bap += value;
1241           goto noinsert1;
1242         }
1243       }
1244       N = nrow++ - 1;
1245       high++; /* added new column thus must search to one higher than before */
1246       /* shift up all the later entries in this row */
1247       for (ii=N; ii>=i; ii--) {
1248         rp[ii+1] = rp[ii];
1249         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1250       }
1251       if (N>=i) {
1252         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1253       }
1254       rp[i]                    = bcol;
1255       ap[16*i + 4*cidx + ridx] = value;
1256       noinsert1:;
1257       low = i;
1258     }
1259     ailen[brow] = nrow;
1260   }
1261   PetscFunctionReturnVoid();
1262 }
1263 EXTERN_C_END
1264 
1265 /*
1266      Checks for missing diagonals
1267 */
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1271 {
1272   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1273   PetscErrorCode ierr;
1274   PetscInt       *diag,*jj = a->j,i;
1275 
1276   PetscFunctionBegin;
1277   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1278   *missing = PETSC_FALSE;
1279   if (A->rmap->n > 0 && !jj) {
1280     *missing  = PETSC_TRUE;
1281     if (d) *d = 0;
1282     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1283   } else {
1284     diag     = a->diag;
1285     for (i=0; i<a->mbs; i++) {
1286       if (jj[diag[i]] != i) {
1287         *missing  = PETSC_TRUE;
1288         if (d) *d = i;
1289         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1290       }
1291     }
1292   }
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1299 {
1300   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302   PetscInt       i,j,m = a->mbs;
1303 
1304   PetscFunctionBegin;
1305   if (!a->diag) {
1306     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1307     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1308     a->free_diag = PETSC_TRUE;
1309   }
1310   for (i=0; i<m; i++) {
1311     a->diag[i] = a->i[i+1];
1312     for (j=a->i[i]; j<a->i[i+1]; j++) {
1313       if (a->j[j] == i) {
1314         a->diag[i] = j;
1315         break;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 
1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1332   PetscInt       *tia, *tja;
1333 
1334   PetscFunctionBegin;
1335   *nn = n;
1336   if (!ia) PetscFunctionReturn(0);
1337   if (symmetric) {
1338     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1339     nz   = tia[n];
1340   } else {
1341     tia = a->i; tja = a->j;
1342   }
1343 
1344   if (!blockcompressed && bs > 1) {
1345     (*nn) *= bs;
1346     /* malloc & create the natural set of indices */
1347     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1348     if (n) {
1349       (*ia)[0] = 0;
1350       for (j=1; j<bs; j++) {
1351         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1352       }
1353     }
1354 
1355     for (i=1; i<n; i++) {
1356       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1357       for (j=1; j<bs; j++) {
1358         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1359       }
1360     }
1361     if (n) {
1362       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1363     }
1364 
1365     if (ja) {
1366       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1367       cnt = 0;
1368       for (i=0; i<n; i++) {
1369         for (j=0; j<bs; j++) {
1370           for (k=tia[i]; k<tia[i+1]; k++) {
1371             for (l=0; l<bs; l++) {
1372               (*ja)[cnt++] = bs*tja[k] + l;
1373 	    }
1374           }
1375         }
1376       }
1377     }
1378 
1379     n     *= bs;
1380     nz *= bs*bs;
1381     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1382       ierr = PetscFree(tia);CHKERRQ(ierr);
1383       ierr = PetscFree(tja);CHKERRQ(ierr);
1384     }
1385   } else if (oshift == 1) {
1386     if (symmetric) {
1387       PetscInt nz = tia[A->rmap->n/bs];
1388       /*  add 1 to i and j indices */
1389       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1390       *ia = tia;
1391       if (ja) {
1392 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1393         *ja = tja;
1394       }
1395     } else {
1396       PetscInt nz = a->i[A->rmap->n/bs];
1397       /* malloc space and  add 1 to i and j indices */
1398       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1399       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1400       if (ja) {
1401 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1402 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1403       }
1404     }
1405   } else {
1406     *ia = tia;
1407     if (ja) *ja = tja;
1408   }
1409 
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1416 {
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!ia) PetscFunctionReturn(0);
1421   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1422     ierr = PetscFree(*ia);CHKERRQ(ierr);
1423     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436 #if defined(PETSC_USE_LOG)
1437   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1438 #endif
1439   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1440   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1441   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1442   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1443   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1444   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1445   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1446   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1447   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1448   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1449   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1450   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1451   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1452 
1453   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1454   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1455   ierr = PetscFree(a);CHKERRQ(ierr);
1456 
1457   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1458   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1459   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1460   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1461   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1462   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1463   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1465   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1466   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1472 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1473 {
1474   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1475   PetscErrorCode ierr;
1476 
1477   PetscFunctionBegin;
1478   switch (op) {
1479   case MAT_ROW_ORIENTED:
1480     a->roworiented    = flg;
1481     break;
1482   case MAT_KEEP_NONZERO_PATTERN:
1483     a->keepnonzeropattern = flg;
1484     break;
1485   case MAT_NEW_NONZERO_LOCATIONS:
1486     a->nonew          = (flg ? 0 : 1);
1487     break;
1488   case MAT_NEW_NONZERO_LOCATION_ERR:
1489     a->nonew          = (flg ? -1 : 0);
1490     break;
1491   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1492     a->nonew          = (flg ? -2 : 0);
1493     break;
1494   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1495     a->nounused       = (flg ? -1 : 0);
1496     break;
1497   case MAT_CHECK_COMPRESSED_ROW:
1498     a->compressedrow.check = flg;
1499     break;
1500   case MAT_NEW_DIAGONALS:
1501   case MAT_IGNORE_OFF_PROC_ENTRIES:
1502   case MAT_USE_HASH_TABLE:
1503     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1504     break;
1505   case MAT_SYMMETRIC:
1506   case MAT_STRUCTURALLY_SYMMETRIC:
1507   case MAT_HERMITIAN:
1508   case MAT_SYMMETRY_ETERNAL:
1509     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1510     break;
1511   default:
1512     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1513   }
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1519 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1520 {
1521   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1522   PetscErrorCode ierr;
1523   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1524   MatScalar      *aa,*aa_i;
1525   PetscScalar    *v_i;
1526 
1527   PetscFunctionBegin;
1528   bs  = A->rmap->bs;
1529   ai  = a->i;
1530   aj  = a->j;
1531   aa  = a->a;
1532   bs2 = a->bs2;
1533 
1534   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1535 
1536   bn  = row/bs;   /* Block number */
1537   bp  = row % bs; /* Block Position */
1538   M   = ai[bn+1] - ai[bn];
1539   *nz = bs*M;
1540 
1541   if (v) {
1542     *v = 0;
1543     if (*nz) {
1544       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1545       for (i=0; i<M; i++) { /* for each block in the block row */
1546         v_i  = *v + i*bs;
1547         aa_i = aa + bs2*(ai[bn] + i);
1548         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1549       }
1550     }
1551   }
1552 
1553   if (idx) {
1554     *idx = 0;
1555     if (*nz) {
1556       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1557       for (i=0; i<M; i++) { /* for each block in the block row */
1558         idx_i = *idx + i*bs;
1559         itmp  = bs*aj[ai[bn] + i];
1560         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1561       }
1562     }
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1569 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1570 {
1571   PetscErrorCode ierr;
1572 
1573   PetscFunctionBegin;
1574   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1575   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1583 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1584 {
1585   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1586   Mat            C;
1587   PetscErrorCode ierr;
1588   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1589   PetscInt       *rows,*cols,bs2=a->bs2;
1590   MatScalar      *array;
1591 
1592   PetscFunctionBegin;
1593   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1595     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1596     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1597 
1598     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1599     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1600     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1601     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1602     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1603     ierr = PetscFree(col);CHKERRQ(ierr);
1604   } else {
1605     C = *B;
1606   }
1607 
1608   array = a->a;
1609   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1610   for (i=0; i<mbs; i++) {
1611     cols[0] = i*bs;
1612     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1613     len = ai[i+1] - ai[i];
1614     for (j=0; j<len; j++) {
1615       rows[0] = (*aj++)*bs;
1616       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1617       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1618       array += bs2;
1619     }
1620   }
1621   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1622 
1623   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1624   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1625 
1626   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1627     *B = C;
1628   } else {
1629     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1636 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1637 {
1638   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1639   PetscErrorCode ierr;
1640   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1641   int            fd;
1642   PetscScalar    *aa;
1643   FILE           *file;
1644 
1645   PetscFunctionBegin;
1646   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1647   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1648   col_lens[0] = MAT_FILE_CLASSID;
1649 
1650   col_lens[1] = A->rmap->N;
1651   col_lens[2] = A->cmap->n;
1652   col_lens[3] = a->nz*bs2;
1653 
1654   /* store lengths of each row and write (including header) to file */
1655   count = 0;
1656   for (i=0; i<a->mbs; i++) {
1657     for (j=0; j<bs; j++) {
1658       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1659     }
1660   }
1661   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1662   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1663 
1664   /* store column indices (zero start index) */
1665   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1666   count = 0;
1667   for (i=0; i<a->mbs; i++) {
1668     for (j=0; j<bs; j++) {
1669       for (k=a->i[i]; k<a->i[i+1]; k++) {
1670         for (l=0; l<bs; l++) {
1671           jj[count++] = bs*a->j[k] + l;
1672         }
1673       }
1674     }
1675   }
1676   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1677   ierr = PetscFree(jj);CHKERRQ(ierr);
1678 
1679   /* store nonzero values */
1680   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1681   count = 0;
1682   for (i=0; i<a->mbs; i++) {
1683     for (j=0; j<bs; j++) {
1684       for (k=a->i[i]; k<a->i[i+1]; k++) {
1685         for (l=0; l<bs; l++) {
1686           aa[count++] = a->a[bs2*k + l*bs + j];
1687         }
1688       }
1689     }
1690   }
1691   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1692   ierr = PetscFree(aa);CHKERRQ(ierr);
1693 
1694   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1695   if (file) {
1696     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1697   }
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1703 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1704 {
1705   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1706   PetscErrorCode    ierr;
1707   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1708   PetscViewerFormat format;
1709 
1710   PetscFunctionBegin;
1711   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1712   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1713     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1714   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1715     Mat aij;
1716     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1717     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1718     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1719   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1720      PetscFunctionReturn(0);
1721   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1722     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1723     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1724     for (i=0; i<a->mbs; i++) {
1725       for (j=0; j<bs; j++) {
1726         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1727         for (k=a->i[i]; k<a->i[i+1]; k++) {
1728           for (l=0; l<bs; l++) {
1729 #if defined(PETSC_USE_COMPLEX)
1730             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1731               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1732                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1733             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1734               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1735                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1736             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1738             }
1739 #else
1740             if (a->a[bs2*k + l*bs + j] != 0.0) {
1741               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1742             }
1743 #endif
1744           }
1745         }
1746         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1747       }
1748     }
1749     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1750   } else {
1751     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1752     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1753     for (i=0; i<a->mbs; i++) {
1754       for (j=0; j<bs; j++) {
1755         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1756         for (k=a->i[i]; k<a->i[i+1]; k++) {
1757           for (l=0; l<bs; l++) {
1758 #if defined(PETSC_USE_COMPLEX)
1759             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1760               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1761                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1762             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1763               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1764                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1765             } else {
1766               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1767             }
1768 #else
1769             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1770 #endif
1771           }
1772         }
1773         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1774       }
1775     }
1776     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1777   }
1778   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1784 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1785 {
1786   Mat            A = (Mat) Aa;
1787   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1788   PetscErrorCode ierr;
1789   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1790   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1791   MatScalar      *aa;
1792   PetscViewer    viewer;
1793 
1794   PetscFunctionBegin;
1795 
1796   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1797   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1798 
1799   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1800 
1801   /* loop over matrix elements drawing boxes */
1802   color = PETSC_DRAW_BLUE;
1803   for (i=0,row=0; i<mbs; i++,row+=bs) {
1804     for (j=a->i[i]; j<a->i[i+1]; j++) {
1805       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1806       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1807       aa = a->a + j*bs2;
1808       for (k=0; k<bs; k++) {
1809         for (l=0; l<bs; l++) {
1810           if (PetscRealPart(*aa++) >=  0.) continue;
1811           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1812         }
1813       }
1814     }
1815   }
1816   color = PETSC_DRAW_CYAN;
1817   for (i=0,row=0; i<mbs; i++,row+=bs) {
1818     for (j=a->i[i]; j<a->i[i+1]; j++) {
1819       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1820       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1821       aa = a->a + j*bs2;
1822       for (k=0; k<bs; k++) {
1823         for (l=0; l<bs; l++) {
1824           if (PetscRealPart(*aa++) != 0.) continue;
1825           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1826         }
1827       }
1828     }
1829   }
1830 
1831   color = PETSC_DRAW_RED;
1832   for (i=0,row=0; i<mbs; i++,row+=bs) {
1833     for (j=a->i[i]; j<a->i[i+1]; j++) {
1834       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1835       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1836       aa = a->a + j*bs2;
1837       for (k=0; k<bs; k++) {
1838         for (l=0; l<bs; l++) {
1839           if (PetscRealPart(*aa++) <= 0.) continue;
1840           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1841         }
1842       }
1843     }
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1850 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1851 {
1852   PetscErrorCode ierr;
1853   PetscReal      xl,yl,xr,yr,w,h;
1854   PetscDraw      draw;
1855   PetscBool      isnull;
1856 
1857   PetscFunctionBegin;
1858 
1859   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1860   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1861 
1862   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1863   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1864   xr += w;    yr += h;  xl = -w;     yl = -h;
1865   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1866   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1867   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatView_SeqBAIJ"
1873 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1874 {
1875   PetscErrorCode ierr;
1876   PetscBool      iascii,isbinary,isdraw;
1877 
1878   PetscFunctionBegin;
1879   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1880   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1881   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1882   if (iascii){
1883     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1884   } else if (isbinary) {
1885     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1886   } else if (isdraw) {
1887     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1888   } else {
1889     Mat B;
1890     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1891     ierr = MatView(B,viewer);CHKERRQ(ierr);
1892     ierr = MatDestroy(&B);CHKERRQ(ierr);
1893   }
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1900 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1901 {
1902   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1903   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1904   PetscInt    *ai = a->i,*ailen = a->ilen;
1905   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1906   MatScalar   *ap,*aa = a->a;
1907 
1908   PetscFunctionBegin;
1909   for (k=0; k<m; k++) { /* loop over rows */
1910     row  = im[k]; brow = row/bs;
1911     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1912     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1913     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1914     nrow = ailen[brow];
1915     for (l=0; l<n; l++) { /* loop over columns */
1916       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1917       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1918       col  = in[l] ;
1919       bcol = col/bs;
1920       cidx = col%bs;
1921       ridx = row%bs;
1922       high = nrow;
1923       low  = 0; /* assume unsorted */
1924       while (high-low > 5) {
1925         t = (low+high)/2;
1926         if (rp[t] > bcol) high = t;
1927         else             low  = t;
1928       }
1929       for (i=low; i<high; i++) {
1930         if (rp[i] > bcol) break;
1931         if (rp[i] == bcol) {
1932           *v++ = ap[bs2*i+bs*cidx+ridx];
1933           goto finished;
1934         }
1935       }
1936       *v++ = 0.0;
1937       finished:;
1938     }
1939   }
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1945 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1946 {
1947   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1948   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1949   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1950   PetscErrorCode    ierr;
1951   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1952   PetscBool         roworiented=a->roworiented;
1953   const PetscScalar *value = v;
1954   MatScalar         *ap,*aa = a->a,*bap;
1955 
1956   PetscFunctionBegin;
1957   if (roworiented) {
1958     stepval = (n-1)*bs;
1959   } else {
1960     stepval = (m-1)*bs;
1961   }
1962   for (k=0; k<m; k++) { /* loop over added rows */
1963     row  = im[k];
1964     if (row < 0) continue;
1965 #if defined(PETSC_USE_DEBUG)
1966     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1967 #endif
1968     rp   = aj + ai[row];
1969     ap   = aa + bs2*ai[row];
1970     rmax = imax[row];
1971     nrow = ailen[row];
1972     low  = 0;
1973     high = nrow;
1974     for (l=0; l<n; l++) { /* loop over added columns */
1975       if (in[l] < 0) continue;
1976 #if defined(PETSC_USE_DEBUG)
1977       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1978 #endif
1979       col = in[l];
1980       if (roworiented) {
1981         value = v + (k*(stepval+bs) + l)*bs;
1982       } else {
1983         value = v + (l*(stepval+bs) + k)*bs;
1984       }
1985       if (col <= lastcol) low = 0; else high = nrow;
1986       lastcol = col;
1987       while (high-low > 7) {
1988         t = (low+high)/2;
1989         if (rp[t] > col) high = t;
1990         else             low  = t;
1991       }
1992       for (i=low; i<high; i++) {
1993         if (rp[i] > col) break;
1994         if (rp[i] == col) {
1995           bap  = ap +  bs2*i;
1996           if (roworiented) {
1997             if (is == ADD_VALUES) {
1998               for (ii=0; ii<bs; ii++,value+=stepval) {
1999                 for (jj=ii; jj<bs2; jj+=bs) {
2000                   bap[jj] += *value++;
2001                 }
2002               }
2003             } else {
2004               for (ii=0; ii<bs; ii++,value+=stepval) {
2005                 for (jj=ii; jj<bs2; jj+=bs) {
2006                   bap[jj] = *value++;
2007                 }
2008               }
2009             }
2010           } else {
2011             if (is == ADD_VALUES) {
2012               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2013                 for (jj=0; jj<bs; jj++) {
2014                   bap[jj] += value[jj];
2015                 }
2016                 bap += bs;
2017               }
2018             } else {
2019               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2020                 for (jj=0; jj<bs; jj++) {
2021                   bap[jj]  = value[jj];
2022                 }
2023                 bap += bs;
2024               }
2025             }
2026           }
2027           goto noinsert2;
2028         }
2029       }
2030       if (nonew == 1) goto noinsert2;
2031       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2032       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2033       N = nrow++ - 1; high++;
2034       /* shift up all the later entries in this row */
2035       for (ii=N; ii>=i; ii--) {
2036         rp[ii+1] = rp[ii];
2037         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2038       }
2039       if (N >= i) {
2040         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2041       }
2042       rp[i] = col;
2043       bap   = ap +  bs2*i;
2044       if (roworiented) {
2045         for (ii=0; ii<bs; ii++,value+=stepval) {
2046           for (jj=ii; jj<bs2; jj+=bs) {
2047             bap[jj] = *value++;
2048           }
2049         }
2050       } else {
2051         for (ii=0; ii<bs; ii++,value+=stepval) {
2052           for (jj=0; jj<bs; jj++) {
2053             *bap++  = *value++;
2054           }
2055         }
2056       }
2057       noinsert2:;
2058       low = i;
2059     }
2060     ailen[row] = nrow;
2061   }
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2067 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2068 {
2069   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2070   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2071   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2072   PetscErrorCode ierr;
2073   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2074   MatScalar      *aa = a->a,*ap;
2075   PetscReal      ratio=0.6;
2076 
2077   PetscFunctionBegin;
2078   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2079 
2080   if (m) rmax = ailen[0];
2081   for (i=1; i<mbs; i++) {
2082     /* move each row back by the amount of empty slots (fshift) before it*/
2083     fshift += imax[i-1] - ailen[i-1];
2084     rmax   = PetscMax(rmax,ailen[i]);
2085     if (fshift) {
2086       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2087       N = ailen[i];
2088       for (j=0; j<N; j++) {
2089         ip[j-fshift] = ip[j];
2090         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2091       }
2092     }
2093     ai[i] = ai[i-1] + ailen[i-1];
2094   }
2095   if (mbs) {
2096     fshift += imax[mbs-1] - ailen[mbs-1];
2097     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2098   }
2099   /* reset ilen and imax for each row */
2100   for (i=0; i<mbs; i++) {
2101     ailen[i] = imax[i] = ai[i+1] - ai[i];
2102   }
2103   a->nz = ai[mbs];
2104 
2105   /* diagonals may have moved, so kill the diagonal pointers */
2106   a->idiagvalid = PETSC_FALSE;
2107   if (fshift && a->diag) {
2108     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2109     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2110     a->diag = 0;
2111   }
2112   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2113   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2114   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2115   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2116   A->info.mallocs     += a->reallocs;
2117   a->reallocs          = 0;
2118   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2119 
2120   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2121   A->same_nonzero = PETSC_TRUE;
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 /*
2126    This function returns an array of flags which indicate the locations of contiguous
2127    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2128    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2129    Assume: sizes should be long enough to hold all the values.
2130 */
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2133 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2134 {
2135   PetscInt   i,j,k,row;
2136   PetscBool  flg;
2137 
2138   PetscFunctionBegin;
2139   for (i=0,j=0; i<n; j++) {
2140     row = idx[i];
2141     if (row%bs!=0) { /* Not the begining of a block */
2142       sizes[j] = 1;
2143       i++;
2144     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2145       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2146       i++;
2147     } else { /* Begining of the block, so check if the complete block exists */
2148       flg = PETSC_TRUE;
2149       for (k=1; k<bs; k++) {
2150         if (row+k != idx[i+k]) { /* break in the block */
2151           flg = PETSC_FALSE;
2152           break;
2153         }
2154       }
2155       if (flg) { /* No break in the bs */
2156         sizes[j] = bs;
2157         i+= bs;
2158       } else {
2159         sizes[j] = 1;
2160         i++;
2161       }
2162     }
2163   }
2164   *bs_max = j;
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2170 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2171 {
2172   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2173   PetscErrorCode    ierr;
2174   PetscInt          i,j,k,count,*rows;
2175   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2176   PetscScalar       zero = 0.0;
2177   MatScalar         *aa;
2178   const PetscScalar *xx;
2179   PetscScalar       *bb;
2180 
2181   PetscFunctionBegin;
2182   /* fix right hand side if needed */
2183   if (x && b) {
2184     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2185     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2186     for (i=0; i<is_n; i++) {
2187       bb[is_idx[i]] = diag*xx[is_idx[i]];
2188     }
2189     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2190     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2191   }
2192 
2193   /* Make a copy of the IS and  sort it */
2194   /* allocate memory for rows,sizes */
2195   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2196 
2197   /* copy IS values to rows, and sort them */
2198   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2199   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2200 
2201   if (baij->keepnonzeropattern) {
2202     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2203     bs_max = is_n;
2204     A->same_nonzero = PETSC_TRUE;
2205   } else {
2206     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2207     A->same_nonzero = PETSC_FALSE;
2208   }
2209 
2210   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2211     row   = rows[j];
2212     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2213     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2214     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2215     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2216       if (diag != 0.0) {
2217         if (baij->ilen[row/bs] > 0) {
2218           baij->ilen[row/bs]       = 1;
2219           baij->j[baij->i[row/bs]] = row/bs;
2220           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2221         }
2222         /* Now insert all the diagonal values for this bs */
2223         for (k=0; k<bs; k++) {
2224           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2225         }
2226       } else { /* (diag == 0.0) */
2227         baij->ilen[row/bs] = 0;
2228       } /* end (diag == 0.0) */
2229     } else { /* (sizes[i] != bs) */
2230 #if defined (PETSC_USE_DEBUG)
2231       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2232 #endif
2233       for (k=0; k<count; k++) {
2234         aa[0] =  zero;
2235         aa    += bs;
2236       }
2237       if (diag != 0.0) {
2238         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2239       }
2240     }
2241   }
2242 
2243   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2244   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2250 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2251 {
2252   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2253   PetscErrorCode    ierr;
2254   PetscInt          i,j,k,count;
2255   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2256   PetscScalar       zero = 0.0;
2257   MatScalar         *aa;
2258   const PetscScalar *xx;
2259   PetscScalar       *bb;
2260   PetscBool         *zeroed,vecs = PETSC_FALSE;
2261 
2262   PetscFunctionBegin;
2263   /* fix right hand side if needed */
2264   if (x && b) {
2265     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2266     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2267     vecs = PETSC_TRUE;
2268   }
2269   A->same_nonzero = PETSC_TRUE;
2270 
2271   /* zero the columns */
2272   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2273   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2274   for (i=0; i<is_n; i++) {
2275     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2276     zeroed[is_idx[i]] = PETSC_TRUE;
2277   }
2278   for (i=0; i<A->rmap->N; i++) {
2279     if (!zeroed[i]) {
2280       row = i/bs;
2281       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2282         for (k=0; k<bs; k++) {
2283           col = bs*baij->j[j] + k;
2284 	  if (zeroed[col]) {
2285 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2286             if (vecs) bb[i] -= aa[0]*xx[col];
2287             aa[0] = 0.0;
2288           }
2289         }
2290       }
2291     } else if (vecs) bb[i] = diag*xx[i];
2292   }
2293   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2294   if (vecs) {
2295     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2296     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2297   }
2298 
2299   /* zero the rows */
2300   for (i=0; i<is_n; i++) {
2301     row   = is_idx[i];
2302     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2303     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2304     for (k=0; k<count; k++) {
2305       aa[0] =  zero;
2306       aa    += bs;
2307     }
2308     if (diag != 0.0) {
2309       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2310     }
2311   }
2312   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2318 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2319 {
2320   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2321   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2322   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2323   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2324   PetscErrorCode ierr;
2325   PetscInt       ridx,cidx,bs2=a->bs2;
2326   PetscBool      roworiented=a->roworiented;
2327   MatScalar      *ap,value,*aa=a->a,*bap;
2328 
2329   PetscFunctionBegin;
2330   if (v) PetscValidScalarPointer(v,6);
2331   for (k=0; k<m; k++) { /* loop over added rows */
2332     row  = im[k];
2333     brow = row/bs;
2334     if (row < 0) continue;
2335 #if defined(PETSC_USE_DEBUG)
2336     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2337 #endif
2338     rp   = aj + ai[brow];
2339     ap   = aa + bs2*ai[brow];
2340     rmax = imax[brow];
2341     nrow = ailen[brow];
2342     low  = 0;
2343     high = nrow;
2344     for (l=0; l<n; l++) { /* loop over added columns */
2345       if (in[l] < 0) continue;
2346 #if defined(PETSC_USE_DEBUG)
2347       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2348 #endif
2349       col = in[l]; bcol = col/bs;
2350       ridx = row % bs; cidx = col % bs;
2351       if (roworiented) {
2352         value = v[l + k*n];
2353       } else {
2354         value = v[k + l*m];
2355       }
2356       if (col <= lastcol) low = 0; else high = nrow;
2357       lastcol = col;
2358       while (high-low > 7) {
2359         t = (low+high)/2;
2360         if (rp[t] > bcol) high = t;
2361         else              low  = t;
2362       }
2363       for (i=low; i<high; i++) {
2364         if (rp[i] > bcol) break;
2365         if (rp[i] == bcol) {
2366           bap  = ap +  bs2*i + bs*cidx + ridx;
2367           if (is == ADD_VALUES) *bap += value;
2368           else                  *bap  = value;
2369           goto noinsert1;
2370         }
2371       }
2372       if (nonew == 1) goto noinsert1;
2373       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2374       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2375       N = nrow++ - 1; high++;
2376       /* shift up all the later entries in this row */
2377       for (ii=N; ii>=i; ii--) {
2378         rp[ii+1] = rp[ii];
2379         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2380       }
2381       if (N>=i) {
2382         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2383       }
2384       rp[i]                      = bcol;
2385       ap[bs2*i + bs*cidx + ridx] = value;
2386       a->nz++;
2387       noinsert1:;
2388       low = i;
2389     }
2390     ailen[brow] = nrow;
2391   }
2392   A->same_nonzero = PETSC_FALSE;
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2398 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2399 {
2400   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2401   Mat            outA;
2402   PetscErrorCode ierr;
2403   PetscBool      row_identity,col_identity;
2404 
2405   PetscFunctionBegin;
2406   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2407   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2408   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2409   if (!row_identity || !col_identity) {
2410     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2411   }
2412 
2413   outA            = inA;
2414   inA->factortype = MAT_FACTOR_LU;
2415 
2416   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2417 
2418   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2419   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2420   a->row = row;
2421   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2422   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2423   a->col = col;
2424 
2425   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2426   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2427    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2428   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2429 
2430   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2431   if (!a->solve_work) {
2432     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2433     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2434   }
2435   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2436 
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 EXTERN_C_BEGIN
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2443 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2444 {
2445   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2446   PetscInt    i,nz,mbs;
2447 
2448   PetscFunctionBegin;
2449   nz  = baij->maxnz;
2450   mbs = baij->mbs;
2451   for (i=0; i<nz; i++) {
2452     baij->j[i] = indices[i];
2453   }
2454   baij->nz = nz;
2455   for (i=0; i<mbs; i++) {
2456     baij->ilen[i] = baij->imax[i];
2457   }
2458   PetscFunctionReturn(0);
2459 }
2460 EXTERN_C_END
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2464 /*@
2465     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2466        in the matrix.
2467 
2468   Input Parameters:
2469 +  mat - the SeqBAIJ matrix
2470 -  indices - the column indices
2471 
2472   Level: advanced
2473 
2474   Notes:
2475     This can be called if you have precomputed the nonzero structure of the
2476   matrix and want to provide it to the matrix object to improve the performance
2477   of the MatSetValues() operation.
2478 
2479     You MUST have set the correct numbers of nonzeros per row in the call to
2480   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2481 
2482     MUST be called before any calls to MatSetValues();
2483 
2484 @*/
2485 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2486 {
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
2490   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2491   PetscValidPointer(indices,2);
2492   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2498 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2499 {
2500   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2501   PetscErrorCode ierr;
2502   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2503   PetscReal      atmp;
2504   PetscScalar    *x,zero = 0.0;
2505   MatScalar      *aa;
2506   PetscInt       ncols,brow,krow,kcol;
2507 
2508   PetscFunctionBegin;
2509   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2510   bs   = A->rmap->bs;
2511   aa   = a->a;
2512   ai   = a->i;
2513   aj   = a->j;
2514   mbs  = a->mbs;
2515 
2516   ierr = VecSet(v,zero);CHKERRQ(ierr);
2517   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2518   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2519   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2520   for (i=0; i<mbs; i++) {
2521     ncols = ai[1] - ai[0]; ai++;
2522     brow  = bs*i;
2523     for (j=0; j<ncols; j++){
2524       for (kcol=0; kcol<bs; kcol++){
2525         for (krow=0; krow<bs; krow++){
2526           atmp = PetscAbsScalar(*aa);aa++;
2527           row = brow + krow;    /* row index */
2528           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2529           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2530         }
2531       }
2532       aj++;
2533     }
2534   }
2535   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatCopy_SeqBAIJ"
2541 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2542 {
2543   PetscErrorCode ierr;
2544 
2545   PetscFunctionBegin;
2546   /* If the two matrices have the same copy implementation, use fast copy. */
2547   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2548     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2549     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2550     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2551 
2552     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2553     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2554     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2555   } else {
2556     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2563 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2564 {
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2574 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2575 {
2576   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2577   PetscFunctionBegin;
2578   *array = a->a;
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 #undef __FUNCT__
2583 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2584 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2585 {
2586   PetscFunctionBegin;
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2592 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2593 {
2594   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2595   PetscErrorCode ierr;
2596   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2597   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2598 
2599   PetscFunctionBegin;
2600   if (str == SAME_NONZERO_PATTERN) {
2601     PetscScalar alpha = a;
2602     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2603   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2604     if (y->xtoy && y->XtoY != X) {
2605       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2606       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2607     }
2608     if (!y->xtoy) { /* get xtoy */
2609       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2610       y->XtoY = X;
2611       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2612     }
2613     bs2 = bs*bs;
2614     for (i=0; i<x->nz; i++) {
2615       j = 0;
2616       while (j < bs2){
2617         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2618         j++;
2619       }
2620     }
2621     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2622   } else {
2623     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2624   }
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 #undef __FUNCT__
2629 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2630 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2631 {
2632   PetscInt rbs,cbs;
2633   PetscErrorCode ierr;
2634 
2635   PetscFunctionBegin;
2636   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2637   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2638   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2639   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2640   PetscFunctionReturn(0);
2641 }
2642 
2643 #undef __FUNCT__
2644 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2645 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2646 {
2647   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2648   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2649   MatScalar      *aa = a->a;
2650 
2651   PetscFunctionBegin;
2652   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2658 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2659 {
2660   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2661   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2662   MatScalar      *aa = a->a;
2663 
2664   PetscFunctionBegin;
2665   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2666   PetscFunctionReturn(0);
2667 }
2668 
2669 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2670 
2671 #undef __FUNCT__
2672 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2673 /*
2674     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2675 */
2676 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2677 {
2678   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2679   PetscErrorCode ierr;
2680   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2681   PetscInt       nz = a->i[m],row,*jj,mr,col;
2682 
2683   PetscFunctionBegin;
2684   *nn = n;
2685   if (!ia) PetscFunctionReturn(0);
2686   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2687   else {
2688     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2689     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2690     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2691     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2692     jj = a->j;
2693     for (i=0; i<nz; i++) {
2694       collengths[jj[i]]++;
2695     }
2696     cia[0] = oshift;
2697     for (i=0; i<n; i++) {
2698       cia[i+1] = cia[i] + collengths[i];
2699     }
2700     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2701     jj   = a->j;
2702     for (row=0; row<m; row++) {
2703       mr = a->i[row+1] - a->i[row];
2704       for (i=0; i<mr; i++) {
2705         col = *jj++;
2706         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2707       }
2708     }
2709     ierr = PetscFree(collengths);CHKERRQ(ierr);
2710     *ia = cia; *ja = cja;
2711   }
2712   PetscFunctionReturn(0);
2713 }
2714 
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2717 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2718 {
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722   if (!ia) PetscFunctionReturn(0);
2723   ierr = PetscFree(*ia);CHKERRQ(ierr);
2724   ierr = PetscFree(*ja);CHKERRQ(ierr);
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 #undef __FUNCT__
2729 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2730 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2731 {
2732   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2733   PetscErrorCode ierr;
2734   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2735   PetscScalar    dx,*y,*xx,*w3_array;
2736   PetscScalar    *vscale_array;
2737   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2738   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2739   void           *fctx = coloring->fctx;
2740   PetscBool      flg = PETSC_FALSE;
2741   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2742   Vec            x1_tmp;
2743 
2744   PetscFunctionBegin;
2745   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2746   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2747   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2748   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2749 
2750   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2751   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2752   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2753   if (flg) {
2754     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2755   } else {
2756     PetscBool  assembled;
2757     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2758     if (assembled) {
2759       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2760     }
2761   }
2762 
2763   x1_tmp = x1;
2764   if (!coloring->vscale){
2765     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2766   }
2767 
2768   /*
2769     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2770     coloring->F for the coarser grids from the finest
2771   */
2772   if (coloring->F) {
2773     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2774     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2775     if (m1 != m2) {
2776       coloring->F = 0;
2777       }
2778     }
2779 
2780   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2781     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2782   }
2783   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2784 
2785   /* Set w1 = F(x1) */
2786   if (coloring->F) {
2787     w1          = coloring->F; /* use already computed value of function */
2788     coloring->F = 0;
2789   } else {
2790     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2791     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2792     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2793   }
2794 
2795   if (!coloring->w3) {
2796     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2797     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2798   }
2799   w3 = coloring->w3;
2800 
2801     CHKMEMQ;
2802     /* Compute all the local scale factors, including ghost points */
2803   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2804   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2805   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2806   if (ctype == IS_COLORING_GHOSTED){
2807     col_start = 0; col_end = N;
2808   } else if (ctype == IS_COLORING_GLOBAL){
2809     xx = xx - start;
2810     vscale_array = vscale_array - start;
2811     col_start = start; col_end = N + start;
2812   }    CHKMEMQ;
2813   for (col=col_start; col<col_end; col++){
2814     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2815     if (coloring->htype[0] == 'w') {
2816       dx = 1.0 + unorm;
2817     } else {
2818       dx  = xx[col];
2819     }
2820     if (dx == 0.0) dx = 1.0;
2821 #if !defined(PETSC_USE_COMPLEX)
2822     if (dx < umin && dx >= 0.0)      dx = umin;
2823     else if (dx < 0.0 && dx > -umin) dx = -umin;
2824 #else
2825     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2826     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2827 #endif
2828     dx               *= epsilon;
2829     vscale_array[col] = 1.0/dx;
2830   }     CHKMEMQ;
2831   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2832   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2833   if (ctype == IS_COLORING_GLOBAL){
2834     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2835     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2836   }
2837   CHKMEMQ;
2838   if (coloring->vscaleforrow) {
2839     vscaleforrow = coloring->vscaleforrow;
2840   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2841 
2842   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2843   /*
2844     Loop over each color
2845   */
2846   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2847   for (k=0; k<coloring->ncolors; k++) {
2848     coloring->currentcolor = k;
2849     for (i=0; i<bs; i++) {
2850       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2851       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2852       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2853       /*
2854 	Loop over each column associated with color
2855 	adding the perturbation to the vector w3.
2856       */
2857       for (l=0; l<coloring->ncolumns[k]; l++) {
2858 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2859 	if (coloring->htype[0] == 'w') {
2860 	  dx = 1.0 + unorm;
2861 	} else {
2862 	  dx  = xx[col];
2863 	}
2864 	if (dx == 0.0) dx = 1.0;
2865 #if !defined(PETSC_USE_COMPLEX)
2866 	if (dx < umin && dx >= 0.0)      dx = umin;
2867 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2868 #else
2869 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2870 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2871 #endif
2872 	dx            *= epsilon;
2873 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2874 	w3_array[col] += dx;
2875       }
2876       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2877       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2878 
2879       /*
2880 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2881 	w2 = F(x1 + dx) - F(x1)
2882       */
2883       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2884       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2885       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2886       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2887 
2888       /*
2889 	Loop over rows of vector, putting results into Jacobian matrix
2890       */
2891       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2892       for (l=0; l<coloring->nrows[k]; l++) {
2893 	row    = bs*coloring->rows[k][l];             /* local row index */
2894 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2895         for (j=0; j<bs; j++) {
2896   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2897           srows[j]  = row + start + j;
2898         }
2899 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2900       }
2901       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2902     }
2903   } /* endof for each color */
2904   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2905   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2906   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2907   ierr = PetscFree(srows);CHKERRQ(ierr);
2908 
2909   coloring->currentcolor = -1;
2910   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2911   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2912   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 /* -------------------------------------------------------------------*/
2917 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2918        MatGetRow_SeqBAIJ,
2919        MatRestoreRow_SeqBAIJ,
2920        MatMult_SeqBAIJ_N,
2921 /* 4*/ MatMultAdd_SeqBAIJ_N,
2922        MatMultTranspose_SeqBAIJ,
2923        MatMultTransposeAdd_SeqBAIJ,
2924        0,
2925        0,
2926        0,
2927 /*10*/ 0,
2928        MatLUFactor_SeqBAIJ,
2929        0,
2930        0,
2931        MatTranspose_SeqBAIJ,
2932 /*15*/ MatGetInfo_SeqBAIJ,
2933        MatEqual_SeqBAIJ,
2934        MatGetDiagonal_SeqBAIJ,
2935        MatDiagonalScale_SeqBAIJ,
2936        MatNorm_SeqBAIJ,
2937 /*20*/ 0,
2938        MatAssemblyEnd_SeqBAIJ,
2939        MatSetOption_SeqBAIJ,
2940        MatZeroEntries_SeqBAIJ,
2941 /*24*/ MatZeroRows_SeqBAIJ,
2942        0,
2943        0,
2944        0,
2945        0,
2946 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2947        0,
2948        0,
2949        MatGetArray_SeqBAIJ,
2950        MatRestoreArray_SeqBAIJ,
2951 /*34*/ MatDuplicate_SeqBAIJ,
2952        0,
2953        0,
2954        MatILUFactor_SeqBAIJ,
2955        0,
2956 /*39*/ MatAXPY_SeqBAIJ,
2957        MatGetSubMatrices_SeqBAIJ,
2958        MatIncreaseOverlap_SeqBAIJ,
2959        MatGetValues_SeqBAIJ,
2960        MatCopy_SeqBAIJ,
2961 /*44*/ 0,
2962        MatScale_SeqBAIJ,
2963        0,
2964        0,
2965        MatZeroRowsColumns_SeqBAIJ,
2966 /*49*/ MatSetBlockSize_SeqBAIJ,
2967        MatGetRowIJ_SeqBAIJ,
2968        MatRestoreRowIJ_SeqBAIJ,
2969        MatGetColumnIJ_SeqBAIJ,
2970        MatRestoreColumnIJ_SeqBAIJ,
2971 /*54*/ MatFDColoringCreate_SeqAIJ,
2972        0,
2973        0,
2974        0,
2975        MatSetValuesBlocked_SeqBAIJ,
2976 /*59*/ MatGetSubMatrix_SeqBAIJ,
2977        MatDestroy_SeqBAIJ,
2978        MatView_SeqBAIJ,
2979        0,
2980        0,
2981 /*64*/ 0,
2982        0,
2983        0,
2984        0,
2985        0,
2986 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2987        0,
2988        MatConvert_Basic,
2989        0,
2990        0,
2991 /*74*/ 0,
2992        MatFDColoringApply_BAIJ,
2993        0,
2994        0,
2995        0,
2996 /*79*/ 0,
2997        0,
2998        0,
2999        0,
3000        MatLoad_SeqBAIJ,
3001 /*84*/ 0,
3002        0,
3003        0,
3004        0,
3005        0,
3006 /*89*/ 0,
3007        0,
3008        0,
3009        0,
3010        0,
3011 /*94*/ 0,
3012        0,
3013        0,
3014        0,
3015        0,
3016 /*99*/0,
3017        0,
3018        0,
3019        0,
3020        0,
3021 /*104*/0,
3022        MatRealPart_SeqBAIJ,
3023        MatImaginaryPart_SeqBAIJ,
3024        0,
3025        0,
3026 /*109*/0,
3027        0,
3028        0,
3029        0,
3030        MatMissingDiagonal_SeqBAIJ,
3031 /*114*/0,
3032        0,
3033        0,
3034        0,
3035        0,
3036 /*119*/0,
3037        0,
3038        MatMultHermitianTranspose_SeqBAIJ,
3039        MatMultHermitianTransposeAdd_SeqBAIJ,
3040        0,
3041 };
3042 
3043 EXTERN_C_BEGIN
3044 #undef __FUNCT__
3045 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3046 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3047 {
3048   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3049   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3050   PetscErrorCode ierr;
3051 
3052   PetscFunctionBegin;
3053   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3054 
3055   /* allocate space for values if not already there */
3056   if (!aij->saved_values) {
3057     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3058     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3059   }
3060 
3061   /* copy values over */
3062   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 EXTERN_C_END
3066 
3067 EXTERN_C_BEGIN
3068 #undef __FUNCT__
3069 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3070 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3071 {
3072   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3073   PetscErrorCode ierr;
3074   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3075 
3076   PetscFunctionBegin;
3077   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3078   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3079 
3080   /* copy values over */
3081   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3082   PetscFunctionReturn(0);
3083 }
3084 EXTERN_C_END
3085 
3086 EXTERN_C_BEGIN
3087 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3088 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3089 EXTERN_C_END
3090 
3091 EXTERN_C_BEGIN
3092 #undef __FUNCT__
3093 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3094 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3095 {
3096   Mat_SeqBAIJ    *b;
3097   PetscErrorCode ierr;
3098   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3099   PetscBool      flg,skipallocation = PETSC_FALSE;
3100 
3101   PetscFunctionBegin;
3102 
3103   if (nz == MAT_SKIP_ALLOCATION) {
3104     skipallocation = PETSC_TRUE;
3105     nz             = 0;
3106   }
3107 
3108   if (bs < 0) {
3109     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3110       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3111     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3112     bs   = PetscAbs(bs);
3113   }
3114   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3115   bs   = newbs;
3116 
3117   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3118   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3119   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3120   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3121 
3122   B->preallocated = PETSC_TRUE;
3123 
3124   mbs  = B->rmap->n/bs;
3125   nbs  = B->cmap->n/bs;
3126   bs2  = bs*bs;
3127 
3128   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3129 
3130   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3131   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3132   if (nnz) {
3133     for (i=0; i<mbs; i++) {
3134       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3135       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3136     }
3137   }
3138 
3139   b       = (Mat_SeqBAIJ*)B->data;
3140   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3141     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3142   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3143 
3144   if (!flg) {
3145     switch (bs) {
3146     case 1:
3147       B->ops->mult            = MatMult_SeqBAIJ_1;
3148       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3149       B->ops->sor             = MatSOR_SeqBAIJ_1;
3150       break;
3151     case 2:
3152       B->ops->mult            = MatMult_SeqBAIJ_2;
3153       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3154       B->ops->sor             = MatSOR_SeqBAIJ_2;
3155       break;
3156     case 3:
3157       B->ops->mult            = MatMult_SeqBAIJ_3;
3158       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3159       B->ops->sor             = MatSOR_SeqBAIJ_3;
3160       break;
3161     case 4:
3162       B->ops->mult            = MatMult_SeqBAIJ_4;
3163       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3164       B->ops->sor             = MatSOR_SeqBAIJ_4;
3165       break;
3166     case 5:
3167       B->ops->mult            = MatMult_SeqBAIJ_5;
3168       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3169       B->ops->sor             = MatSOR_SeqBAIJ_5;
3170       break;
3171     case 6:
3172       B->ops->mult            = MatMult_SeqBAIJ_6;
3173       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3174       B->ops->sor             = MatSOR_SeqBAIJ_6;
3175       break;
3176     case 7:
3177       B->ops->mult            = MatMult_SeqBAIJ_7;
3178       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3179       B->ops->sor             = MatSOR_SeqBAIJ_7;
3180       break;
3181     case 15:
3182       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3183       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3184       B->ops->sor             = MatSOR_SeqBAIJ_N;
3185       break;
3186     default:
3187       B->ops->mult            = MatMult_SeqBAIJ_N;
3188       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3189       B->ops->sor             = MatSOR_SeqBAIJ_N;
3190       break;
3191     }
3192   }
3193   B->rmap->bs  = bs;
3194   b->mbs       = mbs;
3195   b->nbs       = nbs;
3196   if (!skipallocation) {
3197     if (!b->imax) {
3198       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3199       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3200       b->free_imax_ilen = PETSC_TRUE;
3201     }
3202     /* b->ilen will count nonzeros in each block row so far. */
3203     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3204     if (!nnz) {
3205       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3206       else if (nz < 0) nz = 1;
3207       for (i=0; i<mbs; i++) b->imax[i] = nz;
3208       nz = nz*mbs;
3209     } else {
3210       nz = 0;
3211       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3212     }
3213 
3214     /* allocate the matrix space */
3215     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3216     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3217     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3218     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3219     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3220     b->singlemalloc = PETSC_TRUE;
3221     b->i[0] = 0;
3222     for (i=1; i<mbs+1; i++) {
3223       b->i[i] = b->i[i-1] + b->imax[i-1];
3224     }
3225     b->free_a     = PETSC_TRUE;
3226     b->free_ij    = PETSC_TRUE;
3227   } else {
3228     b->free_a     = PETSC_FALSE;
3229     b->free_ij    = PETSC_FALSE;
3230   }
3231 
3232   B->rmap->bs          = bs;
3233   b->bs2              = bs2;
3234   b->mbs              = mbs;
3235   b->nz               = 0;
3236   b->maxnz            = nz;
3237   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3238   PetscFunctionReturn(0);
3239 }
3240 EXTERN_C_END
3241 
3242 EXTERN_C_BEGIN
3243 #undef __FUNCT__
3244 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3245 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3246 {
3247   PetscInt       i,m,nz,nz_max=0,*nnz;
3248   PetscScalar    *values=0;
3249   PetscErrorCode ierr;
3250 
3251   PetscFunctionBegin;
3252   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3253   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3254   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3255   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3256   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3257   m = B->rmap->n/bs;
3258 
3259   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3260   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3261   for(i=0; i<m; i++) {
3262     nz = ii[i+1]- ii[i];
3263     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3264     nz_max = PetscMax(nz_max, nz);
3265     nnz[i] = nz;
3266   }
3267   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3268   ierr = PetscFree(nnz);CHKERRQ(ierr);
3269 
3270   values = (PetscScalar*)V;
3271   if (!values) {
3272     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3273     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3274   }
3275   for (i=0; i<m; i++) {
3276     PetscInt          ncols  = ii[i+1] - ii[i];
3277     const PetscInt    *icols = jj + ii[i];
3278     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3279     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3280   }
3281   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3282   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284 
3285   PetscFunctionReturn(0);
3286 }
3287 EXTERN_C_END
3288 
3289 
3290 EXTERN_C_BEGIN
3291 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3292 #if defined(PETSC_HAVE_MUMPS)
3293 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3294 #endif
3295 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3296 EXTERN_C_END
3297 
3298 /*MC
3299    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3300    block sparse compressed row format.
3301 
3302    Options Database Keys:
3303 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3304 
3305   Level: beginner
3306 
3307 .seealso: MatCreateSeqBAIJ()
3308 M*/
3309 
3310 EXTERN_C_BEGIN
3311 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3312 EXTERN_C_END
3313 
3314 EXTERN_C_BEGIN
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatCreate_SeqBAIJ"
3317 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3318 {
3319   PetscErrorCode ierr;
3320   PetscMPIInt    size;
3321   Mat_SeqBAIJ    *b;
3322 
3323   PetscFunctionBegin;
3324   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3325   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3326 
3327   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3328   B->data = (void*)b;
3329   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3330   b->row                   = 0;
3331   b->col                   = 0;
3332   b->icol                  = 0;
3333   b->reallocs              = 0;
3334   b->saved_values          = 0;
3335 
3336   b->roworiented           = PETSC_TRUE;
3337   b->nonew                 = 0;
3338   b->diag                  = 0;
3339   b->solve_work            = 0;
3340   b->mult_work             = 0;
3341   B->spptr                 = 0;
3342   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3343   b->keepnonzeropattern    = PETSC_FALSE;
3344   b->xtoy                  = 0;
3345   b->XtoY                  = 0;
3346   B->same_nonzero          = PETSC_FALSE;
3347 
3348   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3349                                      "MatGetFactorAvailable_seqbaij_petsc",
3350                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3352                                      "MatGetFactor_seqbaij_petsc",
3353                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3354 #if defined(PETSC_HAVE_MUMPS)
3355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3356 #endif
3357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3358                                      "MatInvertBlockDiagonal_SeqBAIJ",
3359                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3361                                      "MatStoreValues_SeqBAIJ",
3362                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3364                                      "MatRetrieveValues_SeqBAIJ",
3365                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3367                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3368                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3370                                      "MatConvert_SeqBAIJ_SeqAIJ",
3371                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3373                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3374                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3376                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3377                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3379                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3380                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3382                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3383                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3384   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3385   PetscFunctionReturn(0);
3386 }
3387 EXTERN_C_END
3388 
3389 #undef __FUNCT__
3390 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3391 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3392 {
3393   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3394   PetscErrorCode ierr;
3395   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3396 
3397   PetscFunctionBegin;
3398   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3399 
3400   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3401     c->imax = a->imax;
3402     c->ilen = a->ilen;
3403     c->free_imax_ilen = PETSC_FALSE;
3404   } else {
3405     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3406     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3407     for (i=0; i<mbs; i++) {
3408       c->imax[i] = a->imax[i];
3409       c->ilen[i] = a->ilen[i];
3410     }
3411     c->free_imax_ilen = PETSC_TRUE;
3412   }
3413 
3414   /* allocate the matrix space */
3415   if (mallocmatspace){
3416     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3417       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3418       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3419       c->singlemalloc = PETSC_FALSE;
3420       c->free_ij      = PETSC_FALSE;
3421       c->i            = a->i;
3422       c->j            = a->j;
3423       c->parent       = A;
3424       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3425       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3426       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3427     } else {
3428       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3429       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3430       c->singlemalloc = PETSC_TRUE;
3431       c->free_ij      = PETSC_TRUE;
3432       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3433       if (mbs > 0) {
3434 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3435 	if (cpvalues == MAT_COPY_VALUES) {
3436 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3437 	} else {
3438 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3439 	}
3440       }
3441     }
3442   }
3443 
3444   c->roworiented = a->roworiented;
3445   c->nonew       = a->nonew;
3446   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3447   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3448   c->bs2         = a->bs2;
3449   c->mbs         = a->mbs;
3450   c->nbs         = a->nbs;
3451 
3452   if (a->diag) {
3453     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3454       c->diag      = a->diag;
3455       c->free_diag = PETSC_FALSE;
3456     } else {
3457       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3458       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3459       for (i=0; i<mbs; i++) {
3460         c->diag[i] = a->diag[i];
3461       }
3462       c->free_diag = PETSC_TRUE;
3463     }
3464   } else c->diag        = 0;
3465   c->nz                 = a->nz;
3466   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3467   c->solve_work         = 0;
3468   c->mult_work          = 0;
3469   c->free_a             = PETSC_TRUE;
3470   c->free_ij            = PETSC_TRUE;
3471   C->preallocated       = PETSC_TRUE;
3472   C->assembled          = PETSC_TRUE;
3473 
3474   c->compressedrow.use     = a->compressedrow.use;
3475   c->compressedrow.nrows   = a->compressedrow.nrows;
3476   c->compressedrow.check   = a->compressedrow.check;
3477   if (a->compressedrow.use){
3478     i = a->compressedrow.nrows;
3479     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3480     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3481     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3482     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3483   } else {
3484     c->compressedrow.use    = PETSC_FALSE;
3485     c->compressedrow.i      = PETSC_NULL;
3486     c->compressedrow.rindex = PETSC_NULL;
3487   }
3488   C->same_nonzero = A->same_nonzero;
3489   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3490   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3496 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3497 {
3498     PetscErrorCode ierr;
3499 
3500   PetscFunctionBegin;
3501   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3502   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3503   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3504   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatLoad_SeqBAIJ"
3510 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3511 {
3512   Mat_SeqBAIJ    *a;
3513   PetscErrorCode ierr;
3514   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3515   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3516   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3517   PetscInt       *masked,nmask,tmp,bs2,ishift;
3518   PetscMPIInt    size;
3519   int            fd;
3520   PetscScalar    *aa;
3521   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3522 
3523   PetscFunctionBegin;
3524   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3525   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3526   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3527   bs2  = bs*bs;
3528 
3529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3530   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3531   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3532   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3533   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3534   M = header[1]; N = header[2]; nz = header[3];
3535 
3536   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3537   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3538 
3539   /*
3540      This code adds extra rows to make sure the number of rows is
3541     divisible by the blocksize
3542   */
3543   mbs        = M/bs;
3544   extra_rows = bs - M + bs*(mbs);
3545   if (extra_rows == bs) extra_rows = 0;
3546   else                  mbs++;
3547   if (extra_rows) {
3548     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3549   }
3550 
3551   /* Set global sizes if not already set */
3552   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3553     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3554   } else { /* Check if the matrix global sizes are correct */
3555     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3556     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3557   }
3558 
3559   /* read in row lengths */
3560   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3561   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3562   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3563 
3564   /* read in column indices */
3565   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3566   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3567   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3568 
3569   /* loop over row lengths determining block row lengths */
3570   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3571   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3572   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3573   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3574   rowcount = 0;
3575   nzcount = 0;
3576   for (i=0; i<mbs; i++) {
3577     nmask = 0;
3578     for (j=0; j<bs; j++) {
3579       kmax = rowlengths[rowcount];
3580       for (k=0; k<kmax; k++) {
3581         tmp = jj[nzcount++]/bs;
3582         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3583       }
3584       rowcount++;
3585     }
3586     browlengths[i] += nmask;
3587     /* zero out the mask elements we set */
3588     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3589   }
3590 
3591   /* Do preallocation  */
3592   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3593   a = (Mat_SeqBAIJ*)newmat->data;
3594 
3595   /* set matrix "i" values */
3596   a->i[0] = 0;
3597   for (i=1; i<= mbs; i++) {
3598     a->i[i]      = a->i[i-1] + browlengths[i-1];
3599     a->ilen[i-1] = browlengths[i-1];
3600   }
3601   a->nz         = 0;
3602   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3603 
3604   /* read in nonzero values */
3605   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3606   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3607   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3608 
3609   /* set "a" and "j" values into matrix */
3610   nzcount = 0; jcount = 0;
3611   for (i=0; i<mbs; i++) {
3612     nzcountb = nzcount;
3613     nmask    = 0;
3614     for (j=0; j<bs; j++) {
3615       kmax = rowlengths[i*bs+j];
3616       for (k=0; k<kmax; k++) {
3617         tmp = jj[nzcount++]/bs;
3618 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3619       }
3620     }
3621     /* sort the masked values */
3622     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3623 
3624     /* set "j" values into matrix */
3625     maskcount = 1;
3626     for (j=0; j<nmask; j++) {
3627       a->j[jcount++]  = masked[j];
3628       mask[masked[j]] = maskcount++;
3629     }
3630     /* set "a" values into matrix */
3631     ishift = bs2*a->i[i];
3632     for (j=0; j<bs; j++) {
3633       kmax = rowlengths[i*bs+j];
3634       for (k=0; k<kmax; k++) {
3635         tmp       = jj[nzcountb]/bs ;
3636         block     = mask[tmp] - 1;
3637         point     = jj[nzcountb] - bs*tmp;
3638         idx       = ishift + bs2*block + j + bs*point;
3639         a->a[idx] = (MatScalar)aa[nzcountb++];
3640       }
3641     }
3642     /* zero out the mask elements we set */
3643     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3644   }
3645   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3646 
3647   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3648   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3649   ierr = PetscFree(aa);CHKERRQ(ierr);
3650   ierr = PetscFree(jj);CHKERRQ(ierr);
3651   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3652 
3653   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3654   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 #undef __FUNCT__
3660 #define __FUNCT__ "MatCreateSeqBAIJ"
3661 /*@C
3662    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3663    compressed row) format.  For good matrix assembly performance the
3664    user should preallocate the matrix storage by setting the parameter nz
3665    (or the array nnz).  By setting these parameters accurately, performance
3666    during matrix assembly can be increased by more than a factor of 50.
3667 
3668    Collective on MPI_Comm
3669 
3670    Input Parameters:
3671 +  comm - MPI communicator, set to PETSC_COMM_SELF
3672 .  bs - size of block
3673 .  m - number of rows
3674 .  n - number of columns
3675 .  nz - number of nonzero blocks  per block row (same for all rows)
3676 -  nnz - array containing the number of nonzero blocks in the various block rows
3677          (possibly different for each block row) or PETSC_NULL
3678 
3679    Output Parameter:
3680 .  A - the matrix
3681 
3682    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3683    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3684    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3685 
3686    Options Database Keys:
3687 .   -mat_no_unroll - uses code that does not unroll the loops in the
3688                      block calculations (much slower)
3689 .    -mat_block_size - size of the blocks to use
3690 
3691    Level: intermediate
3692 
3693    Notes:
3694    The number of rows and columns must be divisible by blocksize.
3695 
3696    If the nnz parameter is given then the nz parameter is ignored
3697 
3698    A nonzero block is any block that as 1 or more nonzeros in it
3699 
3700    The block AIJ format is fully compatible with standard Fortran 77
3701    storage.  That is, the stored row and column indices can begin at
3702    either one (as in Fortran) or zero.  See the users' manual for details.
3703 
3704    Specify the preallocated storage with either nz or nnz (not both).
3705    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3706    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3707    matrices.
3708 
3709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3710 @*/
3711 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3712 {
3713   PetscErrorCode ierr;
3714 
3715   PetscFunctionBegin;
3716   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3717   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3718   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3719   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3720   PetscFunctionReturn(0);
3721 }
3722 
3723 #undef __FUNCT__
3724 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3725 /*@C
3726    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3727    per row in the matrix. For good matrix assembly performance the
3728    user should preallocate the matrix storage by setting the parameter nz
3729    (or the array nnz).  By setting these parameters accurately, performance
3730    during matrix assembly can be increased by more than a factor of 50.
3731 
3732    Collective on MPI_Comm
3733 
3734    Input Parameters:
3735 +  A - the matrix
3736 .  bs - size of block
3737 .  nz - number of block nonzeros per block row (same for all rows)
3738 -  nnz - array containing the number of block nonzeros in the various block rows
3739          (possibly different for each block row) or PETSC_NULL
3740 
3741    Options Database Keys:
3742 .   -mat_no_unroll - uses code that does not unroll the loops in the
3743                      block calculations (much slower)
3744 .    -mat_block_size - size of the blocks to use
3745 
3746    Level: intermediate
3747 
3748    Notes:
3749    If the nnz parameter is given then the nz parameter is ignored
3750 
3751    You can call MatGetInfo() to get information on how effective the preallocation was;
3752    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3753    You can also run with the option -info and look for messages with the string
3754    malloc in them to see if additional memory allocation was needed.
3755 
3756    The block AIJ format is fully compatible with standard Fortran 77
3757    storage.  That is, the stored row and column indices can begin at
3758    either one (as in Fortran) or zero.  See the users' manual for details.
3759 
3760    Specify the preallocated storage with either nz or nnz (not both).
3761    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3762    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3763 
3764 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3765 @*/
3766 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3767 {
3768   PetscErrorCode ierr;
3769 
3770   PetscFunctionBegin;
3771   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3777 /*@C
3778    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3779    (the default sequential PETSc format).
3780 
3781    Collective on MPI_Comm
3782 
3783    Input Parameters:
3784 +  A - the matrix
3785 .  i - the indices into j for the start of each local row (starts with zero)
3786 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3787 -  v - optional values in the matrix
3788 
3789    Level: developer
3790 
3791 .keywords: matrix, aij, compressed row, sparse
3792 
3793 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3794 @*/
3795 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3796 {
3797   PetscErrorCode ierr;
3798 
3799   PetscFunctionBegin;
3800   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3801   PetscFunctionReturn(0);
3802 }
3803 
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3807 /*@
3808      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3809 
3810      Collective on MPI_Comm
3811 
3812    Input Parameters:
3813 +  comm - must be an MPI communicator of size 1
3814 .  bs - size of block
3815 .  m - number of rows
3816 .  n - number of columns
3817 .  i - row indices
3818 .  j - column indices
3819 -  a - matrix values
3820 
3821    Output Parameter:
3822 .  mat - the matrix
3823 
3824    Level: advanced
3825 
3826    Notes:
3827        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3828     once the matrix is destroyed
3829 
3830        You cannot set new nonzero locations into this matrix, that will generate an error.
3831 
3832        The i and j indices are 0 based
3833 
3834        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3835 
3836 
3837 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3838 
3839 @*/
3840 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3841 {
3842   PetscErrorCode ierr;
3843   PetscInt       ii;
3844   Mat_SeqBAIJ    *baij;
3845 
3846   PetscFunctionBegin;
3847   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3848   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3849 
3850   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3851   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3852   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3853   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3854   baij = (Mat_SeqBAIJ*)(*mat)->data;
3855   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3856   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3857 
3858   baij->i = i;
3859   baij->j = j;
3860   baij->a = a;
3861   baij->singlemalloc = PETSC_FALSE;
3862   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3863   baij->free_a       = PETSC_FALSE;
3864   baij->free_ij       = PETSC_FALSE;
3865 
3866   for (ii=0; ii<m; ii++) {
3867     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3868 #if defined(PETSC_USE_DEBUG)
3869     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3870 #endif
3871   }
3872 #if defined(PETSC_USE_DEBUG)
3873   for (ii=0; ii<baij->i[m]; ii++) {
3874     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3875     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3876   }
3877 #endif
3878 
3879   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3880   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3881   PetscFunctionReturn(0);
3882 }
3883