xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d155d4104c12796cc6e93a7e2b3cd74e97fcda36)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
28   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30   ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr);
31   PetscFunctionReturn(0);
32 }
33 
34 EXTERN_C_BEGIN
35 #undef __FUNCT__
36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
37 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
40   PetscErrorCode ierr;
41   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
42   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
43   PetscReal      shift = 0.0;
44 
45   PetscFunctionBegin;
46   if (a->idiagvalid) PetscFunctionReturn(0);
47   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
48   diag_offset = a->diag;
49   if (!a->idiag) {
50     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
51     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
52   }
53   diag  = a->idiag;
54   mdiag = a->idiag+bs2*mbs;
55   /* factor and invert each block */
56   switch (bs){
57     case 1:
58       for (i=0; i<mbs; i++) {
59         odiag = v + 1*diag_offset[i];
60         diag[0]  = odiag[0];
61         mdiag[0] = odiag[0];
62         diag[0]  = 1.0 / (diag[0] + shift);
63         diag    += 1;
64         mdiag   += 1;
65       }
66       break;
67     case 2:
68       for (i=0; i<mbs; i++) {
69         odiag   = v + 4*diag_offset[i];
70         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
71 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
72 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
73 	diag    += 4;
74 	mdiag   += 4;
75       }
76       break;
77     case 3:
78       for (i=0; i<mbs; i++) {
79         odiag    = v + 9*diag_offset[i];
80         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
81         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
82         diag[8]  = odiag[8];
83         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
84         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
85         mdiag[8] = odiag[8];
86 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
87 	diag    += 9;
88 	mdiag   += 9;
89       }
90       break;
91     case 4:
92       for (i=0; i<mbs; i++) {
93         odiag  = v + 16*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
97 	diag  += 16;
98 	mdiag += 16;
99       }
100       break;
101     case 5:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 25*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
107 	diag  += 25;
108 	mdiag += 25;
109       }
110       break;
111     case 6:
112       for (i=0; i<mbs; i++) {
113         odiag = v + 36*diag_offset[i];
114         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
117 	diag  += 36;
118 	mdiag += 36;
119       }
120       break;
121     case 7:
122       for (i=0; i<mbs; i++) {
123         odiag = v + 49*diag_offset[i];
124         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
127 	diag  += 49;
128 	mdiag += 49;
129       }
130       break;
131     default:
132       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
133       for (i=0; i<mbs; i++) {
134         odiag = v + bs2*diag_offset[i];
135         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
136         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
138 	diag  += bs2;
139 	mdiag += bs2;
140       }
141       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
142   }
143   a->idiagvalid = PETSC_TRUE;
144   PetscFunctionReturn(0);
145 }
146 EXTERN_C_END
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
151 {
152   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
153   PetscScalar        *x,x1,s1;
154   const PetscScalar  *b;
155   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
156   PetscErrorCode     ierr;
157   PetscInt           m = a->mbs,i,i2,nz,j;
158   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   its = its*lits;
163   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
164   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
165   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
166   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
167   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
168 
169   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
170 
171   diag  = a->diag;
172   idiag = a->idiag;
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
178       x[0] = b[0]*idiag[0];
179       i2     = 1;
180       idiag += 1;
181       for (i=1; i<m; i++) {
182         v     = aa + ai[i];
183         vi    = aj + ai[i];
184         nz    = diag[i] - ai[i];
185         s1    = b[i2];
186         for (j=0; j<nz; j++) {
187           s1 -= v[j]*x[vi[j]];
188         }
189         x[i2]   = idiag[0]*s1;
190         idiag   += 1;
191         i2      += 1;
192       }
193       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
194       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
195     }
196     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
197         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
198       i2    = 0;
199       mdiag = a->idiag+a->mbs;
200       for (i=0; i<m; i++) {
201         x1      = x[i2];
202         x[i2]   = mdiag[0]*x1;
203         mdiag  += 1;
204         i2     += 1;
205       }
206       ierr = PetscLogFlops(m);CHKERRQ(ierr);
207     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
208       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
209     }
210     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
211       idiag   = a->idiag+a->mbs - 1;
212       i2      = m - 1;
213       x1      = x[i2];
214       x[i2]   = idiag[0]*x1;
215       idiag -= 1;
216       i2    -= 1;
217       for (i=m-2; i>=0; i--) {
218         v     = aa + (diag[i]+1);
219         vi    = aj + diag[i] + 1;
220         nz    = ai[i+1] - diag[i] - 1;
221         s1    = x[i2];
222         for (j=0; j<nz; j++) {
223           s1 -= v[j]*x[vi[j]];
224         }
225         x[i2]   = idiag[0]*s1;
226         idiag   -= 1;
227         i2      -= 1;
228       }
229       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
230     }
231   } else {
232     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
233   }
234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
242 {
243   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
244   PetscScalar        *x,x1,x2,s1,s2;
245   const PetscScalar  *b;
246   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
247   PetscErrorCode     ierr;
248   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
249   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
250 
251   PetscFunctionBegin;
252   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
253   its = its*lits;
254   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
255   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
256   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
257   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
258   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
259 
260   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
261 
262   diag  = a->diag;
263   idiag = a->idiag;
264   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
265   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
266 
267   if (flag & SOR_ZERO_INITIAL_GUESS) {
268     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
269       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
270       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
271       i2     = 2;
272       idiag += 4;
273       for (i=1; i<m; i++) {
274 	v     = aa + 4*ai[i];
275 	vi    = aj + ai[i];
276 	nz    = diag[i] - ai[i];
277 	s1    = b[i2]; s2 = b[i2+1];
278         for (j=0; j<nz; j++) {
279 	  idx  = 2*vi[j];
280           it   = 4*j;
281 	  x1   = x[idx]; x2 = x[1+idx];
282 	  s1  -= v[it]*x1 + v[it+2]*x2;
283 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
284 	}
285 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
286 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
287         idiag   += 4;
288         i2      += 2;
289       }
290       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
291       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
292     }
293     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
294         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
295       i2    = 0;
296       mdiag = a->idiag+4*a->mbs;
297       for (i=0; i<m; i++) {
298         x1      = x[i2]; x2 = x[i2+1];
299         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
300         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
301         mdiag  += 4;
302         i2     += 2;
303       }
304       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
305     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
306       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
307     }
308     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
309       idiag   = a->idiag+4*a->mbs - 4;
310       i2      = 2*m - 2;
311       x1      = x[i2]; x2 = x[i2+1];
312       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
313       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
314       idiag -= 4;
315       i2    -= 2;
316       for (i=m-2; i>=0; i--) {
317 	v     = aa + 4*(diag[i]+1);
318 	vi    = aj + diag[i] + 1;
319 	nz    = ai[i+1] - diag[i] - 1;
320 	s1    = x[i2]; s2 = x[i2+1];
321         for (j=0; j<nz; j++) {
322  	  idx  = 2*vi[j];
323           it   = 4*j;
324 	  x1   = x[idx]; x2 = x[1+idx];
325 	  s1  -= v[it]*x1 + v[it+2]*x2;
326 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
327 	}
328 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
329 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
330         idiag   -= 4;
331         i2      -= 2;
332       }
333       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
334     }
335   } else {
336     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
337   }
338   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
339   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
346 {
347   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
348   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
349   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
350   const PetscScalar  *b;
351   PetscErrorCode     ierr;
352   PetscInt           m = a->mbs,i,i2,nz,idx;
353   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
354 
355   PetscFunctionBegin;
356   its = its*lits;
357   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
358   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
359   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
360   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
361   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
362   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
363 
364   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
365 
366   diag  = a->diag;
367   idiag = a->idiag;
368   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
369   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
370 
371   if (flag & SOR_ZERO_INITIAL_GUESS) {
372     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
373       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
374       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
375       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
376       i2     = 3;
377       idiag += 9;
378       for (i=1; i<m; i++) {
379         v     = aa + 9*ai[i];
380         vi    = aj + ai[i];
381         nz    = diag[i] - ai[i];
382         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
383         while (nz--) {
384           idx  = 3*(*vi++);
385           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
386           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
387           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
388           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
389           v   += 9;
390         }
391         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
392         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
393         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
394         idiag   += 9;
395         i2      += 3;
396       }
397       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
398       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
399     }
400     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
401         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
402       i2    = 0;
403       mdiag = a->idiag+9*a->mbs;
404       for (i=0; i<m; i++) {
405         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
406         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
407         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
408         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
409         mdiag  += 9;
410         i2     += 3;
411       }
412       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
413     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
414       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
415     }
416     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
417       idiag   = a->idiag+9*a->mbs - 9;
418       i2      = 3*m - 3;
419       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
420       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
421       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
422       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
423       idiag -= 9;
424       i2    -= 3;
425       for (i=m-2; i>=0; i--) {
426         v     = aa + 9*(diag[i]+1);
427         vi    = aj + diag[i] + 1;
428         nz    = ai[i+1] - diag[i] - 1;
429         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
430         while (nz--) {
431           idx  = 3*(*vi++);
432           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
433           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
434           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
435           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
436           v   += 9;
437         }
438         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
439         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
440         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
441         idiag   -= 9;
442         i2      -= 3;
443       }
444       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
445     }
446   } else {
447     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
448   }
449   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
450   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
457 {
458   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
459   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
460   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
461   const PetscScalar  *b;
462   PetscErrorCode     ierr;
463   PetscInt           m = a->mbs,i,i2,nz,idx;
464   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
465 
466   PetscFunctionBegin;
467   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
468   its = its*lits;
469   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
470   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
471   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
472   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
473   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
474 
475   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
476 
477   diag  = a->diag;
478   idiag = a->idiag;
479   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
480   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
481 
482   if (flag & SOR_ZERO_INITIAL_GUESS) {
483     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
484       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
485       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
486       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
487       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
488       i2     = 4;
489       idiag += 16;
490       for (i=1; i<m; i++) {
491 	v     = aa + 16*ai[i];
492 	vi    = aj + ai[i];
493 	nz    = diag[i] - ai[i];
494 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
495 	while (nz--) {
496 	  idx  = 4*(*vi++);
497 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
498 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
499 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
500 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
501 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
502 	  v   += 16;
503 	}
504 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
505 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
506 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
507 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
508         idiag   += 16;
509         i2      += 4;
510       }
511       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
512       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
513     }
514     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
515         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
516       i2    = 0;
517       mdiag = a->idiag+16*a->mbs;
518       for (i=0; i<m; i++) {
519         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
520         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
521         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
522         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
523         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
524         mdiag  += 16;
525         i2     += 4;
526       }
527       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
528     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
529       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
530     }
531     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
532       idiag   = a->idiag+16*a->mbs - 16;
533       i2      = 4*m - 4;
534       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
535       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
536       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
537       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
538       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
539       idiag -= 16;
540       i2    -= 4;
541       for (i=m-2; i>=0; i--) {
542 	v     = aa + 16*(diag[i]+1);
543 	vi    = aj + diag[i] + 1;
544 	nz    = ai[i+1] - diag[i] - 1;
545 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
546 	while (nz--) {
547 	  idx  = 4*(*vi++);
548 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
549 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
550 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
551 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
552 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
553 	  v   += 16;
554 	}
555 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
556 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
557 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
558 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
559         idiag   -= 16;
560         i2      -= 4;
561       }
562       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
563     }
564   } else {
565     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
566   }
567   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
568   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
575 {
576   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
577   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
578   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
579   const PetscScalar  *b;
580   PetscErrorCode     ierr;
581   PetscInt           m = a->mbs,i,i2,nz,idx;
582   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
583 
584   PetscFunctionBegin;
585   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
586   its = its*lits;
587   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
588   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
589   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
590   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
591   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
592 
593   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
594 
595   diag  = a->diag;
596   idiag = a->idiag;
597   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
598   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
599 
600   if (flag & SOR_ZERO_INITIAL_GUESS) {
601     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
602       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
603       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
604       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
605       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
606       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
607       i2     = 5;
608       idiag += 25;
609       for (i=1; i<m; i++) {
610 	v     = aa + 25*ai[i];
611 	vi    = aj + ai[i];
612 	nz    = diag[i] - ai[i];
613 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
614 	while (nz--) {
615 	  idx  = 5*(*vi++);
616 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
617 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
618 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
619 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
620 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
621 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
622 	  v   += 25;
623 	}
624 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
625 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
626 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
627 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
628 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
629         idiag   += 25;
630         i2      += 5;
631       }
632       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
633       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
634     }
635     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
636         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
637       i2    = 0;
638       mdiag = a->idiag+25*a->mbs;
639       for (i=0; i<m; i++) {
640         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
641         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
642         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
643         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
644         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
645         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
646         mdiag  += 25;
647         i2     += 5;
648       }
649       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
650     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
651       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
652     }
653     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
654       idiag   = a->idiag+25*a->mbs - 25;
655       i2      = 5*m - 5;
656       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
657       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
658       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
659       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
660       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
661       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
662       idiag -= 25;
663       i2    -= 5;
664       for (i=m-2; i>=0; i--) {
665 	v     = aa + 25*(diag[i]+1);
666 	vi    = aj + diag[i] + 1;
667 	nz    = ai[i+1] - diag[i] - 1;
668 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
669 	while (nz--) {
670 	  idx  = 5*(*vi++);
671 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
672 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
673 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
674 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
675 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
676 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
677 	  v   += 25;
678 	}
679 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
680 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
681 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
682 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
683 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
684         idiag   -= 25;
685         i2      -= 5;
686       }
687       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
688     }
689   } else {
690     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
691   }
692   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
693   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
700 {
701   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
702   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
703   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
704   const PetscScalar  *b;
705   PetscErrorCode     ierr;
706   PetscInt           m = a->mbs,i,i2,nz,idx;
707   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
708 
709   PetscFunctionBegin;
710   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
711   its = its*lits;
712   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
713   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
714   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
715   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
716   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
717 
718   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
719 
720   diag  = a->diag;
721   idiag = a->idiag;
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
724 
725   if (flag & SOR_ZERO_INITIAL_GUESS) {
726     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
727       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
728       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
729       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
730       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
731       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
732       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
733       i2     = 6;
734       idiag += 36;
735       for (i=1; i<m; i++) {
736         v     = aa + 36*ai[i];
737         vi    = aj + ai[i];
738         nz    = diag[i] - ai[i];
739         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
740         while (nz--) {
741           idx  = 6*(*vi++);
742           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
743           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
744           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
745           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
746           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
747           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
748           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
749           v   += 36;
750         }
751         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
752         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
753         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
754         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
755         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
756         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
757         idiag   += 36;
758         i2      += 6;
759       }
760       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
761       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
762     }
763     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
764         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
765       i2    = 0;
766       mdiag = a->idiag+36*a->mbs;
767       for (i=0; i<m; i++) {
768         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
769         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
770         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
771         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
772         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
773         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
774         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
775         mdiag  += 36;
776         i2     += 6;
777       }
778       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
779     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
780       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
781     }
782     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
783       idiag   = a->idiag+36*a->mbs - 36;
784       i2      = 6*m - 6;
785       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
786       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
787       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
788       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
789       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
790       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
791       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
792       idiag -= 36;
793       i2    -= 6;
794       for (i=m-2; i>=0; i--) {
795         v     = aa + 36*(diag[i]+1);
796         vi    = aj + diag[i] + 1;
797         nz    = ai[i+1] - diag[i] - 1;
798         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
799         while (nz--) {
800           idx  = 6*(*vi++);
801           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
802           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
803           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
804           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
805           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
806           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
807           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
808           v   += 36;
809         }
810         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
811         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
812         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
813         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
814         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
815         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
816         idiag   -= 36;
817         i2      -= 6;
818       }
819       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
820     }
821   } else {
822     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
823   }
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
832 {
833   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
834   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
835   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
836   const PetscScalar  *b;
837   PetscErrorCode     ierr;
838   PetscInt           m = a->mbs,i,i2,nz,idx;
839   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
840 
841   PetscFunctionBegin;
842   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
843   its = its*lits;
844   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
845   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
846   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
847   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
848   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
849 
850   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
851 
852   diag  = a->diag;
853   idiag = a->idiag;
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
856 
857   if (flag & SOR_ZERO_INITIAL_GUESS) {
858     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
859       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
860       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
861       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
862       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
863       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
864       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
865       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
866       i2     = 7;
867       idiag += 49;
868       for (i=1; i<m; i++) {
869         v     = aa + 49*ai[i];
870         vi    = aj + ai[i];
871         nz    = diag[i] - ai[i];
872         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
873         while (nz--) {
874           idx  = 7*(*vi++);
875           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
876           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
877           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
878           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
879           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
880           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
881           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
882           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
883           v   += 49;
884         }
885         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
886         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
887         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
888         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
889         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
890         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
891         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
892         idiag   += 49;
893         i2      += 7;
894       }
895       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
896       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
897     }
898     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
899         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
900       i2    = 0;
901       mdiag = a->idiag+49*a->mbs;
902       for (i=0; i<m; i++) {
903         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
904         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
905         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
906         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
907         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
908         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
909         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
910         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
911         mdiag  += 49;
912         i2     += 7;
913       }
914       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
915     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
916       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
917     }
918     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
919       idiag   = a->idiag+49*a->mbs - 49;
920       i2      = 7*m - 7;
921       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
922       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
923       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
924       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
925       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
926       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
927       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
928       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
929       idiag -= 49;
930       i2    -= 7;
931       for (i=m-2; i>=0; i--) {
932         v     = aa + 49*(diag[i]+1);
933         vi    = aj + diag[i] + 1;
934         nz    = ai[i+1] - diag[i] - 1;
935         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
936         while (nz--) {
937           idx  = 7*(*vi++);
938           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
939           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
940           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
941           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
942           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
943           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
944           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
945           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
946           v   += 49;
947         }
948         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
949         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
950         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
951         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
952         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
953         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
954         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
955         idiag   -= 49;
956         i2      -= 7;
957       }
958       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
959     }
960   } else {
961     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
962   }
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
971 {
972   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
973   PetscScalar        *x,*work,*w,*workt;
974   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
975   const PetscScalar  *b;
976   PetscErrorCode     ierr;
977   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
978   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
979 
980   PetscFunctionBegin;
981   its = its*lits;
982   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
983   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
984   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
985   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
986   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
987   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
988 
989   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
990 
991   diag  = a->diag;
992   idiag = a->idiag;
993   if (!a->mult_work) {
994     k    = PetscMax(A->rmap->n,A->cmap->n);
995     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
996   }
997   work = a->mult_work;
998   if (!a->sor_work) {
999     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1000   }
1001   w = a->sor_work;
1002 
1003   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1004   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1005 
1006   if (flag & SOR_ZERO_INITIAL_GUESS) {
1007     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1008       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1009       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1010       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1011       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1012       i2     = bs;
1013       idiag += bs2;
1014       for (i=1; i<m; i++) {
1015         v     = aa + bs2*ai[i];
1016         vi    = aj + ai[i];
1017         nz    = diag[i] - ai[i];
1018 
1019         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1020 	/* copy all rows of x that are needed into contiguous space */
1021         workt = work;
1022         for (j=0; j<nz; j++) {
1023           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1024           workt += bs;
1025         }
1026         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1027        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1028         while (nz--) {
1029           idx  = N*(*vi++);
1030           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1031           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1032           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1033           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1034           v   += N2;
1035 	  } */
1036 
1037         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1038         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1039         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1040         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1041 
1042         idiag   += bs2;
1043         i2      += bs;
1044       }
1045       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1046       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1047     }
1048     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1049         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1050       i2    = 0;
1051       mdiag = a->idiag+bs2*a->mbs;
1052       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1053       for (i=0; i<m; i++) {
1054         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1055         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1056         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1057         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1058         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1059 
1060         mdiag  += bs2;
1061         i2     += bs;
1062       }
1063       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1064     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1065       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1066     }
1067     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1068       idiag   = a->idiag+bs2*a->mbs - bs2;
1069       i2      = bs*m - bs;
1070       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1071       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1072       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1073       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1074       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1075       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1076       idiag -= bs2;
1077       i2    -= bs;
1078       for (i=m-2; i>=0; i--) {
1079         v     = aa + bs2*(diag[i]+1);
1080         vi    = aj + diag[i] + 1;
1081         nz    = ai[i+1] - diag[i] - 1;
1082 
1083         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1084 	/* copy all rows of x that are needed into contiguous space */
1085         workt = work;
1086         for (j=0; j<nz; j++) {
1087           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1088           workt += bs;
1089         }
1090         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1091         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1092         while (nz--) {
1093           idx  = N*(*vi++);
1094           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1095           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1096           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1097           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1098           v   += N2;
1099 	  } */
1100 
1101         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1102         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1103         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1104         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1105         idiag   -= bs2;
1106         i2      -= bs;
1107       }
1108       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1109     }
1110   } else {
1111     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1112   }
1113   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1114   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1120 */
1121 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1124 #define matsetvaluesblocked4_ matsetvaluesblocked4
1125 #endif
1126 
1127 EXTERN_C_BEGIN
1128 #undef __FUNCT__
1129 #define __FUNCT__ "matsetvaluesblocked4_"
1130 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1131 {
1132   Mat               A = *AA;
1133   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1134   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1135   PetscInt          *ai=a->i,*ailen=a->ilen;
1136   PetscInt          *aj=a->j,stepval,lastcol = -1;
1137   const PetscScalar *value = v;
1138   MatScalar         *ap,*aa = a->a,*bap;
1139 
1140   PetscFunctionBegin;
1141   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1142   stepval = (n-1)*4;
1143   for (k=0; k<m; k++) { /* loop over added rows */
1144     row  = im[k];
1145     rp   = aj + ai[row];
1146     ap   = aa + 16*ai[row];
1147     nrow = ailen[row];
1148     low  = 0;
1149     high = nrow;
1150     for (l=0; l<n; l++) { /* loop over added columns */
1151       col = in[l];
1152       if (col <= lastcol) low = 0; else high = nrow;
1153       lastcol = col;
1154       value = v + k*(stepval+4 + l)*4;
1155       while (high-low > 7) {
1156         t = (low+high)/2;
1157         if (rp[t] > col) high = t;
1158         else             low  = t;
1159       }
1160       for (i=low; i<high; i++) {
1161         if (rp[i] > col) break;
1162         if (rp[i] == col) {
1163           bap  = ap +  16*i;
1164           for (ii=0; ii<4; ii++,value+=stepval) {
1165             for (jj=ii; jj<16; jj+=4) {
1166               bap[jj] += *value++;
1167             }
1168           }
1169           goto noinsert2;
1170         }
1171       }
1172       N = nrow++ - 1;
1173       high++; /* added new column index thus must search to one higher than before */
1174       /* shift up all the later entries in this row */
1175       for (ii=N; ii>=i; ii--) {
1176         rp[ii+1] = rp[ii];
1177         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1178       }
1179       if (N >= i) {
1180         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1181       }
1182       rp[i] = col;
1183       bap   = ap +  16*i;
1184       for (ii=0; ii<4; ii++,value+=stepval) {
1185         for (jj=ii; jj<16; jj+=4) {
1186           bap[jj] = *value++;
1187         }
1188       }
1189       noinsert2:;
1190       low = i;
1191     }
1192     ailen[row] = nrow;
1193   }
1194   PetscFunctionReturnVoid();
1195 }
1196 EXTERN_C_END
1197 
1198 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1199 #define matsetvalues4_ MATSETVALUES4
1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1201 #define matsetvalues4_ matsetvalues4
1202 #endif
1203 
1204 EXTERN_C_BEGIN
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSetValues4_"
1207 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1208 {
1209   Mat         A = *AA;
1210   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1211   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1212   PetscInt    *ai=a->i,*ailen=a->ilen;
1213   PetscInt    *aj=a->j,brow,bcol;
1214   PetscInt    ridx,cidx,lastcol = -1;
1215   MatScalar   *ap,value,*aa=a->a,*bap;
1216 
1217   PetscFunctionBegin;
1218   for (k=0; k<m; k++) { /* loop over added rows */
1219     row  = im[k]; brow = row/4;
1220     rp   = aj + ai[brow];
1221     ap   = aa + 16*ai[brow];
1222     nrow = ailen[brow];
1223     low  = 0;
1224     high = nrow;
1225     for (l=0; l<n; l++) { /* loop over added columns */
1226       col = in[l]; bcol = col/4;
1227       ridx = row % 4; cidx = col % 4;
1228       value = v[l + k*n];
1229       if (col <= lastcol) low = 0; else high = nrow;
1230       lastcol = col;
1231       while (high-low > 7) {
1232         t = (low+high)/2;
1233         if (rp[t] > bcol) high = t;
1234         else              low  = t;
1235       }
1236       for (i=low; i<high; i++) {
1237         if (rp[i] > bcol) break;
1238         if (rp[i] == bcol) {
1239           bap  = ap +  16*i + 4*cidx + ridx;
1240           *bap += value;
1241           goto noinsert1;
1242         }
1243       }
1244       N = nrow++ - 1;
1245       high++; /* added new column thus must search to one higher than before */
1246       /* shift up all the later entries in this row */
1247       for (ii=N; ii>=i; ii--) {
1248         rp[ii+1] = rp[ii];
1249         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1250       }
1251       if (N>=i) {
1252         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1253       }
1254       rp[i]                    = bcol;
1255       ap[16*i + 4*cidx + ridx] = value;
1256       noinsert1:;
1257       low = i;
1258     }
1259     ailen[brow] = nrow;
1260   }
1261   PetscFunctionReturnVoid();
1262 }
1263 EXTERN_C_END
1264 
1265 /*
1266      Checks for missing diagonals
1267 */
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1271 {
1272   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1273   PetscErrorCode ierr;
1274   PetscInt       *diag,*jj = a->j,i;
1275 
1276   PetscFunctionBegin;
1277   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1278   *missing = PETSC_FALSE;
1279   if (A->rmap->n > 0 && !jj) {
1280     *missing  = PETSC_TRUE;
1281     if (d) *d = 0;
1282     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1283   } else {
1284     diag     = a->diag;
1285     for (i=0; i<a->mbs; i++) {
1286       if (jj[diag[i]] != i) {
1287         *missing  = PETSC_TRUE;
1288         if (d) *d = i;
1289         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1290       }
1291     }
1292   }
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1299 {
1300   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302   PetscInt       i,j,m = a->mbs;
1303 
1304   PetscFunctionBegin;
1305   if (!a->diag) {
1306     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1307     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1308     a->free_diag = PETSC_TRUE;
1309   }
1310   for (i=0; i<m; i++) {
1311     a->diag[i] = a->i[i+1];
1312     for (j=a->i[i]; j<a->i[i+1]; j++) {
1313       if (a->j[j] == i) {
1314         a->diag[i] = j;
1315         break;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 
1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1332   PetscInt       *tia, *tja;
1333 
1334   PetscFunctionBegin;
1335   *nn = n;
1336   if (!ia) PetscFunctionReturn(0);
1337   if (symmetric) {
1338     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1339     nz   = tia[n];
1340   } else {
1341     tia = a->i; tja = a->j;
1342   }
1343 
1344   if (!blockcompressed && bs > 1) {
1345     (*nn) *= bs;
1346     /* malloc & create the natural set of indices */
1347     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1348     if (n) {
1349       (*ia)[0] = 0;
1350       for (j=1; j<bs; j++) {
1351         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1352       }
1353     }
1354 
1355     for (i=1; i<n; i++) {
1356       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1357       for (j=1; j<bs; j++) {
1358         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1359       }
1360     }
1361     if (n) {
1362       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1363     }
1364 
1365     if (ja) {
1366       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1367       cnt = 0;
1368       for (i=0; i<n; i++) {
1369         for (j=0; j<bs; j++) {
1370           for (k=tia[i]; k<tia[i+1]; k++) {
1371             for (l=0; l<bs; l++) {
1372               (*ja)[cnt++] = bs*tja[k] + l;
1373 	    }
1374           }
1375         }
1376       }
1377     }
1378 
1379     n     *= bs;
1380     nz *= bs*bs;
1381     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1382       ierr = PetscFree(tia);CHKERRQ(ierr);
1383       ierr = PetscFree(tja);CHKERRQ(ierr);
1384     }
1385   } else if (oshift == 1) {
1386     if (symmetric) {
1387       PetscInt nz = tia[A->rmap->n/bs];
1388       /*  add 1 to i and j indices */
1389       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1390       *ia = tia;
1391       if (ja) {
1392 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1393         *ja = tja;
1394       }
1395     } else {
1396       PetscInt nz = a->i[A->rmap->n/bs];
1397       /* malloc space and  add 1 to i and j indices */
1398       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1399       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1400       if (ja) {
1401 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1402 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1403       }
1404     }
1405   } else {
1406     *ia = tia;
1407     if (ja) *ja = tja;
1408   }
1409 
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1416 {
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!ia) PetscFunctionReturn(0);
1421   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1422     ierr = PetscFree(*ia);CHKERRQ(ierr);
1423     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436 #if defined(PETSC_USE_LOG)
1437   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1438 #endif
1439   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1440   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1441   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1442   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1443   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1444   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1445   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1446   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1447   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1448   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1449   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1450   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1451   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1452 
1453   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1454   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1455   ierr = PetscFree(a);CHKERRQ(ierr);
1456 
1457   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1458   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1459   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1460   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1461   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1462   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1463   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1465   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1471 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1472 {
1473   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1474   PetscErrorCode ierr;
1475 
1476   PetscFunctionBegin;
1477   switch (op) {
1478   case MAT_ROW_ORIENTED:
1479     a->roworiented    = flg;
1480     break;
1481   case MAT_KEEP_NONZERO_PATTERN:
1482     a->keepnonzeropattern = flg;
1483     break;
1484   case MAT_NEW_NONZERO_LOCATIONS:
1485     a->nonew          = (flg ? 0 : 1);
1486     break;
1487   case MAT_NEW_NONZERO_LOCATION_ERR:
1488     a->nonew          = (flg ? -1 : 0);
1489     break;
1490   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1491     a->nonew          = (flg ? -2 : 0);
1492     break;
1493   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1494     a->nounused       = (flg ? -1 : 0);
1495     break;
1496   case MAT_CHECK_COMPRESSED_ROW:
1497     a->compressedrow.check = flg;
1498     break;
1499   case MAT_NEW_DIAGONALS:
1500   case MAT_IGNORE_OFF_PROC_ENTRIES:
1501   case MAT_USE_HASH_TABLE:
1502     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1503     break;
1504   case MAT_SYMMETRIC:
1505   case MAT_STRUCTURALLY_SYMMETRIC:
1506   case MAT_HERMITIAN:
1507   case MAT_SYMMETRY_ETERNAL:
1508     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1509     break;
1510   default:
1511     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1518 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1519 {
1520   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1521   PetscErrorCode ierr;
1522   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1523   MatScalar      *aa,*aa_i;
1524   PetscScalar    *v_i;
1525 
1526   PetscFunctionBegin;
1527   bs  = A->rmap->bs;
1528   ai  = a->i;
1529   aj  = a->j;
1530   aa  = a->a;
1531   bs2 = a->bs2;
1532 
1533   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1534 
1535   bn  = row/bs;   /* Block number */
1536   bp  = row % bs; /* Block Position */
1537   M   = ai[bn+1] - ai[bn];
1538   *nz = bs*M;
1539 
1540   if (v) {
1541     *v = 0;
1542     if (*nz) {
1543       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1544       for (i=0; i<M; i++) { /* for each block in the block row */
1545         v_i  = *v + i*bs;
1546         aa_i = aa + bs2*(ai[bn] + i);
1547         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1548       }
1549     }
1550   }
1551 
1552   if (idx) {
1553     *idx = 0;
1554     if (*nz) {
1555       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1556       for (i=0; i<M; i++) { /* for each block in the block row */
1557         idx_i = *idx + i*bs;
1558         itmp  = bs*aj[ai[bn] + i];
1559         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1560       }
1561     }
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1568 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1569 {
1570   PetscErrorCode ierr;
1571 
1572   PetscFunctionBegin;
1573   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1574   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1582 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1583 {
1584   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1585   Mat            C;
1586   PetscErrorCode ierr;
1587   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1588   PetscInt       *rows,*cols,bs2=a->bs2;
1589   MatScalar      *array;
1590 
1591   PetscFunctionBegin;
1592   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1593   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1594     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1595     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1596 
1597     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1598     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1599     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1600     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1601     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1602     ierr = PetscFree(col);CHKERRQ(ierr);
1603   } else {
1604     C = *B;
1605   }
1606 
1607   array = a->a;
1608   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1609   for (i=0; i<mbs; i++) {
1610     cols[0] = i*bs;
1611     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1612     len = ai[i+1] - ai[i];
1613     for (j=0; j<len; j++) {
1614       rows[0] = (*aj++)*bs;
1615       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1616       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1617       array += bs2;
1618     }
1619   }
1620   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1621 
1622   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1623   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1624 
1625   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1626     *B = C;
1627   } else {
1628     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1635 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1636 {
1637   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1638   PetscErrorCode ierr;
1639   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1640   int            fd;
1641   PetscScalar    *aa;
1642   FILE           *file;
1643 
1644   PetscFunctionBegin;
1645   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1646   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1647   col_lens[0] = MAT_FILE_CLASSID;
1648 
1649   col_lens[1] = A->rmap->N;
1650   col_lens[2] = A->cmap->n;
1651   col_lens[3] = a->nz*bs2;
1652 
1653   /* store lengths of each row and write (including header) to file */
1654   count = 0;
1655   for (i=0; i<a->mbs; i++) {
1656     for (j=0; j<bs; j++) {
1657       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1658     }
1659   }
1660   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1661   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1662 
1663   /* store column indices (zero start index) */
1664   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1665   count = 0;
1666   for (i=0; i<a->mbs; i++) {
1667     for (j=0; j<bs; j++) {
1668       for (k=a->i[i]; k<a->i[i+1]; k++) {
1669         for (l=0; l<bs; l++) {
1670           jj[count++] = bs*a->j[k] + l;
1671         }
1672       }
1673     }
1674   }
1675   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1676   ierr = PetscFree(jj);CHKERRQ(ierr);
1677 
1678   /* store nonzero values */
1679   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1680   count = 0;
1681   for (i=0; i<a->mbs; i++) {
1682     for (j=0; j<bs; j++) {
1683       for (k=a->i[i]; k<a->i[i+1]; k++) {
1684         for (l=0; l<bs; l++) {
1685           aa[count++] = a->a[bs2*k + l*bs + j];
1686         }
1687       }
1688     }
1689   }
1690   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1691   ierr = PetscFree(aa);CHKERRQ(ierr);
1692 
1693   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1694   if (file) {
1695     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1696   }
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1702 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1703 {
1704   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1705   PetscErrorCode    ierr;
1706   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1707   PetscViewerFormat format;
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1711   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1712     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1713   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1714     Mat aij;
1715     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1716     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1717     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1718   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1719      PetscFunctionReturn(0);
1720   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1721     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1722     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1723     for (i=0; i<a->mbs; i++) {
1724       for (j=0; j<bs; j++) {
1725         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1726         for (k=a->i[i]; k<a->i[i+1]; k++) {
1727           for (l=0; l<bs; l++) {
1728 #if defined(PETSC_USE_COMPLEX)
1729             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1730               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1731                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1733               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1734                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1735             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1736               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1737             }
1738 #else
1739             if (a->a[bs2*k + l*bs + j] != 0.0) {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1741             }
1742 #endif
1743           }
1744         }
1745         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1746       }
1747     }
1748     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1749   } else {
1750     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1751     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1752     for (i=0; i<a->mbs; i++) {
1753       for (j=0; j<bs; j++) {
1754         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1755         for (k=a->i[i]; k<a->i[i+1]; k++) {
1756           for (l=0; l<bs; l++) {
1757 #if defined(PETSC_USE_COMPLEX)
1758             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1759               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1760                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1762               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1763                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1764             } else {
1765               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1766             }
1767 #else
1768             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1769 #endif
1770           }
1771         }
1772         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1773       }
1774     }
1775     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1776   }
1777   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1783 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1784 {
1785   Mat            A = (Mat) Aa;
1786   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1787   PetscErrorCode ierr;
1788   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1789   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1790   MatScalar      *aa;
1791   PetscViewer    viewer;
1792 
1793   PetscFunctionBegin;
1794 
1795   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1796   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1797 
1798   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1799 
1800   /* loop over matrix elements drawing boxes */
1801   color = PETSC_DRAW_BLUE;
1802   for (i=0,row=0; i<mbs; i++,row+=bs) {
1803     for (j=a->i[i]; j<a->i[i+1]; j++) {
1804       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1805       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1806       aa = a->a + j*bs2;
1807       for (k=0; k<bs; k++) {
1808         for (l=0; l<bs; l++) {
1809           if (PetscRealPart(*aa++) >=  0.) continue;
1810           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1811         }
1812       }
1813     }
1814   }
1815   color = PETSC_DRAW_CYAN;
1816   for (i=0,row=0; i<mbs; i++,row+=bs) {
1817     for (j=a->i[i]; j<a->i[i+1]; j++) {
1818       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1819       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1820       aa = a->a + j*bs2;
1821       for (k=0; k<bs; k++) {
1822         for (l=0; l<bs; l++) {
1823           if (PetscRealPart(*aa++) != 0.) continue;
1824           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1825         }
1826       }
1827     }
1828   }
1829 
1830   color = PETSC_DRAW_RED;
1831   for (i=0,row=0; i<mbs; i++,row+=bs) {
1832     for (j=a->i[i]; j<a->i[i+1]; j++) {
1833       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835       aa = a->a + j*bs2;
1836       for (k=0; k<bs; k++) {
1837         for (l=0; l<bs; l++) {
1838           if (PetscRealPart(*aa++) <= 0.) continue;
1839           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1840         }
1841       }
1842     }
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1849 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1850 {
1851   PetscErrorCode ierr;
1852   PetscReal      xl,yl,xr,yr,w,h;
1853   PetscDraw      draw;
1854   PetscBool      isnull;
1855 
1856   PetscFunctionBegin;
1857 
1858   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1859   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1860 
1861   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1862   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1863   xr += w;    yr += h;  xl = -w;     yl = -h;
1864   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1865   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1866   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatView_SeqBAIJ"
1872 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1873 {
1874   PetscErrorCode ierr;
1875   PetscBool      iascii,isbinary,isdraw;
1876 
1877   PetscFunctionBegin;
1878   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1879   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1880   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1881   if (iascii){
1882     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1883   } else if (isbinary) {
1884     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1885   } else if (isdraw) {
1886     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1887   } else {
1888     Mat B;
1889     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1890     ierr = MatView(B,viewer);CHKERRQ(ierr);
1891     ierr = MatDestroy(&B);CHKERRQ(ierr);
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1899 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1900 {
1901   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1902   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1903   PetscInt    *ai = a->i,*ailen = a->ilen;
1904   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1905   MatScalar   *ap,*aa = a->a;
1906 
1907   PetscFunctionBegin;
1908   for (k=0; k<m; k++) { /* loop over rows */
1909     row  = im[k]; brow = row/bs;
1910     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1911     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1912     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1913     nrow = ailen[brow];
1914     for (l=0; l<n; l++) { /* loop over columns */
1915       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1916       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1917       col  = in[l] ;
1918       bcol = col/bs;
1919       cidx = col%bs;
1920       ridx = row%bs;
1921       high = nrow;
1922       low  = 0; /* assume unsorted */
1923       while (high-low > 5) {
1924         t = (low+high)/2;
1925         if (rp[t] > bcol) high = t;
1926         else             low  = t;
1927       }
1928       for (i=low; i<high; i++) {
1929         if (rp[i] > bcol) break;
1930         if (rp[i] == bcol) {
1931           *v++ = ap[bs2*i+bs*cidx+ridx];
1932           goto finished;
1933         }
1934       }
1935       *v++ = 0.0;
1936       finished:;
1937     }
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1944 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1945 {
1946   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1947   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1948   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1949   PetscErrorCode    ierr;
1950   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1951   PetscBool         roworiented=a->roworiented;
1952   const PetscScalar *value = v;
1953   MatScalar         *ap,*aa = a->a,*bap;
1954 
1955   PetscFunctionBegin;
1956   if (roworiented) {
1957     stepval = (n-1)*bs;
1958   } else {
1959     stepval = (m-1)*bs;
1960   }
1961   for (k=0; k<m; k++) { /* loop over added rows */
1962     row  = im[k];
1963     if (row < 0) continue;
1964 #if defined(PETSC_USE_DEBUG)
1965     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1966 #endif
1967     rp   = aj + ai[row];
1968     ap   = aa + bs2*ai[row];
1969     rmax = imax[row];
1970     nrow = ailen[row];
1971     low  = 0;
1972     high = nrow;
1973     for (l=0; l<n; l++) { /* loop over added columns */
1974       if (in[l] < 0) continue;
1975 #if defined(PETSC_USE_DEBUG)
1976       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1977 #endif
1978       col = in[l];
1979       if (roworiented) {
1980         value = v + (k*(stepval+bs) + l)*bs;
1981       } else {
1982         value = v + (l*(stepval+bs) + k)*bs;
1983       }
1984       if (col <= lastcol) low = 0; else high = nrow;
1985       lastcol = col;
1986       while (high-low > 7) {
1987         t = (low+high)/2;
1988         if (rp[t] > col) high = t;
1989         else             low  = t;
1990       }
1991       for (i=low; i<high; i++) {
1992         if (rp[i] > col) break;
1993         if (rp[i] == col) {
1994           bap  = ap +  bs2*i;
1995           if (roworiented) {
1996             if (is == ADD_VALUES) {
1997               for (ii=0; ii<bs; ii++,value+=stepval) {
1998                 for (jj=ii; jj<bs2; jj+=bs) {
1999                   bap[jj] += *value++;
2000                 }
2001               }
2002             } else {
2003               for (ii=0; ii<bs; ii++,value+=stepval) {
2004                 for (jj=ii; jj<bs2; jj+=bs) {
2005                   bap[jj] = *value++;
2006                 }
2007               }
2008             }
2009           } else {
2010             if (is == ADD_VALUES) {
2011               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2012                 for (jj=0; jj<bs; jj++) {
2013                   bap[jj] += value[jj];
2014                 }
2015                 bap += bs;
2016               }
2017             } else {
2018               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2019                 for (jj=0; jj<bs; jj++) {
2020                   bap[jj]  = value[jj];
2021                 }
2022                 bap += bs;
2023               }
2024             }
2025           }
2026           goto noinsert2;
2027         }
2028       }
2029       if (nonew == 1) goto noinsert2;
2030       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2031       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2032       N = nrow++ - 1; high++;
2033       /* shift up all the later entries in this row */
2034       for (ii=N; ii>=i; ii--) {
2035         rp[ii+1] = rp[ii];
2036         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2037       }
2038       if (N >= i) {
2039         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2040       }
2041       rp[i] = col;
2042       bap   = ap +  bs2*i;
2043       if (roworiented) {
2044         for (ii=0; ii<bs; ii++,value+=stepval) {
2045           for (jj=ii; jj<bs2; jj+=bs) {
2046             bap[jj] = *value++;
2047           }
2048         }
2049       } else {
2050         for (ii=0; ii<bs; ii++,value+=stepval) {
2051           for (jj=0; jj<bs; jj++) {
2052             *bap++  = *value++;
2053           }
2054         }
2055       }
2056       noinsert2:;
2057       low = i;
2058     }
2059     ailen[row] = nrow;
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2066 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2067 {
2068   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2069   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2070   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2071   PetscErrorCode ierr;
2072   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2073   MatScalar      *aa = a->a,*ap;
2074   PetscReal      ratio=0.6;
2075 
2076   PetscFunctionBegin;
2077   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2078 
2079   if (m) rmax = ailen[0];
2080   for (i=1; i<mbs; i++) {
2081     /* move each row back by the amount of empty slots (fshift) before it*/
2082     fshift += imax[i-1] - ailen[i-1];
2083     rmax   = PetscMax(rmax,ailen[i]);
2084     if (fshift) {
2085       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2086       N = ailen[i];
2087       for (j=0; j<N; j++) {
2088         ip[j-fshift] = ip[j];
2089         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2090       }
2091     }
2092     ai[i] = ai[i-1] + ailen[i-1];
2093   }
2094   if (mbs) {
2095     fshift += imax[mbs-1] - ailen[mbs-1];
2096     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2097   }
2098   /* reset ilen and imax for each row */
2099   for (i=0; i<mbs; i++) {
2100     ailen[i] = imax[i] = ai[i+1] - ai[i];
2101   }
2102   a->nz = ai[mbs];
2103 
2104   /* diagonals may have moved, so kill the diagonal pointers */
2105   a->idiagvalid = PETSC_FALSE;
2106   if (fshift && a->diag) {
2107     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2108     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2109     a->diag = 0;
2110   }
2111   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2112   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2113   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2114   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2115   A->info.mallocs     += a->reallocs;
2116   a->reallocs          = 0;
2117   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2118 
2119   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2120   A->same_nonzero = PETSC_TRUE;
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 /*
2125    This function returns an array of flags which indicate the locations of contiguous
2126    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2127    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2128    Assume: sizes should be long enough to hold all the values.
2129 */
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2132 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2133 {
2134   PetscInt   i,j,k,row;
2135   PetscBool  flg;
2136 
2137   PetscFunctionBegin;
2138   for (i=0,j=0; i<n; j++) {
2139     row = idx[i];
2140     if (row%bs!=0) { /* Not the begining of a block */
2141       sizes[j] = 1;
2142       i++;
2143     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2144       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2145       i++;
2146     } else { /* Begining of the block, so check if the complete block exists */
2147       flg = PETSC_TRUE;
2148       for (k=1; k<bs; k++) {
2149         if (row+k != idx[i+k]) { /* break in the block */
2150           flg = PETSC_FALSE;
2151           break;
2152         }
2153       }
2154       if (flg) { /* No break in the bs */
2155         sizes[j] = bs;
2156         i+= bs;
2157       } else {
2158         sizes[j] = 1;
2159         i++;
2160       }
2161     }
2162   }
2163   *bs_max = j;
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2169 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2170 {
2171   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2172   PetscErrorCode    ierr;
2173   PetscInt          i,j,k,count,*rows;
2174   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2175   PetscScalar       zero = 0.0;
2176   MatScalar         *aa;
2177   const PetscScalar *xx;
2178   PetscScalar       *bb;
2179 
2180   PetscFunctionBegin;
2181   /* fix right hand side if needed */
2182   if (x && b) {
2183     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2184     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2185     for (i=0; i<is_n; i++) {
2186       bb[is_idx[i]] = diag*xx[is_idx[i]];
2187     }
2188     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2189     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2190   }
2191 
2192   /* Make a copy of the IS and  sort it */
2193   /* allocate memory for rows,sizes */
2194   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2195 
2196   /* copy IS values to rows, and sort them */
2197   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2198   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2199 
2200   if (baij->keepnonzeropattern) {
2201     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2202     bs_max = is_n;
2203     A->same_nonzero = PETSC_TRUE;
2204   } else {
2205     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2206     A->same_nonzero = PETSC_FALSE;
2207   }
2208 
2209   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2210     row   = rows[j];
2211     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2212     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2213     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2214     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2215       if (diag != 0.0) {
2216         if (baij->ilen[row/bs] > 0) {
2217           baij->ilen[row/bs]       = 1;
2218           baij->j[baij->i[row/bs]] = row/bs;
2219           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2220         }
2221         /* Now insert all the diagonal values for this bs */
2222         for (k=0; k<bs; k++) {
2223           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2224         }
2225       } else { /* (diag == 0.0) */
2226         baij->ilen[row/bs] = 0;
2227       } /* end (diag == 0.0) */
2228     } else { /* (sizes[i] != bs) */
2229 #if defined (PETSC_USE_DEBUG)
2230       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2231 #endif
2232       for (k=0; k<count; k++) {
2233         aa[0] =  zero;
2234         aa    += bs;
2235       }
2236       if (diag != 0.0) {
2237         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2238       }
2239     }
2240   }
2241 
2242   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2243   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 #undef __FUNCT__
2248 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2249 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2250 {
2251   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2252   PetscErrorCode    ierr;
2253   PetscInt          i,j,k,count;
2254   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2255   PetscScalar       zero = 0.0;
2256   MatScalar         *aa;
2257   const PetscScalar *xx;
2258   PetscScalar       *bb;
2259   PetscBool         *zeroed,vecs = PETSC_FALSE;
2260 
2261   PetscFunctionBegin;
2262   /* fix right hand side if needed */
2263   if (x && b) {
2264     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2265     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2266     vecs = PETSC_TRUE;
2267   }
2268   A->same_nonzero = PETSC_TRUE;
2269 
2270   /* zero the columns */
2271   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2272   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2273   for (i=0; i<is_n; i++) {
2274     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2275     zeroed[is_idx[i]] = PETSC_TRUE;
2276   }
2277   for (i=0; i<A->rmap->N; i++) {
2278     if (!zeroed[i]) {
2279       row = i/bs;
2280       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2281         for (k=0; k<bs; k++) {
2282           col = bs*baij->j[j] + k;
2283 	  if (zeroed[col]) {
2284 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2285             if (vecs) bb[i] -= aa[0]*xx[col];
2286             aa[0] = 0.0;
2287           }
2288         }
2289       }
2290     } else if (vecs) bb[i] = diag*xx[i];
2291   }
2292   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2293   if (vecs) {
2294     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2295     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2296   }
2297 
2298   /* zero the rows */
2299   for (i=0; i<is_n; i++) {
2300     row   = is_idx[i];
2301     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2302     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2303     for (k=0; k<count; k++) {
2304       aa[0] =  zero;
2305       aa    += bs;
2306     }
2307     if (diag != 0.0) {
2308       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2309     }
2310   }
2311   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 #undef __FUNCT__
2316 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2317 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2318 {
2319   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2320   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2321   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2322   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2323   PetscErrorCode ierr;
2324   PetscInt       ridx,cidx,bs2=a->bs2;
2325   PetscBool      roworiented=a->roworiented;
2326   MatScalar      *ap,value,*aa=a->a,*bap;
2327 
2328   PetscFunctionBegin;
2329   if (v) PetscValidScalarPointer(v,6);
2330   for (k=0; k<m; k++) { /* loop over added rows */
2331     row  = im[k];
2332     brow = row/bs;
2333     if (row < 0) continue;
2334 #if defined(PETSC_USE_DEBUG)
2335     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2336 #endif
2337     rp   = aj + ai[brow];
2338     ap   = aa + bs2*ai[brow];
2339     rmax = imax[brow];
2340     nrow = ailen[brow];
2341     low  = 0;
2342     high = nrow;
2343     for (l=0; l<n; l++) { /* loop over added columns */
2344       if (in[l] < 0) continue;
2345 #if defined(PETSC_USE_DEBUG)
2346       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2347 #endif
2348       col = in[l]; bcol = col/bs;
2349       ridx = row % bs; cidx = col % bs;
2350       if (roworiented) {
2351         value = v[l + k*n];
2352       } else {
2353         value = v[k + l*m];
2354       }
2355       if (col <= lastcol) low = 0; else high = nrow;
2356       lastcol = col;
2357       while (high-low > 7) {
2358         t = (low+high)/2;
2359         if (rp[t] > bcol) high = t;
2360         else              low  = t;
2361       }
2362       for (i=low; i<high; i++) {
2363         if (rp[i] > bcol) break;
2364         if (rp[i] == bcol) {
2365           bap  = ap +  bs2*i + bs*cidx + ridx;
2366           if (is == ADD_VALUES) *bap += value;
2367           else                  *bap  = value;
2368           goto noinsert1;
2369         }
2370       }
2371       if (nonew == 1) goto noinsert1;
2372       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2373       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2374       N = nrow++ - 1; high++;
2375       /* shift up all the later entries in this row */
2376       for (ii=N; ii>=i; ii--) {
2377         rp[ii+1] = rp[ii];
2378         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2379       }
2380       if (N>=i) {
2381         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2382       }
2383       rp[i]                      = bcol;
2384       ap[bs2*i + bs*cidx + ridx] = value;
2385       a->nz++;
2386       noinsert1:;
2387       low = i;
2388     }
2389     ailen[brow] = nrow;
2390   }
2391   A->same_nonzero = PETSC_FALSE;
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2397 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2398 {
2399   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2400   Mat            outA;
2401   PetscErrorCode ierr;
2402   PetscBool      row_identity,col_identity;
2403 
2404   PetscFunctionBegin;
2405   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2406   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2407   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2408   if (!row_identity || !col_identity) {
2409     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2410   }
2411 
2412   outA            = inA;
2413   inA->factortype = MAT_FACTOR_LU;
2414 
2415   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2416 
2417   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2418   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2419   a->row = row;
2420   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2421   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2422   a->col = col;
2423 
2424   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2425   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2426    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2427   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2428 
2429   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2430   if (!a->solve_work) {
2431     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2432     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2433   }
2434   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2435 
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 EXTERN_C_BEGIN
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2442 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2443 {
2444   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2445   PetscInt    i,nz,mbs;
2446 
2447   PetscFunctionBegin;
2448   nz  = baij->maxnz;
2449   mbs = baij->mbs;
2450   for (i=0; i<nz; i++) {
2451     baij->j[i] = indices[i];
2452   }
2453   baij->nz = nz;
2454   for (i=0; i<mbs; i++) {
2455     baij->ilen[i] = baij->imax[i];
2456   }
2457   PetscFunctionReturn(0);
2458 }
2459 EXTERN_C_END
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2463 /*@
2464     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2465        in the matrix.
2466 
2467   Input Parameters:
2468 +  mat - the SeqBAIJ matrix
2469 -  indices - the column indices
2470 
2471   Level: advanced
2472 
2473   Notes:
2474     This can be called if you have precomputed the nonzero structure of the
2475   matrix and want to provide it to the matrix object to improve the performance
2476   of the MatSetValues() operation.
2477 
2478     You MUST have set the correct numbers of nonzeros per row in the call to
2479   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2480 
2481     MUST be called before any calls to MatSetValues();
2482 
2483 @*/
2484 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2485 {
2486   PetscErrorCode ierr;
2487 
2488   PetscFunctionBegin;
2489   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2490   PetscValidPointer(indices,2);
2491   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 #undef __FUNCT__
2496 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2497 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2498 {
2499   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2500   PetscErrorCode ierr;
2501   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2502   PetscReal      atmp;
2503   PetscScalar    *x,zero = 0.0;
2504   MatScalar      *aa;
2505   PetscInt       ncols,brow,krow,kcol;
2506 
2507   PetscFunctionBegin;
2508   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2509   bs   = A->rmap->bs;
2510   aa   = a->a;
2511   ai   = a->i;
2512   aj   = a->j;
2513   mbs  = a->mbs;
2514 
2515   ierr = VecSet(v,zero);CHKERRQ(ierr);
2516   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2517   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2518   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2519   for (i=0; i<mbs; i++) {
2520     ncols = ai[1] - ai[0]; ai++;
2521     brow  = bs*i;
2522     for (j=0; j<ncols; j++){
2523       for (kcol=0; kcol<bs; kcol++){
2524         for (krow=0; krow<bs; krow++){
2525           atmp = PetscAbsScalar(*aa);aa++;
2526           row = brow + krow;    /* row index */
2527           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2528           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2529         }
2530       }
2531       aj++;
2532     }
2533   }
2534   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 #undef __FUNCT__
2539 #define __FUNCT__ "MatCopy_SeqBAIJ"
2540 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2541 {
2542   PetscErrorCode ierr;
2543 
2544   PetscFunctionBegin;
2545   /* If the two matrices have the same copy implementation, use fast copy. */
2546   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2547     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2548     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2549     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2550 
2551     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2552     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2553     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2554   } else {
2555     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2556   }
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2562 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2563 {
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 #undef __FUNCT__
2572 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2573 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2574 {
2575   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2576   PetscFunctionBegin;
2577   *array = a->a;
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2583 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2584 {
2585   PetscFunctionBegin;
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 #undef __FUNCT__
2590 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2591 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2592 {
2593   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2594   PetscErrorCode ierr;
2595   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2596   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2597 
2598   PetscFunctionBegin;
2599   if (str == SAME_NONZERO_PATTERN) {
2600     PetscScalar alpha = a;
2601     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2602   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2603     if (y->xtoy && y->XtoY != X) {
2604       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2605       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2606     }
2607     if (!y->xtoy) { /* get xtoy */
2608       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2609       y->XtoY = X;
2610       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2611     }
2612     bs2 = bs*bs;
2613     for (i=0; i<x->nz; i++) {
2614       j = 0;
2615       while (j < bs2){
2616         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2617         j++;
2618       }
2619     }
2620     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2621   } else {
2622     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2623   }
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 #undef __FUNCT__
2628 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2629 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2630 {
2631   PetscInt rbs,cbs;
2632   PetscErrorCode ierr;
2633 
2634   PetscFunctionBegin;
2635   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2636   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2637   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2638   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2639   PetscFunctionReturn(0);
2640 }
2641 
2642 #undef __FUNCT__
2643 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2644 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2645 {
2646   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2647   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2648   MatScalar      *aa = a->a;
2649 
2650   PetscFunctionBegin;
2651   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 #undef __FUNCT__
2656 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2657 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2658 {
2659   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2660   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2661   MatScalar      *aa = a->a;
2662 
2663   PetscFunctionBegin;
2664   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2672 /*
2673     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2674 */
2675 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2676 {
2677   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2678   PetscErrorCode ierr;
2679   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2680   PetscInt       nz = a->i[m],row,*jj,mr,col;
2681 
2682   PetscFunctionBegin;
2683   *nn = n;
2684   if (!ia) PetscFunctionReturn(0);
2685   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2686   else {
2687     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2688     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2689     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2690     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2691     jj = a->j;
2692     for (i=0; i<nz; i++) {
2693       collengths[jj[i]]++;
2694     }
2695     cia[0] = oshift;
2696     for (i=0; i<n; i++) {
2697       cia[i+1] = cia[i] + collengths[i];
2698     }
2699     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2700     jj   = a->j;
2701     for (row=0; row<m; row++) {
2702       mr = a->i[row+1] - a->i[row];
2703       for (i=0; i<mr; i++) {
2704         col = *jj++;
2705         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2706       }
2707     }
2708     ierr = PetscFree(collengths);CHKERRQ(ierr);
2709     *ia = cia; *ja = cja;
2710   }
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2716 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2717 {
2718   PetscErrorCode ierr;
2719 
2720   PetscFunctionBegin;
2721   if (!ia) PetscFunctionReturn(0);
2722   ierr = PetscFree(*ia);CHKERRQ(ierr);
2723   ierr = PetscFree(*ja);CHKERRQ(ierr);
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2729 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2730 {
2731   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2732   PetscErrorCode ierr;
2733   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2734   PetscScalar    dx,*y,*xx,*w3_array;
2735   PetscScalar    *vscale_array;
2736   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2737   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2738   void           *fctx = coloring->fctx;
2739   PetscBool      flg = PETSC_FALSE;
2740   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2741   Vec            x1_tmp;
2742 
2743   PetscFunctionBegin;
2744   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2745   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2746   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2747   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2748 
2749   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2750   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2751   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2752   if (flg) {
2753     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2754   } else {
2755     PetscBool  assembled;
2756     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2757     if (assembled) {
2758       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2759     }
2760   }
2761 
2762   x1_tmp = x1;
2763   if (!coloring->vscale){
2764     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2765   }
2766 
2767   /*
2768     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2769     coloring->F for the coarser grids from the finest
2770   */
2771   if (coloring->F) {
2772     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2773     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2774     if (m1 != m2) {
2775       coloring->F = 0;
2776       }
2777     }
2778 
2779   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2780     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2781   }
2782   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2783 
2784   /* Set w1 = F(x1) */
2785   if (coloring->F) {
2786     w1          = coloring->F; /* use already computed value of function */
2787     coloring->F = 0;
2788   } else {
2789     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2790     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2791     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2792   }
2793 
2794   if (!coloring->w3) {
2795     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2796     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2797   }
2798   w3 = coloring->w3;
2799 
2800     CHKMEMQ;
2801     /* Compute all the local scale factors, including ghost points */
2802   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2803   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2804   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2805   if (ctype == IS_COLORING_GHOSTED){
2806     col_start = 0; col_end = N;
2807   } else if (ctype == IS_COLORING_GLOBAL){
2808     xx = xx - start;
2809     vscale_array = vscale_array - start;
2810     col_start = start; col_end = N + start;
2811   }    CHKMEMQ;
2812   for (col=col_start; col<col_end; col++){
2813     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2814     if (coloring->htype[0] == 'w') {
2815       dx = 1.0 + unorm;
2816     } else {
2817       dx  = xx[col];
2818     }
2819     if (dx == 0.0) dx = 1.0;
2820 #if !defined(PETSC_USE_COMPLEX)
2821     if (dx < umin && dx >= 0.0)      dx = umin;
2822     else if (dx < 0.0 && dx > -umin) dx = -umin;
2823 #else
2824     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2825     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2826 #endif
2827     dx               *= epsilon;
2828     vscale_array[col] = 1.0/dx;
2829   }     CHKMEMQ;
2830   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2831   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2832   if (ctype == IS_COLORING_GLOBAL){
2833     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2834     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2835   }
2836   CHKMEMQ;
2837   if (coloring->vscaleforrow) {
2838     vscaleforrow = coloring->vscaleforrow;
2839   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2840 
2841   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2842   /*
2843     Loop over each color
2844   */
2845   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2846   for (k=0; k<coloring->ncolors; k++) {
2847     coloring->currentcolor = k;
2848     for (i=0; i<bs; i++) {
2849       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2850       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2851       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2852       /*
2853 	Loop over each column associated with color
2854 	adding the perturbation to the vector w3.
2855       */
2856       for (l=0; l<coloring->ncolumns[k]; l++) {
2857 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2858 	if (coloring->htype[0] == 'w') {
2859 	  dx = 1.0 + unorm;
2860 	} else {
2861 	  dx  = xx[col];
2862 	}
2863 	if (dx == 0.0) dx = 1.0;
2864 #if !defined(PETSC_USE_COMPLEX)
2865 	if (dx < umin && dx >= 0.0)      dx = umin;
2866 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2867 #else
2868 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2869 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2870 #endif
2871 	dx            *= epsilon;
2872 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2873 	w3_array[col] += dx;
2874       }
2875       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2876       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2877 
2878       /*
2879 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2880 	w2 = F(x1 + dx) - F(x1)
2881       */
2882       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2883       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2884       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2885       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2886 
2887       /*
2888 	Loop over rows of vector, putting results into Jacobian matrix
2889       */
2890       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2891       for (l=0; l<coloring->nrows[k]; l++) {
2892 	row    = bs*coloring->rows[k][l];             /* local row index */
2893 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2894         for (j=0; j<bs; j++) {
2895   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2896           srows[j]  = row + start + j;
2897         }
2898 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2899       }
2900       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2901     }
2902   } /* endof for each color */
2903   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2904   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2905   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2906   ierr = PetscFree(srows);CHKERRQ(ierr);
2907 
2908   coloring->currentcolor = -1;
2909   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2910   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2911   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2912   PetscFunctionReturn(0);
2913 }
2914 
2915 /* -------------------------------------------------------------------*/
2916 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2917        MatGetRow_SeqBAIJ,
2918        MatRestoreRow_SeqBAIJ,
2919        MatMult_SeqBAIJ_N,
2920 /* 4*/ MatMultAdd_SeqBAIJ_N,
2921        MatMultTranspose_SeqBAIJ,
2922        MatMultTransposeAdd_SeqBAIJ,
2923        0,
2924        0,
2925        0,
2926 /*10*/ 0,
2927        MatLUFactor_SeqBAIJ,
2928        0,
2929        0,
2930        MatTranspose_SeqBAIJ,
2931 /*15*/ MatGetInfo_SeqBAIJ,
2932        MatEqual_SeqBAIJ,
2933        MatGetDiagonal_SeqBAIJ,
2934        MatDiagonalScale_SeqBAIJ,
2935        MatNorm_SeqBAIJ,
2936 /*20*/ 0,
2937        MatAssemblyEnd_SeqBAIJ,
2938        MatSetOption_SeqBAIJ,
2939        MatZeroEntries_SeqBAIJ,
2940 /*24*/ MatZeroRows_SeqBAIJ,
2941        0,
2942        0,
2943        0,
2944        0,
2945 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2946        0,
2947        0,
2948        MatGetArray_SeqBAIJ,
2949        MatRestoreArray_SeqBAIJ,
2950 /*34*/ MatDuplicate_SeqBAIJ,
2951        0,
2952        0,
2953        MatILUFactor_SeqBAIJ,
2954        0,
2955 /*39*/ MatAXPY_SeqBAIJ,
2956        MatGetSubMatrices_SeqBAIJ,
2957        MatIncreaseOverlap_SeqBAIJ,
2958        MatGetValues_SeqBAIJ,
2959        MatCopy_SeqBAIJ,
2960 /*44*/ 0,
2961        MatScale_SeqBAIJ,
2962        0,
2963        0,
2964        MatZeroRowsColumns_SeqBAIJ,
2965 /*49*/ MatSetBlockSize_SeqBAIJ,
2966        MatGetRowIJ_SeqBAIJ,
2967        MatRestoreRowIJ_SeqBAIJ,
2968        MatGetColumnIJ_SeqBAIJ,
2969        MatRestoreColumnIJ_SeqBAIJ,
2970 /*54*/ MatFDColoringCreate_SeqAIJ,
2971        0,
2972        0,
2973        0,
2974        MatSetValuesBlocked_SeqBAIJ,
2975 /*59*/ MatGetSubMatrix_SeqBAIJ,
2976        MatDestroy_SeqBAIJ,
2977        MatView_SeqBAIJ,
2978        0,
2979        0,
2980 /*64*/ 0,
2981        0,
2982        0,
2983        0,
2984        0,
2985 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2986        0,
2987        MatConvert_Basic,
2988        0,
2989        0,
2990 /*74*/ 0,
2991        MatFDColoringApply_BAIJ,
2992        0,
2993        0,
2994        0,
2995 /*79*/ 0,
2996        0,
2997        0,
2998        0,
2999        MatLoad_SeqBAIJ,
3000 /*84*/ 0,
3001        0,
3002        0,
3003        0,
3004        0,
3005 /*89*/ 0,
3006        0,
3007        0,
3008        0,
3009        0,
3010 /*94*/ 0,
3011        0,
3012        0,
3013        0,
3014        0,
3015 /*99*/0,
3016        0,
3017        0,
3018        0,
3019        0,
3020 /*104*/0,
3021        MatRealPart_SeqBAIJ,
3022        MatImaginaryPart_SeqBAIJ,
3023        0,
3024        0,
3025 /*109*/0,
3026        0,
3027        0,
3028        0,
3029        MatMissingDiagonal_SeqBAIJ,
3030 /*114*/0,
3031        0,
3032        0,
3033        0,
3034        0,
3035 /*119*/0,
3036        0,
3037        MatMultHermitianTranspose_SeqBAIJ,
3038        MatMultHermitianTransposeAdd_SeqBAIJ,
3039        0,
3040 };
3041 
3042 EXTERN_C_BEGIN
3043 #undef __FUNCT__
3044 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3045 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3046 {
3047   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3048   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3049   PetscErrorCode ierr;
3050 
3051   PetscFunctionBegin;
3052   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3053 
3054   /* allocate space for values if not already there */
3055   if (!aij->saved_values) {
3056     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3057     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3058   }
3059 
3060   /* copy values over */
3061   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 EXTERN_C_BEGIN
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3069 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3070 {
3071   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3072   PetscErrorCode ierr;
3073   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3074 
3075   PetscFunctionBegin;
3076   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3077   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3078 
3079   /* copy values over */
3080   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3081   PetscFunctionReturn(0);
3082 }
3083 EXTERN_C_END
3084 
3085 EXTERN_C_BEGIN
3086 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3087 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3088 EXTERN_C_END
3089 
3090 EXTERN_C_BEGIN
3091 #undef __FUNCT__
3092 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3093 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3094 {
3095   Mat_SeqBAIJ    *b;
3096   PetscErrorCode ierr;
3097   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3098   PetscBool      flg,skipallocation = PETSC_FALSE;
3099 
3100   PetscFunctionBegin;
3101 
3102   if (nz == MAT_SKIP_ALLOCATION) {
3103     skipallocation = PETSC_TRUE;
3104     nz             = 0;
3105   }
3106 
3107   if (bs < 0) {
3108     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3109       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3110     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3111     bs   = PetscAbs(bs);
3112   }
3113   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3114   bs   = newbs;
3115 
3116   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3117   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3118   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3119   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3120 
3121   B->preallocated = PETSC_TRUE;
3122 
3123   mbs  = B->rmap->n/bs;
3124   nbs  = B->cmap->n/bs;
3125   bs2  = bs*bs;
3126 
3127   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3128 
3129   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3130   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3131   if (nnz) {
3132     for (i=0; i<mbs; i++) {
3133       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3134       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3135     }
3136   }
3137 
3138   b       = (Mat_SeqBAIJ*)B->data;
3139   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3140     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3141   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3142 
3143   if (!flg) {
3144     switch (bs) {
3145     case 1:
3146       B->ops->mult            = MatMult_SeqBAIJ_1;
3147       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3148       B->ops->sor             = MatSOR_SeqBAIJ_1;
3149       break;
3150     case 2:
3151       B->ops->mult            = MatMult_SeqBAIJ_2;
3152       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3153       B->ops->sor             = MatSOR_SeqBAIJ_2;
3154       break;
3155     case 3:
3156       B->ops->mult            = MatMult_SeqBAIJ_3;
3157       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3158       B->ops->sor             = MatSOR_SeqBAIJ_3;
3159       break;
3160     case 4:
3161       B->ops->mult            = MatMult_SeqBAIJ_4;
3162       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3163       B->ops->sor             = MatSOR_SeqBAIJ_4;
3164       break;
3165     case 5:
3166       B->ops->mult            = MatMult_SeqBAIJ_5;
3167       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3168       B->ops->sor             = MatSOR_SeqBAIJ_5;
3169       break;
3170     case 6:
3171       B->ops->mult            = MatMult_SeqBAIJ_6;
3172       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3173       B->ops->sor             = MatSOR_SeqBAIJ_6;
3174       break;
3175     case 7:
3176       B->ops->mult            = MatMult_SeqBAIJ_7;
3177       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3178       B->ops->sor             = MatSOR_SeqBAIJ_7;
3179       break;
3180     case 15:
3181       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3182       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3183       B->ops->sor             = MatSOR_SeqBAIJ_N;
3184       break;
3185     default:
3186       B->ops->mult            = MatMult_SeqBAIJ_N;
3187       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3188       B->ops->sor             = MatSOR_SeqBAIJ_N;
3189       break;
3190     }
3191   }
3192   B->rmap->bs  = bs;
3193   b->mbs       = mbs;
3194   b->nbs       = nbs;
3195   if (!skipallocation) {
3196     if (!b->imax) {
3197       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3198       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3199       b->free_imax_ilen = PETSC_TRUE;
3200     }
3201     /* b->ilen will count nonzeros in each block row so far. */
3202     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3203     if (!nnz) {
3204       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3205       else if (nz < 0) nz = 1;
3206       for (i=0; i<mbs; i++) b->imax[i] = nz;
3207       nz = nz*mbs;
3208     } else {
3209       nz = 0;
3210       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3211     }
3212 
3213     /* allocate the matrix space */
3214     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3215     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3216     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3217     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3218     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3219     b->singlemalloc = PETSC_TRUE;
3220     b->i[0] = 0;
3221     for (i=1; i<mbs+1; i++) {
3222       b->i[i] = b->i[i-1] + b->imax[i-1];
3223     }
3224     b->free_a     = PETSC_TRUE;
3225     b->free_ij    = PETSC_TRUE;
3226   } else {
3227     b->free_a     = PETSC_FALSE;
3228     b->free_ij    = PETSC_FALSE;
3229   }
3230 
3231   B->rmap->bs          = bs;
3232   b->bs2              = bs2;
3233   b->mbs              = mbs;
3234   b->nz               = 0;
3235   b->maxnz            = nz;
3236   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3237   PetscFunctionReturn(0);
3238 }
3239 EXTERN_C_END
3240 
3241 EXTERN_C_BEGIN
3242 #undef __FUNCT__
3243 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3244 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3245 {
3246   PetscInt       i,m,nz,nz_max=0,*nnz;
3247   PetscScalar    *values=0;
3248   PetscErrorCode ierr;
3249 
3250   PetscFunctionBegin;
3251   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3252   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3253   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3254   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3255   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3256   m = B->rmap->n/bs;
3257 
3258   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3259   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3260   for(i=0; i<m; i++) {
3261     nz = ii[i+1]- ii[i];
3262     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3263     nz_max = PetscMax(nz_max, nz);
3264     nnz[i] = nz;
3265   }
3266   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3267   ierr = PetscFree(nnz);CHKERRQ(ierr);
3268 
3269   values = (PetscScalar*)V;
3270   if (!values) {
3271     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3272     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3273   }
3274   for (i=0; i<m; i++) {
3275     PetscInt          ncols  = ii[i+1] - ii[i];
3276     const PetscInt    *icols = jj + ii[i];
3277     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3278     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3279   }
3280   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3281   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3282   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283 
3284   PetscFunctionReturn(0);
3285 }
3286 EXTERN_C_END
3287 
3288 
3289 EXTERN_C_BEGIN
3290 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3291 #if defined(PETSC_HAVE_MUMPS)
3292 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3293 #endif
3294 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3295 EXTERN_C_END
3296 
3297 /*MC
3298    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3299    block sparse compressed row format.
3300 
3301    Options Database Keys:
3302 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3303 
3304   Level: beginner
3305 
3306 .seealso: MatCreateSeqBAIJ()
3307 M*/
3308 
3309 
3310 EXTERN_C_BEGIN
3311 #undef __FUNCT__
3312 #define __FUNCT__ "MatCreate_SeqBAIJ"
3313 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3314 {
3315   PetscErrorCode ierr;
3316   PetscMPIInt    size;
3317   Mat_SeqBAIJ    *b;
3318 
3319   PetscFunctionBegin;
3320   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3321   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3322 
3323   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3324   B->data = (void*)b;
3325   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3326   b->row                   = 0;
3327   b->col                   = 0;
3328   b->icol                  = 0;
3329   b->reallocs              = 0;
3330   b->saved_values          = 0;
3331 
3332   b->roworiented           = PETSC_TRUE;
3333   b->nonew                 = 0;
3334   b->diag                  = 0;
3335   b->solve_work            = 0;
3336   b->mult_work             = 0;
3337   B->spptr                 = 0;
3338   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3339   b->keepnonzeropattern    = PETSC_FALSE;
3340   b->xtoy                  = 0;
3341   b->XtoY                  = 0;
3342   B->same_nonzero          = PETSC_FALSE;
3343 
3344   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3345                                      "MatGetFactorAvailable_seqbaij_petsc",
3346                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3348                                      "MatGetFactor_seqbaij_petsc",
3349                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3350 #if defined(PETSC_HAVE_MUMPS)
3351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3352 #endif
3353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3354                                      "MatInvertBlockDiagonal_SeqBAIJ",
3355                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3357                                      "MatStoreValues_SeqBAIJ",
3358                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3359   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3360                                      "MatRetrieveValues_SeqBAIJ",
3361                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3362   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3363                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3364                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3366                                      "MatConvert_SeqBAIJ_SeqAIJ",
3367                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3368   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3369                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3370                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3371   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3372                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3373                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3374   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3375                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3376                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3377   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3378   PetscFunctionReturn(0);
3379 }
3380 EXTERN_C_END
3381 
3382 #undef __FUNCT__
3383 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3384 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3385 {
3386   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3387   PetscErrorCode ierr;
3388   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3389 
3390   PetscFunctionBegin;
3391   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3392 
3393   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3394     c->imax = a->imax;
3395     c->ilen = a->ilen;
3396     c->free_imax_ilen = PETSC_FALSE;
3397   } else {
3398     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3399     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3400     for (i=0; i<mbs; i++) {
3401       c->imax[i] = a->imax[i];
3402       c->ilen[i] = a->ilen[i];
3403     }
3404     c->free_imax_ilen = PETSC_TRUE;
3405   }
3406 
3407   /* allocate the matrix space */
3408   if (mallocmatspace){
3409     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3410       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3411       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3412       c->singlemalloc = PETSC_FALSE;
3413       c->free_ij      = PETSC_FALSE;
3414       c->i            = a->i;
3415       c->j            = a->j;
3416       c->parent       = A;
3417       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3418       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3419       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3420     } else {
3421       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3422       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3423       c->singlemalloc = PETSC_TRUE;
3424       c->free_ij      = PETSC_TRUE;
3425       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3426       if (mbs > 0) {
3427 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3428 	if (cpvalues == MAT_COPY_VALUES) {
3429 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3430 	} else {
3431 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3432 	}
3433       }
3434     }
3435   }
3436 
3437   c->roworiented = a->roworiented;
3438   c->nonew       = a->nonew;
3439   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3440   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3441   c->bs2         = a->bs2;
3442   c->mbs         = a->mbs;
3443   c->nbs         = a->nbs;
3444 
3445   if (a->diag) {
3446     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3447       c->diag      = a->diag;
3448       c->free_diag = PETSC_FALSE;
3449     } else {
3450       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3451       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3452       for (i=0; i<mbs; i++) {
3453         c->diag[i] = a->diag[i];
3454       }
3455       c->free_diag = PETSC_TRUE;
3456     }
3457   } else c->diag        = 0;
3458   c->nz                 = a->nz;
3459   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3460   c->solve_work         = 0;
3461   c->mult_work          = 0;
3462   c->free_a             = PETSC_TRUE;
3463   c->free_ij            = PETSC_TRUE;
3464   C->preallocated       = PETSC_TRUE;
3465   C->assembled          = PETSC_TRUE;
3466 
3467   c->compressedrow.use     = a->compressedrow.use;
3468   c->compressedrow.nrows   = a->compressedrow.nrows;
3469   c->compressedrow.check   = a->compressedrow.check;
3470   if (a->compressedrow.use){
3471     i = a->compressedrow.nrows;
3472     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3473     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3474     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3475     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3476   } else {
3477     c->compressedrow.use    = PETSC_FALSE;
3478     c->compressedrow.i      = PETSC_NULL;
3479     c->compressedrow.rindex = PETSC_NULL;
3480   }
3481   C->same_nonzero = A->same_nonzero;
3482   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3483   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3484   PetscFunctionReturn(0);
3485 }
3486 
3487 #undef __FUNCT__
3488 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3489 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3490 {
3491     PetscErrorCode ierr;
3492 
3493   PetscFunctionBegin;
3494   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3495   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3496   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3497   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatLoad_SeqBAIJ"
3503 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3504 {
3505   Mat_SeqBAIJ    *a;
3506   PetscErrorCode ierr;
3507   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3508   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3509   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3510   PetscInt       *masked,nmask,tmp,bs2,ishift;
3511   PetscMPIInt    size;
3512   int            fd;
3513   PetscScalar    *aa;
3514   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3515 
3516   PetscFunctionBegin;
3517   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3518   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3519   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3520   bs2  = bs*bs;
3521 
3522   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3523   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3524   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3525   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3526   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3527   M = header[1]; N = header[2]; nz = header[3];
3528 
3529   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3530   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3531 
3532   /*
3533      This code adds extra rows to make sure the number of rows is
3534     divisible by the blocksize
3535   */
3536   mbs        = M/bs;
3537   extra_rows = bs - M + bs*(mbs);
3538   if (extra_rows == bs) extra_rows = 0;
3539   else                  mbs++;
3540   if (extra_rows) {
3541     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3542   }
3543 
3544   /* Set global sizes if not already set */
3545   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3546     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3547   } else { /* Check if the matrix global sizes are correct */
3548     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3549     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3550   }
3551 
3552   /* read in row lengths */
3553   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3554   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3555   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3556 
3557   /* read in column indices */
3558   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3559   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3560   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3561 
3562   /* loop over row lengths determining block row lengths */
3563   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3564   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3565   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3566   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3567   rowcount = 0;
3568   nzcount = 0;
3569   for (i=0; i<mbs; i++) {
3570     nmask = 0;
3571     for (j=0; j<bs; j++) {
3572       kmax = rowlengths[rowcount];
3573       for (k=0; k<kmax; k++) {
3574         tmp = jj[nzcount++]/bs;
3575         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3576       }
3577       rowcount++;
3578     }
3579     browlengths[i] += nmask;
3580     /* zero out the mask elements we set */
3581     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3582   }
3583 
3584   /* Do preallocation  */
3585   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3586   a = (Mat_SeqBAIJ*)newmat->data;
3587 
3588   /* set matrix "i" values */
3589   a->i[0] = 0;
3590   for (i=1; i<= mbs; i++) {
3591     a->i[i]      = a->i[i-1] + browlengths[i-1];
3592     a->ilen[i-1] = browlengths[i-1];
3593   }
3594   a->nz         = 0;
3595   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3596 
3597   /* read in nonzero values */
3598   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3599   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3600   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3601 
3602   /* set "a" and "j" values into matrix */
3603   nzcount = 0; jcount = 0;
3604   for (i=0; i<mbs; i++) {
3605     nzcountb = nzcount;
3606     nmask    = 0;
3607     for (j=0; j<bs; j++) {
3608       kmax = rowlengths[i*bs+j];
3609       for (k=0; k<kmax; k++) {
3610         tmp = jj[nzcount++]/bs;
3611 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3612       }
3613     }
3614     /* sort the masked values */
3615     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3616 
3617     /* set "j" values into matrix */
3618     maskcount = 1;
3619     for (j=0; j<nmask; j++) {
3620       a->j[jcount++]  = masked[j];
3621       mask[masked[j]] = maskcount++;
3622     }
3623     /* set "a" values into matrix */
3624     ishift = bs2*a->i[i];
3625     for (j=0; j<bs; j++) {
3626       kmax = rowlengths[i*bs+j];
3627       for (k=0; k<kmax; k++) {
3628         tmp       = jj[nzcountb]/bs ;
3629         block     = mask[tmp] - 1;
3630         point     = jj[nzcountb] - bs*tmp;
3631         idx       = ishift + bs2*block + j + bs*point;
3632         a->a[idx] = (MatScalar)aa[nzcountb++];
3633       }
3634     }
3635     /* zero out the mask elements we set */
3636     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3637   }
3638   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3639 
3640   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3641   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3642   ierr = PetscFree(aa);CHKERRQ(ierr);
3643   ierr = PetscFree(jj);CHKERRQ(ierr);
3644   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3645 
3646   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3647   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3648   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 #undef __FUNCT__
3653 #define __FUNCT__ "MatCreateSeqBAIJ"
3654 /*@C
3655    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3656    compressed row) format.  For good matrix assembly performance the
3657    user should preallocate the matrix storage by setting the parameter nz
3658    (or the array nnz).  By setting these parameters accurately, performance
3659    during matrix assembly can be increased by more than a factor of 50.
3660 
3661    Collective on MPI_Comm
3662 
3663    Input Parameters:
3664 +  comm - MPI communicator, set to PETSC_COMM_SELF
3665 .  bs - size of block
3666 .  m - number of rows
3667 .  n - number of columns
3668 .  nz - number of nonzero blocks  per block row (same for all rows)
3669 -  nnz - array containing the number of nonzero blocks in the various block rows
3670          (possibly different for each block row) or PETSC_NULL
3671 
3672    Output Parameter:
3673 .  A - the matrix
3674 
3675    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3676    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3677    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3678 
3679    Options Database Keys:
3680 .   -mat_no_unroll - uses code that does not unroll the loops in the
3681                      block calculations (much slower)
3682 .    -mat_block_size - size of the blocks to use
3683 
3684    Level: intermediate
3685 
3686    Notes:
3687    The number of rows and columns must be divisible by blocksize.
3688 
3689    If the nnz parameter is given then the nz parameter is ignored
3690 
3691    A nonzero block is any block that as 1 or more nonzeros in it
3692 
3693    The block AIJ format is fully compatible with standard Fortran 77
3694    storage.  That is, the stored row and column indices can begin at
3695    either one (as in Fortran) or zero.  See the users' manual for details.
3696 
3697    Specify the preallocated storage with either nz or nnz (not both).
3698    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3699    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3700    matrices.
3701 
3702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3703 @*/
3704 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3705 {
3706   PetscErrorCode ierr;
3707 
3708   PetscFunctionBegin;
3709   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3710   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3711   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3712   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3713   PetscFunctionReturn(0);
3714 }
3715 
3716 #undef __FUNCT__
3717 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3718 /*@C
3719    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3720    per row in the matrix. For good matrix assembly performance the
3721    user should preallocate the matrix storage by setting the parameter nz
3722    (or the array nnz).  By setting these parameters accurately, performance
3723    during matrix assembly can be increased by more than a factor of 50.
3724 
3725    Collective on MPI_Comm
3726 
3727    Input Parameters:
3728 +  A - the matrix
3729 .  bs - size of block
3730 .  nz - number of block nonzeros per block row (same for all rows)
3731 -  nnz - array containing the number of block nonzeros in the various block rows
3732          (possibly different for each block row) or PETSC_NULL
3733 
3734    Options Database Keys:
3735 .   -mat_no_unroll - uses code that does not unroll the loops in the
3736                      block calculations (much slower)
3737 .    -mat_block_size - size of the blocks to use
3738 
3739    Level: intermediate
3740 
3741    Notes:
3742    If the nnz parameter is given then the nz parameter is ignored
3743 
3744    You can call MatGetInfo() to get information on how effective the preallocation was;
3745    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3746    You can also run with the option -info and look for messages with the string
3747    malloc in them to see if additional memory allocation was needed.
3748 
3749    The block AIJ format is fully compatible with standard Fortran 77
3750    storage.  That is, the stored row and column indices can begin at
3751    either one (as in Fortran) or zero.  See the users' manual for details.
3752 
3753    Specify the preallocated storage with either nz or nnz (not both).
3754    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3755    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3756 
3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3758 @*/
3759 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3760 {
3761   PetscErrorCode ierr;
3762 
3763   PetscFunctionBegin;
3764   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3765   PetscFunctionReturn(0);
3766 }
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3770 /*@C
3771    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3772    (the default sequential PETSc format).
3773 
3774    Collective on MPI_Comm
3775 
3776    Input Parameters:
3777 +  A - the matrix
3778 .  i - the indices into j for the start of each local row (starts with zero)
3779 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3780 -  v - optional values in the matrix
3781 
3782    Level: developer
3783 
3784 .keywords: matrix, aij, compressed row, sparse
3785 
3786 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3787 @*/
3788 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3789 {
3790   PetscErrorCode ierr;
3791 
3792   PetscFunctionBegin;
3793   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3794   PetscFunctionReturn(0);
3795 }
3796 
3797 
3798 #undef __FUNCT__
3799 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3800 /*@
3801      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3802 
3803      Collective on MPI_Comm
3804 
3805    Input Parameters:
3806 +  comm - must be an MPI communicator of size 1
3807 .  bs - size of block
3808 .  m - number of rows
3809 .  n - number of columns
3810 .  i - row indices
3811 .  j - column indices
3812 -  a - matrix values
3813 
3814    Output Parameter:
3815 .  mat - the matrix
3816 
3817    Level: advanced
3818 
3819    Notes:
3820        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3821     once the matrix is destroyed
3822 
3823        You cannot set new nonzero locations into this matrix, that will generate an error.
3824 
3825        The i and j indices are 0 based
3826 
3827        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3828 
3829 
3830 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3831 
3832 @*/
3833 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3834 {
3835   PetscErrorCode ierr;
3836   PetscInt       ii;
3837   Mat_SeqBAIJ    *baij;
3838 
3839   PetscFunctionBegin;
3840   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3841   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3842 
3843   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3844   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3845   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3846   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3847   baij = (Mat_SeqBAIJ*)(*mat)->data;
3848   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3849   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3850 
3851   baij->i = i;
3852   baij->j = j;
3853   baij->a = a;
3854   baij->singlemalloc = PETSC_FALSE;
3855   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3856   baij->free_a       = PETSC_FALSE;
3857   baij->free_ij       = PETSC_FALSE;
3858 
3859   for (ii=0; ii<m; ii++) {
3860     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3861 #if defined(PETSC_USE_DEBUG)
3862     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3863 #endif
3864   }
3865 #if defined(PETSC_USE_DEBUG)
3866   for (ii=0; ii<baij->i[m]; ii++) {
3867     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3868     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3869   }
3870 #endif
3871 
3872   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3873   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3874   PetscFunctionReturn(0);
3875 }
3876