xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ab93d7be622546495d84e7c23b06d6fbd0ced02d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   if (!a->singlemalloc) {
831     if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
832     if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
833     if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
834   } else if (a) {
835     ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);
836   }
837   if (a->row) {
838     ierr = ISDestroy(a->row);CHKERRQ(ierr);
839   }
840   if (a->col) {
841     ierr = ISDestroy(a->col);CHKERRQ(ierr);
842   }
843   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
844   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
845   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
846   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
847   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
848   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
849   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
850 #if defined(PETSC_USE_MAT_SINGLE)
851   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
852 #endif
853   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
854   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
855 
856   ierr = PetscFree(a);CHKERRQ(ierr);
857 
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatSetOption_SeqBAIJ"
870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
871 {
872   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   switch (op) {
877   case MAT_ROW_ORIENTED:
878     a->roworiented    = PETSC_TRUE;
879     break;
880   case MAT_COLUMN_ORIENTED:
881     a->roworiented    = PETSC_FALSE;
882     break;
883   case MAT_COLUMNS_SORTED:
884     a->sorted         = PETSC_TRUE;
885     break;
886   case MAT_COLUMNS_UNSORTED:
887     a->sorted         = PETSC_FALSE;
888     break;
889   case MAT_KEEP_ZEROED_ROWS:
890     a->keepzeroedrows = PETSC_TRUE;
891     break;
892   case MAT_NO_NEW_NONZERO_LOCATIONS:
893     a->nonew          = 1;
894     break;
895   case MAT_NEW_NONZERO_LOCATION_ERR:
896     a->nonew          = -1;
897     break;
898   case MAT_NEW_NONZERO_ALLOCATION_ERR:
899     a->nonew          = -2;
900     break;
901   case MAT_YES_NEW_NONZERO_LOCATIONS:
902     a->nonew          = 0;
903     break;
904   case MAT_ROWS_SORTED:
905   case MAT_ROWS_UNSORTED:
906   case MAT_YES_NEW_DIAGONALS:
907   case MAT_IGNORE_OFF_PROC_ENTRIES:
908   case MAT_USE_HASH_TABLE:
909     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
910     break;
911   case MAT_NO_NEW_DIAGONALS:
912     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
913   case MAT_SYMMETRIC:
914   case MAT_STRUCTURALLY_SYMMETRIC:
915   case MAT_NOT_SYMMETRIC:
916   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
917   case MAT_HERMITIAN:
918   case MAT_NOT_HERMITIAN:
919   case MAT_SYMMETRY_ETERNAL:
920   case MAT_NOT_SYMMETRY_ETERNAL:
921     break;
922   default:
923     SETERRQ(PETSC_ERR_SUP,"unknown option");
924   }
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatGetRow_SeqBAIJ"
930 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
931 {
932   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
933   PetscErrorCode ierr;
934   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
935   MatScalar      *aa,*aa_i;
936   PetscScalar    *v_i;
937 
938   PetscFunctionBegin;
939   bs  = A->bs;
940   ai  = a->i;
941   aj  = a->j;
942   aa  = a->a;
943   bs2 = a->bs2;
944 
945   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
946 
947   bn  = row/bs;   /* Block number */
948   bp  = row % bs; /* Block Position */
949   M   = ai[bn+1] - ai[bn];
950   *nz = bs*M;
951 
952   if (v) {
953     *v = 0;
954     if (*nz) {
955       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
956       for (i=0; i<M; i++) { /* for each block in the block row */
957         v_i  = *v + i*bs;
958         aa_i = aa + bs2*(ai[bn] + i);
959         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
960       }
961     }
962   }
963 
964   if (idx) {
965     *idx = 0;
966     if (*nz) {
967       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
968       for (i=0; i<M; i++) { /* for each block in the block row */
969         idx_i = *idx + i*bs;
970         itmp  = bs*aj[ai[bn] + i];
971         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
972       }
973     }
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
980 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
981 {
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
986   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatTranspose_SeqBAIJ"
992 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
993 {
994   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
995   Mat            C;
996   PetscErrorCode ierr;
997   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
998   PetscInt       *rows,*cols,bs2=a->bs2;
999   PetscScalar    *array;
1000 
1001   PetscFunctionBegin;
1002   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1003   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1004   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1005 
1006 #if defined(PETSC_USE_MAT_SINGLE)
1007   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1008   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1009 #else
1010   array = a->a;
1011 #endif
1012 
1013   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1014   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
1015   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1016   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1017   ierr = PetscFree(col);CHKERRQ(ierr);
1018   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1019   cols = rows + bs;
1020   for (i=0; i<mbs; i++) {
1021     cols[0] = i*bs;
1022     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1023     len = ai[i+1] - ai[i];
1024     for (j=0; j<len; j++) {
1025       rows[0] = (*aj++)*bs;
1026       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1027       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1028       array += bs2;
1029     }
1030   }
1031   ierr = PetscFree(rows);CHKERRQ(ierr);
1032 #if defined(PETSC_USE_MAT_SINGLE)
1033   ierr = PetscFree(array);
1034 #endif
1035 
1036   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038 
1039   if (B) {
1040     *B = C;
1041   } else {
1042     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1043   }
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1049 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1050 {
1051   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1052   PetscErrorCode ierr;
1053   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1054   int            fd;
1055   PetscScalar    *aa;
1056   FILE           *file;
1057 
1058   PetscFunctionBegin;
1059   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1060   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1061   col_lens[0] = MAT_FILE_COOKIE;
1062 
1063   col_lens[1] = A->m;
1064   col_lens[2] = A->n;
1065   col_lens[3] = a->nz*bs2;
1066 
1067   /* store lengths of each row and write (including header) to file */
1068   count = 0;
1069   for (i=0; i<a->mbs; i++) {
1070     for (j=0; j<bs; j++) {
1071       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1072     }
1073   }
1074   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1075   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1076 
1077   /* store column indices (zero start index) */
1078   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1079   count = 0;
1080   for (i=0; i<a->mbs; i++) {
1081     for (j=0; j<bs; j++) {
1082       for (k=a->i[i]; k<a->i[i+1]; k++) {
1083         for (l=0; l<bs; l++) {
1084           jj[count++] = bs*a->j[k] + l;
1085         }
1086       }
1087     }
1088   }
1089   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1090   ierr = PetscFree(jj);CHKERRQ(ierr);
1091 
1092   /* store nonzero values */
1093   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1094   count = 0;
1095   for (i=0; i<a->mbs; i++) {
1096     for (j=0; j<bs; j++) {
1097       for (k=a->i[i]; k<a->i[i+1]; k++) {
1098         for (l=0; l<bs; l++) {
1099           aa[count++] = a->a[bs2*k + l*bs + j];
1100         }
1101       }
1102     }
1103   }
1104   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1105   ierr = PetscFree(aa);CHKERRQ(ierr);
1106 
1107   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1108   if (file) {
1109     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1116 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1117 {
1118   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode    ierr;
1120   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1121   PetscViewerFormat format;
1122 
1123   PetscFunctionBegin;
1124   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1125   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1126     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1127   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1128     Mat aij;
1129     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1130     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1131     ierr = MatDestroy(aij);CHKERRQ(ierr);
1132   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1133      PetscFunctionReturn(0);
1134   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1135     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1136     for (i=0; i<a->mbs; i++) {
1137       for (j=0; j<bs; j++) {
1138         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1139         for (k=a->i[i]; k<a->i[i+1]; k++) {
1140           for (l=0; l<bs; l++) {
1141 #if defined(PETSC_USE_COMPLEX)
1142             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1143               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1144                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1146               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1147                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1148             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1150             }
1151 #else
1152             if (a->a[bs2*k + l*bs + j] != 0.0) {
1153               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1154             }
1155 #endif
1156           }
1157         }
1158         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1159       }
1160     }
1161     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1162   } else {
1163     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1164     for (i=0; i<a->mbs; i++) {
1165       for (j=0; j<bs; j++) {
1166         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1167         for (k=a->i[i]; k<a->i[i+1]; k++) {
1168           for (l=0; l<bs; l++) {
1169 #if defined(PETSC_USE_COMPLEX)
1170             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1171               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1172                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1173             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1174               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1175                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1176             } else {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1178             }
1179 #else
1180             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1181 #endif
1182           }
1183         }
1184         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1185       }
1186     }
1187     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1188   }
1189   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1195 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1196 {
1197   Mat            A = (Mat) Aa;
1198   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1199   PetscErrorCode ierr;
1200   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1201   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1202   MatScalar      *aa;
1203   PetscViewer    viewer;
1204 
1205   PetscFunctionBegin;
1206 
1207   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1208   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1209 
1210   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1211 
1212   /* loop over matrix elements drawing boxes */
1213   color = PETSC_DRAW_BLUE;
1214   for (i=0,row=0; i<mbs; i++,row+=bs) {
1215     for (j=a->i[i]; j<a->i[i+1]; j++) {
1216       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1217       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1218       aa = a->a + j*bs2;
1219       for (k=0; k<bs; k++) {
1220         for (l=0; l<bs; l++) {
1221           if (PetscRealPart(*aa++) >=  0.) continue;
1222           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1223         }
1224       }
1225     }
1226   }
1227   color = PETSC_DRAW_CYAN;
1228   for (i=0,row=0; i<mbs; i++,row+=bs) {
1229     for (j=a->i[i]; j<a->i[i+1]; j++) {
1230       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1231       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1232       aa = a->a + j*bs2;
1233       for (k=0; k<bs; k++) {
1234         for (l=0; l<bs; l++) {
1235           if (PetscRealPart(*aa++) != 0.) continue;
1236           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1237         }
1238       }
1239     }
1240   }
1241 
1242   color = PETSC_DRAW_RED;
1243   for (i=0,row=0; i<mbs; i++,row+=bs) {
1244     for (j=a->i[i]; j<a->i[i+1]; j++) {
1245       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1246       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1247       aa = a->a + j*bs2;
1248       for (k=0; k<bs; k++) {
1249         for (l=0; l<bs; l++) {
1250           if (PetscRealPart(*aa++) <= 0.) continue;
1251           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1252         }
1253       }
1254     }
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1261 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1262 {
1263   PetscErrorCode ierr;
1264   PetscReal      xl,yl,xr,yr,w,h;
1265   PetscDraw      draw;
1266   PetscTruth     isnull;
1267 
1268   PetscFunctionBegin;
1269 
1270   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1271   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1272 
1273   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1274   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1275   xr += w;    yr += h;  xl = -w;     yl = -h;
1276   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1277   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1278   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatView_SeqBAIJ"
1284 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1285 {
1286   PetscErrorCode ierr;
1287   PetscTruth     iascii,isbinary,isdraw;
1288 
1289   PetscFunctionBegin;
1290   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1291   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1293   if (iascii){
1294     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1295   } else if (isbinary) {
1296     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1297   } else if (isdraw) {
1298     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1299   } else {
1300     Mat B;
1301     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1302     ierr = MatView(B,viewer);CHKERRQ(ierr);
1303     ierr = MatDestroy(B);CHKERRQ(ierr);
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1311 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1312 {
1313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1314   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1315   PetscInt    *ai = a->i,*ailen = a->ilen;
1316   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1317   MatScalar   *ap,*aa = a->a,zero = 0.0;
1318 
1319   PetscFunctionBegin;
1320   for (k=0; k<m; k++) { /* loop over rows */
1321     row  = im[k]; brow = row/bs;
1322     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1323     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1324     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1325     nrow = ailen[brow];
1326     for (l=0; l<n; l++) { /* loop over columns */
1327       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1328       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1329       col  = in[l] ;
1330       bcol = col/bs;
1331       cidx = col%bs;
1332       ridx = row%bs;
1333       high = nrow;
1334       low  = 0; /* assume unsorted */
1335       while (high-low > 5) {
1336         t = (low+high)/2;
1337         if (rp[t] > bcol) high = t;
1338         else             low  = t;
1339       }
1340       for (i=low; i<high; i++) {
1341         if (rp[i] > bcol) break;
1342         if (rp[i] == bcol) {
1343           *v++ = ap[bs2*i+bs*cidx+ridx];
1344           goto finished;
1345         }
1346       }
1347       *v++ = zero;
1348       finished:;
1349     }
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #if defined(PETSC_USE_MAT_SINGLE)
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1357 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1358 {
1359   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1360   PetscErrorCode ierr;
1361   PetscInt       i,N = m*n*b->bs2;
1362   MatScalar      *vsingle;
1363 
1364   PetscFunctionBegin;
1365   if (N > b->setvalueslen) {
1366     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1367     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1368     b->setvalueslen  = N;
1369   }
1370   vsingle = b->setvaluescopy;
1371   for (i=0; i<N; i++) {
1372     vsingle[i] = v[i];
1373   }
1374   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 #endif
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1382 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1383 {
1384   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1385   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1386   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1387   PetscErrorCode  ierr;
1388   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1389   PetscTruth      roworiented=a->roworiented;
1390   const MatScalar *value = v;
1391   MatScalar       *ap,*aa = a->a,*bap;
1392 
1393   PetscFunctionBegin;
1394   if (roworiented) {
1395     stepval = (n-1)*bs;
1396   } else {
1397     stepval = (m-1)*bs;
1398   }
1399   for (k=0; k<m; k++) { /* loop over added rows */
1400     row  = im[k];
1401     if (row < 0) continue;
1402 #if defined(PETSC_USE_DEBUG)
1403     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1404 #endif
1405     rp   = aj + ai[row];
1406     ap   = aa + bs2*ai[row];
1407     rmax = imax[row];
1408     nrow = ailen[row];
1409     low  = 0;
1410     high = nrow;
1411     for (l=0; l<n; l++) { /* loop over added columns */
1412       if (in[l] < 0) continue;
1413 #if defined(PETSC_USE_DEBUG)
1414       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1415 #endif
1416       col = in[l];
1417       if (roworiented) {
1418         value = v + k*(stepval+bs)*bs + l*bs;
1419       } else {
1420         value = v + l*(stepval+bs)*bs + k*bs;
1421       }
1422       if (col < lastcol) low = 0; else high = nrow;
1423       lastcol = col;
1424       while (high-low > 7) {
1425         t = (low+high)/2;
1426         if (rp[t] > col) high = t;
1427         else             low  = t;
1428       }
1429       for (i=low; i<high; i++) {
1430         if (rp[i] > col) break;
1431         if (rp[i] == col) {
1432           bap  = ap +  bs2*i;
1433           if (roworiented) {
1434             if (is == ADD_VALUES) {
1435               for (ii=0; ii<bs; ii++,value+=stepval) {
1436                 for (jj=ii; jj<bs2; jj+=bs) {
1437                   bap[jj] += *value++;
1438                 }
1439               }
1440             } else {
1441               for (ii=0; ii<bs; ii++,value+=stepval) {
1442                 for (jj=ii; jj<bs2; jj+=bs) {
1443                   bap[jj] = *value++;
1444                 }
1445               }
1446             }
1447           } else {
1448             if (is == ADD_VALUES) {
1449               for (ii=0; ii<bs; ii++,value+=stepval) {
1450                 for (jj=0; jj<bs; jj++) {
1451                   *bap++ += *value++;
1452                 }
1453               }
1454             } else {
1455               for (ii=0; ii<bs; ii++,value+=stepval) {
1456                 for (jj=0; jj<bs; jj++) {
1457                   *bap++  = *value++;
1458                 }
1459               }
1460             }
1461           }
1462           goto noinsert2;
1463         }
1464       }
1465       if (nonew == 1) goto noinsert2;
1466       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1467       if (nrow >= rmax) {
1468         /* there is no extra room in row, therefore enlarge */
1469         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1470         MatScalar *new_a;
1471 
1472         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1473 
1474         /* malloc new storage space */
1475         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);
1476 
1477         /* copy over old data into new slots */
1478         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1479         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1480         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1481         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1482         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1483         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1484         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1485         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1486         /* free up old matrix storage */
1487        if (!a->singlemalloc) {
1488           ierr = PetscFree(a->a);CHKERRQ(ierr);
1489           ierr = PetscFree(a->i);CHKERRQ(ierr);
1490           ierr = PetscFree(a->j);CHKERRQ(ierr);
1491         } else {
1492           ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);
1493         }
1494         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1495         a->singlemalloc = PETSC_TRUE;
1496 
1497         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1498         rmax = imax[row] = imax[row] + CHUNKSIZE;
1499         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1500         a->maxnz += bs2*CHUNKSIZE;
1501         a->reallocs++;
1502         a->nz++;
1503       }
1504       N = nrow++ - 1;
1505       /* shift up all the later entries in this row */
1506       for (ii=N; ii>=i; ii--) {
1507         rp[ii+1] = rp[ii];
1508         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1509       }
1510       if (N >= i) {
1511         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1512       }
1513       rp[i] = col;
1514       bap   = ap +  bs2*i;
1515       if (roworiented) {
1516         for (ii=0; ii<bs; ii++,value+=stepval) {
1517           for (jj=ii; jj<bs2; jj+=bs) {
1518             bap[jj] = *value++;
1519           }
1520         }
1521       } else {
1522         for (ii=0; ii<bs; ii++,value+=stepval) {
1523           for (jj=0; jj<bs; jj++) {
1524             *bap++  = *value++;
1525           }
1526         }
1527       }
1528       noinsert2:;
1529       low = i;
1530     }
1531     ailen[row] = nrow;
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1538 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1539 {
1540   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1541   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1542   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1543   PetscErrorCode ierr;
1544   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1545   MatScalar      *aa = a->a,*ap;
1546   PetscReal      ratio=0.6;
1547 
1548   PetscFunctionBegin;
1549   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1550 
1551   if (m) rmax = ailen[0];
1552   for (i=1; i<mbs; i++) {
1553     /* move each row back by the amount of empty slots (fshift) before it*/
1554     fshift += imax[i-1] - ailen[i-1];
1555     rmax   = PetscMax(rmax,ailen[i]);
1556     if (fshift) {
1557       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1558       N = ailen[i];
1559       for (j=0; j<N; j++) {
1560         ip[j-fshift] = ip[j];
1561         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1562       }
1563     }
1564     ai[i] = ai[i-1] + ailen[i-1];
1565   }
1566   if (mbs) {
1567     fshift += imax[mbs-1] - ailen[mbs-1];
1568     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1569   }
1570   /* reset ilen and imax for each row */
1571   for (i=0; i<mbs; i++) {
1572     ailen[i] = imax[i] = ai[i+1] - ai[i];
1573   }
1574   a->nz = ai[mbs];
1575 
1576   /* diagonals may have moved, so kill the diagonal pointers */
1577   a->idiagvalid = PETSC_FALSE;
1578   if (fshift && a->diag) {
1579     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1580     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1581     a->diag = 0;
1582   }
1583   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1584   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1585   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1586   a->reallocs          = 0;
1587   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1588 
1589   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1590   if (a->compressedrow.use){
1591     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1592   }
1593 
1594   A->same_nonzero = PETSC_TRUE;
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 /*
1599    This function returns an array of flags which indicate the locations of contiguous
1600    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1601    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1602    Assume: sizes should be long enough to hold all the values.
1603 */
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1606 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1607 {
1608   PetscInt   i,j,k,row;
1609   PetscTruth flg;
1610 
1611   PetscFunctionBegin;
1612   for (i=0,j=0; i<n; j++) {
1613     row = idx[i];
1614     if (row%bs!=0) { /* Not the begining of a block */
1615       sizes[j] = 1;
1616       i++;
1617     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1618       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1619       i++;
1620     } else { /* Begining of the block, so check if the complete block exists */
1621       flg = PETSC_TRUE;
1622       for (k=1; k<bs; k++) {
1623         if (row+k != idx[i+k]) { /* break in the block */
1624           flg = PETSC_FALSE;
1625           break;
1626         }
1627       }
1628       if (flg) { /* No break in the bs */
1629         sizes[j] = bs;
1630         i+= bs;
1631       } else {
1632         sizes[j] = 1;
1633         i++;
1634       }
1635     }
1636   }
1637   *bs_max = j;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1643 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1644 {
1645   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1646   PetscErrorCode ierr;
1647   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1648   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1649   PetscScalar    zero = 0.0;
1650   MatScalar      *aa;
1651 
1652   PetscFunctionBegin;
1653   /* Make a copy of the IS and  sort it */
1654   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1655   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1656 
1657   /* allocate memory for rows,sizes */
1658   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1659   sizes = rows + is_n;
1660 
1661   /* copy IS values to rows, and sort them */
1662   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1663   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1664   if (baij->keepzeroedrows) {
1665     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1666     bs_max = is_n;
1667     A->same_nonzero = PETSC_TRUE;
1668   } else {
1669     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1670     A->same_nonzero = PETSC_FALSE;
1671   }
1672   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1673 
1674   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1675     row   = rows[j];
1676     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1677     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1678     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1679     if (sizes[i] == bs && !baij->keepzeroedrows) {
1680       if (diag) {
1681         if (baij->ilen[row/bs] > 0) {
1682           baij->ilen[row/bs]       = 1;
1683           baij->j[baij->i[row/bs]] = row/bs;
1684           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1685         }
1686         /* Now insert all the diagonal values for this bs */
1687         for (k=0; k<bs; k++) {
1688           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1689         }
1690       } else { /* (!diag) */
1691         baij->ilen[row/bs] = 0;
1692       } /* end (!diag) */
1693     } else { /* (sizes[i] != bs) */
1694 #if defined (PETSC_USE_DEBUG)
1695       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1696 #endif
1697       for (k=0; k<count; k++) {
1698         aa[0] =  zero;
1699         aa    += bs;
1700       }
1701       if (diag) {
1702         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1703       }
1704     }
1705   }
1706 
1707   ierr = PetscFree(rows);CHKERRQ(ierr);
1708   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1714 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1715 {
1716   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1717   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1718   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1719   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1720   PetscErrorCode ierr;
1721   PetscInt       ridx,cidx,bs2=a->bs2;
1722   PetscTruth     roworiented=a->roworiented;
1723   MatScalar      *ap,value,*aa=a->a,*bap;
1724 
1725   PetscFunctionBegin;
1726   for (k=0; k<m; k++) { /* loop over added rows */
1727     row  = im[k]; brow = row/bs;
1728     if (row < 0) continue;
1729 #if defined(PETSC_USE_DEBUG)
1730     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1731 #endif
1732     rp   = aj + ai[brow];
1733     ap   = aa + bs2*ai[brow];
1734     rmax = imax[brow];
1735     nrow = ailen[brow];
1736     low  = 0;
1737     high = nrow;
1738     for (l=0; l<n; l++) { /* loop over added columns */
1739       if (in[l] < 0) continue;
1740 #if defined(PETSC_USE_DEBUG)
1741       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1742 #endif
1743       col = in[l]; bcol = col/bs;
1744       ridx = row % bs; cidx = col % bs;
1745       if (roworiented) {
1746         value = v[l + k*n];
1747       } else {
1748         value = v[k + l*m];
1749       }
1750       if (col < lastcol) low = 0; else high = nrow;
1751       lastcol = col;
1752       while (high-low > 7) {
1753         t = (low+high)/2;
1754         if (rp[t] > bcol) high = t;
1755         else              low  = t;
1756       }
1757       for (i=low; i<high; i++) {
1758         if (rp[i] > bcol) break;
1759         if (rp[i] == bcol) {
1760           bap  = ap +  bs2*i + bs*cidx + ridx;
1761           if (is == ADD_VALUES) *bap += value;
1762           else                  *bap  = value;
1763           goto noinsert1;
1764         }
1765       }
1766       if (nonew == 1) goto noinsert1;
1767       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1768       if (nrow >= rmax) {
1769         /* there is no extra room in row, therefore enlarge */
1770         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1771         MatScalar *new_a;
1772 
1773         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1774 
1775         /* Malloc new storage space */
1776         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);
1777 
1778         /* copy over old data into new slots */
1779         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1780         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1781         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1782         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1783         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1784         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1785         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1786         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1787         /* free up old matrix storage */
1788         /* free up old matrix storage */
1789        if (!a->singlemalloc) {
1790           ierr = PetscFree(a->a);CHKERRQ(ierr);
1791           ierr = PetscFree(a->i);CHKERRQ(ierr);
1792           ierr = PetscFree(a->j);CHKERRQ(ierr);
1793         } else {
1794           ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);
1795         }
1796         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1797         a->singlemalloc = PETSC_TRUE;
1798 
1799         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1800         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1801         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1802         a->maxnz += bs2*CHUNKSIZE;
1803         a->reallocs++;
1804         a->nz++;
1805       }
1806       N = nrow++ - 1;
1807       /* shift up all the later entries in this row */
1808       for (ii=N; ii>=i; ii--) {
1809         rp[ii+1] = rp[ii];
1810         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1811       }
1812       if (N>=i) {
1813         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1814       }
1815       rp[i]                      = bcol;
1816       ap[bs2*i + bs*cidx + ridx] = value;
1817       noinsert1:;
1818       low = i;
1819     }
1820     ailen[brow] = nrow;
1821   }
1822   A->same_nonzero = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1829 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1830 {
1831   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1832   Mat            outA;
1833   PetscErrorCode ierr;
1834   PetscTruth     row_identity,col_identity;
1835 
1836   PetscFunctionBegin;
1837   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1838   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1839   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1840   if (!row_identity || !col_identity) {
1841     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1842   }
1843 
1844   outA          = inA;
1845   inA->factor   = FACTOR_LU;
1846 
1847   if (!a->diag) {
1848     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1849   }
1850 
1851   a->row        = row;
1852   a->col        = col;
1853   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1854   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1855 
1856   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1857   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1858   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1859 
1860   /*
1861       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1862       for ILU(0) factorization with natural ordering
1863   */
1864   if (inA->bs < 8) {
1865     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1866   } else {
1867     if (!a->solve_work) {
1868       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1869       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1870     }
1871   }
1872 
1873   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1874 
1875   PetscFunctionReturn(0);
1876 }
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1879 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1880 {
1881   static PetscTruth called = PETSC_FALSE;
1882   MPI_Comm          comm = A->comm;
1883   PetscErrorCode    ierr;
1884 
1885   PetscFunctionBegin;
1886   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1887   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1888   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 EXTERN_C_BEGIN
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1895 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1896 {
1897   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1898   PetscInt    i,nz,nbs;
1899 
1900   PetscFunctionBegin;
1901   nz  = baij->maxnz/baij->bs2;
1902   nbs = baij->nbs;
1903   for (i=0; i<nz; i++) {
1904     baij->j[i] = indices[i];
1905   }
1906   baij->nz = nz;
1907   for (i=0; i<nbs; i++) {
1908     baij->ilen[i] = baij->imax[i];
1909   }
1910 
1911   PetscFunctionReturn(0);
1912 }
1913 EXTERN_C_END
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1917 /*@
1918     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1919        in the matrix.
1920 
1921   Input Parameters:
1922 +  mat - the SeqBAIJ matrix
1923 -  indices - the column indices
1924 
1925   Level: advanced
1926 
1927   Notes:
1928     This can be called if you have precomputed the nonzero structure of the
1929   matrix and want to provide it to the matrix object to improve the performance
1930   of the MatSetValues() operation.
1931 
1932     You MUST have set the correct numbers of nonzeros per row in the call to
1933   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1934 
1935     MUST be called before any calls to MatSetValues();
1936 
1937 @*/
1938 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1939 {
1940   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1941 
1942   PetscFunctionBegin;
1943   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1944   PetscValidPointer(indices,2);
1945   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1946   if (f) {
1947     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1948   } else {
1949     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1950   }
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1956 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1957 {
1958   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1959   PetscErrorCode ierr;
1960   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1961   PetscReal      atmp;
1962   PetscScalar    *x,zero = 0.0;
1963   MatScalar      *aa;
1964   PetscInt       ncols,brow,krow,kcol;
1965 
1966   PetscFunctionBegin;
1967   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1968   bs   = A->bs;
1969   aa   = a->a;
1970   ai   = a->i;
1971   aj   = a->j;
1972   mbs = a->mbs;
1973 
1974   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1975   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1976   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1977   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1978   for (i=0; i<mbs; i++) {
1979     ncols = ai[1] - ai[0]; ai++;
1980     brow  = bs*i;
1981     for (j=0; j<ncols; j++){
1982       /* bcol = bs*(*aj); */
1983       for (kcol=0; kcol<bs; kcol++){
1984         for (krow=0; krow<bs; krow++){
1985           atmp = PetscAbsScalar(*aa); aa++;
1986           row = brow + krow;    /* row index */
1987           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1988           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1989         }
1990       }
1991       aj++;
1992     }
1993   }
1994   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2000 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2001 {
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2011 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2012 {
2013   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2014   PetscFunctionBegin;
2015   *array = a->a;
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2021 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2022 {
2023   PetscFunctionBegin;
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #include "petscblaslapack.h"
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2030 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
2031 {
2032   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2033   PetscErrorCode ierr;
2034   PetscInt       i,bs=Y->bs,j,bs2;
2035   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
2036 
2037   PetscFunctionBegin;
2038   if (str == SAME_NONZERO_PATTERN) {
2039     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2040   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2041     if (y->xtoy && y->XtoY != X) {
2042       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2043       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2044     }
2045     if (!y->xtoy) { /* get xtoy */
2046       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2047       y->XtoY = X;
2048     }
2049     bs2 = bs*bs;
2050     for (i=0; i<x->nz; i++) {
2051       j = 0;
2052       while (j < bs2){
2053         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2054         j++;
2055       }
2056     }
2057     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2058   } else {
2059     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 /* -------------------------------------------------------------------*/
2065 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2066        MatGetRow_SeqBAIJ,
2067        MatRestoreRow_SeqBAIJ,
2068        MatMult_SeqBAIJ_N,
2069 /* 4*/ MatMultAdd_SeqBAIJ_N,
2070        MatMultTranspose_SeqBAIJ,
2071        MatMultTransposeAdd_SeqBAIJ,
2072        MatSolve_SeqBAIJ_N,
2073        0,
2074        0,
2075 /*10*/ 0,
2076        MatLUFactor_SeqBAIJ,
2077        0,
2078        0,
2079        MatTranspose_SeqBAIJ,
2080 /*15*/ MatGetInfo_SeqBAIJ,
2081        MatEqual_SeqBAIJ,
2082        MatGetDiagonal_SeqBAIJ,
2083        MatDiagonalScale_SeqBAIJ,
2084        MatNorm_SeqBAIJ,
2085 /*20*/ 0,
2086        MatAssemblyEnd_SeqBAIJ,
2087        0,
2088        MatSetOption_SeqBAIJ,
2089        MatZeroEntries_SeqBAIJ,
2090 /*25*/ MatZeroRows_SeqBAIJ,
2091        MatLUFactorSymbolic_SeqBAIJ,
2092        MatLUFactorNumeric_SeqBAIJ_N,
2093        MatCholeskyFactorSymbolic_SeqBAIJ,
2094        MatCholeskyFactorNumeric_SeqBAIJ_N,
2095 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2096        MatILUFactorSymbolic_SeqBAIJ,
2097        MatICCFactorSymbolic_SeqBAIJ,
2098        MatGetArray_SeqBAIJ,
2099        MatRestoreArray_SeqBAIJ,
2100 /*35*/ MatDuplicate_SeqBAIJ,
2101        0,
2102        0,
2103        MatILUFactor_SeqBAIJ,
2104        0,
2105 /*40*/ MatAXPY_SeqBAIJ,
2106        MatGetSubMatrices_SeqBAIJ,
2107        MatIncreaseOverlap_SeqBAIJ,
2108        MatGetValues_SeqBAIJ,
2109        0,
2110 /*45*/ MatPrintHelp_SeqBAIJ,
2111        MatScale_SeqBAIJ,
2112        0,
2113        0,
2114        0,
2115 /*50*/ 0,
2116        MatGetRowIJ_SeqBAIJ,
2117        MatRestoreRowIJ_SeqBAIJ,
2118        0,
2119        0,
2120 /*55*/ 0,
2121        0,
2122        0,
2123        0,
2124        MatSetValuesBlocked_SeqBAIJ,
2125 /*60*/ MatGetSubMatrix_SeqBAIJ,
2126        MatDestroy_SeqBAIJ,
2127        MatView_SeqBAIJ,
2128        MatGetPetscMaps_Petsc,
2129        0,
2130 /*65*/ 0,
2131        0,
2132        0,
2133        0,
2134        0,
2135 /*70*/ MatGetRowMax_SeqBAIJ,
2136        MatConvert_Basic,
2137        0,
2138        0,
2139        0,
2140 /*75*/ 0,
2141        0,
2142        0,
2143        0,
2144        0,
2145 /*80*/ 0,
2146        0,
2147        0,
2148        0,
2149        MatLoad_SeqBAIJ,
2150 /*85*/ 0,
2151        0,
2152        0,
2153        0,
2154        0,
2155 /*90*/ 0,
2156        0,
2157        0,
2158        0,
2159        0,
2160 /*95*/ 0,
2161        0,
2162        0,
2163        0};
2164 
2165 EXTERN_C_BEGIN
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2168 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2169 {
2170   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2171   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   if (aij->nonew != 1) {
2176     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2177   }
2178 
2179   /* allocate space for values if not already there */
2180   if (!aij->saved_values) {
2181     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2182   }
2183 
2184   /* copy values over */
2185   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 EXTERN_C_END
2189 
2190 EXTERN_C_BEGIN
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2193 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2194 {
2195   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2196   PetscErrorCode ierr;
2197   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2198 
2199   PetscFunctionBegin;
2200   if (aij->nonew != 1) {
2201     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2202   }
2203   if (!aij->saved_values) {
2204     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2205   }
2206 
2207   /* copy values over */
2208   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 EXTERN_C_END
2212 
2213 EXTERN_C_BEGIN
2214 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2215 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2216 EXTERN_C_END
2217 
2218 EXTERN_C_BEGIN
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2221 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2222 {
2223   Mat_SeqBAIJ    *b;
2224   PetscErrorCode ierr;
2225   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2226   PetscTruth     flg,skipallocation = PETSC_FALSE;
2227 
2228   PetscFunctionBegin;
2229 
2230   if (nz == MAT_SKIP_ALLOCATION) {
2231     skipallocation = PETSC_TRUE;
2232     nz             = 0;
2233   }
2234 
2235   B->preallocated = PETSC_TRUE;
2236   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2237   if (nnz && newbs != bs) {
2238     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2239   }
2240   bs   = newbs;
2241 
2242   mbs  = B->m/bs;
2243   nbs  = B->n/bs;
2244   bs2  = bs*bs;
2245 
2246   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2247     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2248   }
2249 
2250   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2251   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2252   if (nnz) {
2253     for (i=0; i<mbs; i++) {
2254       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2255       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2256     }
2257   }
2258 
2259   b       = (Mat_SeqBAIJ*)B->data;
2260   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2261   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2262   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2263   if (!flg) {
2264     switch (bs) {
2265     case 1:
2266       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2267       B->ops->mult            = MatMult_SeqBAIJ_1;
2268       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2269       break;
2270     case 2:
2271       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2272       B->ops->mult            = MatMult_SeqBAIJ_2;
2273       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2274       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2275       break;
2276     case 3:
2277       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2278       B->ops->mult            = MatMult_SeqBAIJ_3;
2279       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2280       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2281       break;
2282     case 4:
2283       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2284       B->ops->mult            = MatMult_SeqBAIJ_4;
2285       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2286       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2287       break;
2288     case 5:
2289       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2290       B->ops->mult            = MatMult_SeqBAIJ_5;
2291       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2292       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2293       break;
2294     case 6:
2295       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2296       B->ops->mult            = MatMult_SeqBAIJ_6;
2297       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2298       break;
2299     case 7:
2300       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2301       B->ops->mult            = MatMult_SeqBAIJ_7;
2302       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2303       break;
2304     default:
2305       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2306       B->ops->mult            = MatMult_SeqBAIJ_N;
2307       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2308       break;
2309     }
2310   }
2311   B->bs      = bs;
2312   b->mbs     = mbs;
2313   b->nbs     = nbs;
2314   if (!skipallocation) {
2315     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2316     /* b->ilen will count nonzeros in each block row so far. */
2317     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2318     if (!nnz) {
2319       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2320       else if (nz <= 0)        nz = 1;
2321       for (i=0; i<mbs; i++) b->imax[i] = nz;
2322       nz = nz*mbs;
2323     } else {
2324       nz = 0;
2325       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2326     }
2327 
2328     /* allocate the matrix space */
2329     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2330     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2331     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2332     b->singlemalloc = PETSC_TRUE;
2333 
2334     b->i[0] = 0;
2335     for (i=1; i<mbs+1; i++) {
2336       b->i[i] = b->i[i-1] + b->imax[i-1];
2337     }
2338   }
2339 
2340   B->bs               = bs;
2341   b->bs2              = bs2;
2342   b->mbs              = mbs;
2343   b->nz               = 0;
2344   b->maxnz            = nz*bs2;
2345   B->info.nz_unneeded = (PetscReal)b->maxnz;
2346   PetscFunctionReturn(0);
2347 }
2348 EXTERN_C_END
2349 
2350 /*MC
2351    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2352    block sparse compressed row format.
2353 
2354    Options Database Keys:
2355 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2356 
2357   Level: beginner
2358 
2359 .seealso: MatCreateSeqBAIJ
2360 M*/
2361 
2362 EXTERN_C_BEGIN
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatCreate_SeqBAIJ"
2365 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2366 {
2367   PetscErrorCode ierr;
2368   PetscMPIInt    size;
2369   Mat_SeqBAIJ    *b;
2370 
2371   PetscFunctionBegin;
2372   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2373   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2374 
2375   B->m = B->M = PetscMax(B->m,B->M);
2376   B->n = B->N = PetscMax(B->n,B->N);
2377   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2378   B->data = (void*)b;
2379   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2380   B->factor           = 0;
2381   B->mapping          = 0;
2382   b->row              = 0;
2383   b->col              = 0;
2384   b->icol             = 0;
2385   b->reallocs         = 0;
2386   b->saved_values     = 0;
2387 #if defined(PETSC_USE_MAT_SINGLE)
2388   b->setvalueslen     = 0;
2389   b->setvaluescopy    = PETSC_NULL;
2390 #endif
2391 
2392   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2393   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2394 
2395   b->sorted           = PETSC_FALSE;
2396   b->roworiented      = PETSC_TRUE;
2397   b->nonew            = 0;
2398   b->diag             = 0;
2399   b->solve_work       = 0;
2400   b->mult_work        = 0;
2401   B->spptr            = 0;
2402   B->info.nz_unneeded = (PetscReal)b->maxnz;
2403   b->keepzeroedrows   = PETSC_FALSE;
2404   b->xtoy              = 0;
2405   b->XtoY              = 0;
2406   b->compressedrow.use     = PETSC_FALSE;
2407   b->compressedrow.nrows   = 0;
2408   b->compressedrow.i       = PETSC_NULL;
2409   b->compressedrow.rindex  = PETSC_NULL;
2410   b->compressedrow.checked = PETSC_FALSE;
2411   B->same_nonzero          = PETSC_FALSE;
2412 
2413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2414                                      "MatInvertBlockDiagonal_SeqBAIJ",
2415                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2416   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2417                                      "MatStoreValues_SeqBAIJ",
2418                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2419   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2420                                      "MatRetrieveValues_SeqBAIJ",
2421                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2422   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2423                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2424                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2425   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2426                                      "MatConvert_SeqBAIJ_SeqAIJ",
2427                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2428 #if !defined(PETSC_USE_64BIT_INT)
2429   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2430                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2431                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2432 #endif
2433   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2434                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2435                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 EXTERN_C_END
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2442 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2443 {
2444   Mat            C;
2445   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2446   PetscErrorCode ierr;
2447   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2448 
2449   PetscFunctionBegin;
2450   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2451 
2452   *B = 0;
2453   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2454   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2455   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2456   c    = (Mat_SeqBAIJ*)C->data;
2457 
2458   C->M   = A->M;
2459   C->N   = A->N;
2460   C->bs  = A->bs;
2461   c->bs2 = a->bs2;
2462   c->mbs = a->mbs;
2463   c->nbs = a->nbs;
2464 
2465   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2466   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2467   for (i=0; i<mbs; i++) {
2468     c->imax[i] = a->imax[i];
2469     c->ilen[i] = a->ilen[i];
2470   }
2471 
2472   /* allocate the matrix space */
2473   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2474   c->singlemalloc = PETSC_TRUE;
2475   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2476   if (mbs > 0) {
2477     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2478     if (cpvalues == MAT_COPY_VALUES) {
2479       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2480     } else {
2481       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2482     }
2483   }
2484   c->sorted      = a->sorted;
2485   c->roworiented = a->roworiented;
2486   c->nonew       = a->nonew;
2487 
2488   if (a->diag) {
2489     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2490     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2491     for (i=0; i<mbs; i++) {
2492       c->diag[i] = a->diag[i];
2493     }
2494   } else c->diag        = 0;
2495   c->nz                 = a->nz;
2496   c->maxnz              = a->maxnz;
2497   c->solve_work         = 0;
2498   c->mult_work          = 0;
2499   C->preallocated       = PETSC_TRUE;
2500   C->assembled          = PETSC_TRUE;
2501 
2502   c->compressedrow.use     = a->compressedrow.use;
2503   c->compressedrow.nrows   = a->compressedrow.nrows;
2504   c->compressedrow.checked = a->compressedrow.checked;
2505   if ( a->compressedrow.checked && a->compressedrow.use){
2506     i = a->compressedrow.nrows;
2507     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2508     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2509     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2510     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2511   } else {
2512     c->compressedrow.use    = PETSC_FALSE;
2513     c->compressedrow.i      = PETSC_NULL;
2514     c->compressedrow.rindex = PETSC_NULL;
2515   }
2516   C->same_nonzero = A->same_nonzero;
2517   *B = C;
2518   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatLoad_SeqBAIJ"
2524 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2525 {
2526   Mat_SeqBAIJ    *a;
2527   Mat            B;
2528   PetscErrorCode ierr;
2529   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2530   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2531   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2532   PetscInt       *masked,nmask,tmp,bs2,ishift;
2533   PetscMPIInt    size;
2534   int            fd;
2535   PetscScalar    *aa;
2536   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2540   bs2  = bs*bs;
2541 
2542   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2543   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2544   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2545   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2546   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2547   M = header[1]; N = header[2]; nz = header[3];
2548 
2549   if (header[3] < 0) {
2550     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2551   }
2552 
2553   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2554 
2555   /*
2556      This code adds extra rows to make sure the number of rows is
2557     divisible by the blocksize
2558   */
2559   mbs        = M/bs;
2560   extra_rows = bs - M + bs*(mbs);
2561   if (extra_rows == bs) extra_rows = 0;
2562   else                  mbs++;
2563   if (extra_rows) {
2564     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2565   }
2566 
2567   /* read in row lengths */
2568   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2569   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2570   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2571 
2572   /* read in column indices */
2573   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2574   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2575   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2576 
2577   /* loop over row lengths determining block row lengths */
2578   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2579   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2580   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2581   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2582   masked   = mask + mbs;
2583   rowcount = 0; nzcount = 0;
2584   for (i=0; i<mbs; i++) {
2585     nmask = 0;
2586     for (j=0; j<bs; j++) {
2587       kmax = rowlengths[rowcount];
2588       for (k=0; k<kmax; k++) {
2589         tmp = jj[nzcount++]/bs;
2590         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2591       }
2592       rowcount++;
2593     }
2594     browlengths[i] += nmask;
2595     /* zero out the mask elements we set */
2596     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2597   }
2598 
2599   /* create our matrix */
2600   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2601   ierr = MatSetType(B,type);CHKERRQ(ierr);
2602   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2603   a = (Mat_SeqBAIJ*)B->data;
2604 
2605   /* set matrix "i" values */
2606   a->i[0] = 0;
2607   for (i=1; i<= mbs; i++) {
2608     a->i[i]      = a->i[i-1] + browlengths[i-1];
2609     a->ilen[i-1] = browlengths[i-1];
2610   }
2611   a->nz         = 0;
2612   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2613 
2614   /* read in nonzero values */
2615   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2616   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2617   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2618 
2619   /* set "a" and "j" values into matrix */
2620   nzcount = 0; jcount = 0;
2621   for (i=0; i<mbs; i++) {
2622     nzcountb = nzcount;
2623     nmask    = 0;
2624     for (j=0; j<bs; j++) {
2625       kmax = rowlengths[i*bs+j];
2626       for (k=0; k<kmax; k++) {
2627         tmp = jj[nzcount++]/bs;
2628 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2629       }
2630     }
2631     /* sort the masked values */
2632     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2633 
2634     /* set "j" values into matrix */
2635     maskcount = 1;
2636     for (j=0; j<nmask; j++) {
2637       a->j[jcount++]  = masked[j];
2638       mask[masked[j]] = maskcount++;
2639     }
2640     /* set "a" values into matrix */
2641     ishift = bs2*a->i[i];
2642     for (j=0; j<bs; j++) {
2643       kmax = rowlengths[i*bs+j];
2644       for (k=0; k<kmax; k++) {
2645         tmp       = jj[nzcountb]/bs ;
2646         block     = mask[tmp] - 1;
2647         point     = jj[nzcountb] - bs*tmp;
2648         idx       = ishift + bs2*block + j + bs*point;
2649         a->a[idx] = (MatScalar)aa[nzcountb++];
2650       }
2651     }
2652     /* zero out the mask elements we set */
2653     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2654   }
2655   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2656 
2657   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2658   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2659   ierr = PetscFree(aa);CHKERRQ(ierr);
2660   ierr = PetscFree(jj);CHKERRQ(ierr);
2661   ierr = PetscFree(mask);CHKERRQ(ierr);
2662 
2663   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2664   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2665   ierr = MatView_Private(B);CHKERRQ(ierr);
2666 
2667   *A = B;
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 #undef __FUNCT__
2672 #define __FUNCT__ "MatCreateSeqBAIJ"
2673 /*@C
2674    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2675    compressed row) format.  For good matrix assembly performance the
2676    user should preallocate the matrix storage by setting the parameter nz
2677    (or the array nnz).  By setting these parameters accurately, performance
2678    during matrix assembly can be increased by more than a factor of 50.
2679 
2680    Collective on MPI_Comm
2681 
2682    Input Parameters:
2683 +  comm - MPI communicator, set to PETSC_COMM_SELF
2684 .  bs - size of block
2685 .  m - number of rows
2686 .  n - number of columns
2687 .  nz - number of nonzero blocks  per block row (same for all rows)
2688 -  nnz - array containing the number of nonzero blocks in the various block rows
2689          (possibly different for each block row) or PETSC_NULL
2690 
2691    Output Parameter:
2692 .  A - the matrix
2693 
2694    Options Database Keys:
2695 .   -mat_no_unroll - uses code that does not unroll the loops in the
2696                      block calculations (much slower)
2697 .    -mat_block_size - size of the blocks to use
2698 
2699    Level: intermediate
2700 
2701    Notes:
2702    The number of rows and columns must be divisible by blocksize.
2703 
2704    If the nnz parameter is given then the nz parameter is ignored
2705 
2706    A nonzero block is any block that as 1 or more nonzeros in it
2707 
2708    The block AIJ format is fully compatible with standard Fortran 77
2709    storage.  That is, the stored row and column indices can begin at
2710    either one (as in Fortran) or zero.  See the users' manual for details.
2711 
2712    Specify the preallocated storage with either nz or nnz (not both).
2713    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2714    allocation.  For additional details, see the users manual chapter on
2715    matrices.
2716 
2717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2718 @*/
2719 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2720 {
2721   PetscErrorCode ierr;
2722 
2723   PetscFunctionBegin;
2724   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2725   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2726   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2732 /*@C
2733    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2734    per row in the matrix. For good matrix assembly performance the
2735    user should preallocate the matrix storage by setting the parameter nz
2736    (or the array nnz).  By setting these parameters accurately, performance
2737    during matrix assembly can be increased by more than a factor of 50.
2738 
2739    Collective on MPI_Comm
2740 
2741    Input Parameters:
2742 +  A - the matrix
2743 .  bs - size of block
2744 .  nz - number of block nonzeros per block row (same for all rows)
2745 -  nnz - array containing the number of block nonzeros in the various block rows
2746          (possibly different for each block row) or PETSC_NULL
2747 
2748    Options Database Keys:
2749 .   -mat_no_unroll - uses code that does not unroll the loops in the
2750                      block calculations (much slower)
2751 .    -mat_block_size - size of the blocks to use
2752 
2753    Level: intermediate
2754 
2755    Notes:
2756    If the nnz parameter is given then the nz parameter is ignored
2757 
2758    The block AIJ format is fully compatible with standard Fortran 77
2759    storage.  That is, the stored row and column indices can begin at
2760    either one (as in Fortran) or zero.  See the users' manual for details.
2761 
2762    Specify the preallocated storage with either nz or nnz (not both).
2763    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2764    allocation.  For additional details, see the users manual chapter on
2765    matrices.
2766 
2767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2768 @*/
2769 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2770 {
2771   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2772 
2773   PetscFunctionBegin;
2774   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2775   if (f) {
2776     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2777   }
2778   PetscFunctionReturn(0);
2779 }
2780 
2781