xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 45bfc511327f17f9664eea9fee8d9f549538c00a)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   ierr = PetscFree(a->a);CHKERRQ(ierr);
831   if (!a->singlemalloc) {
832     ierr = PetscFree(a->i);CHKERRQ(ierr);
833     ierr = PetscFree(a->j);CHKERRQ(ierr);
834   }
835   if (a->row) {
836     ierr = ISDestroy(a->row);CHKERRQ(ierr);
837   }
838   if (a->col) {
839     ierr = ISDestroy(a->col);CHKERRQ(ierr);
840   }
841   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
842   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
843   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
844   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
845   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
846   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
847   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
848   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
849 #if defined(PETSC_USE_MAT_SINGLE)
850   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
851 #endif
852   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
853   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
854 
855   ierr = PetscFree(a);CHKERRQ(ierr);
856 
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatSetOption_SeqBAIJ"
869 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
870 {
871   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
872   PetscErrorCode ierr;
873 
874   PetscFunctionBegin;
875   switch (op) {
876   case MAT_ROW_ORIENTED:
877     a->roworiented    = PETSC_TRUE;
878     break;
879   case MAT_COLUMN_ORIENTED:
880     a->roworiented    = PETSC_FALSE;
881     break;
882   case MAT_COLUMNS_SORTED:
883     a->sorted         = PETSC_TRUE;
884     break;
885   case MAT_COLUMNS_UNSORTED:
886     a->sorted         = PETSC_FALSE;
887     break;
888   case MAT_KEEP_ZEROED_ROWS:
889     a->keepzeroedrows = PETSC_TRUE;
890     break;
891   case MAT_NO_NEW_NONZERO_LOCATIONS:
892     a->nonew          = 1;
893     break;
894   case MAT_NEW_NONZERO_LOCATION_ERR:
895     a->nonew          = -1;
896     break;
897   case MAT_NEW_NONZERO_ALLOCATION_ERR:
898     a->nonew          = -2;
899     break;
900   case MAT_YES_NEW_NONZERO_LOCATIONS:
901     a->nonew          = 0;
902     break;
903   case MAT_ROWS_SORTED:
904   case MAT_ROWS_UNSORTED:
905   case MAT_YES_NEW_DIAGONALS:
906   case MAT_IGNORE_OFF_PROC_ENTRIES:
907   case MAT_USE_HASH_TABLE:
908     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
909     break;
910   case MAT_NO_NEW_DIAGONALS:
911     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
912   case MAT_SYMMETRIC:
913   case MAT_STRUCTURALLY_SYMMETRIC:
914   case MAT_NOT_SYMMETRIC:
915   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
916   case MAT_HERMITIAN:
917   case MAT_NOT_HERMITIAN:
918   case MAT_SYMMETRY_ETERNAL:
919   case MAT_NOT_SYMMETRY_ETERNAL:
920     break;
921   default:
922     SETERRQ(PETSC_ERR_SUP,"unknown option");
923   }
924   PetscFunctionReturn(0);
925 }
926 
927 #undef __FUNCT__
928 #define __FUNCT__ "MatGetRow_SeqBAIJ"
929 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
930 {
931   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
932   PetscErrorCode ierr;
933   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
934   MatScalar      *aa,*aa_i;
935   PetscScalar    *v_i;
936 
937   PetscFunctionBegin;
938   bs  = A->bs;
939   ai  = a->i;
940   aj  = a->j;
941   aa  = a->a;
942   bs2 = a->bs2;
943 
944   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
945 
946   bn  = row/bs;   /* Block number */
947   bp  = row % bs; /* Block Position */
948   M   = ai[bn+1] - ai[bn];
949   *nz = bs*M;
950 
951   if (v) {
952     *v = 0;
953     if (*nz) {
954       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
955       for (i=0; i<M; i++) { /* for each block in the block row */
956         v_i  = *v + i*bs;
957         aa_i = aa + bs2*(ai[bn] + i);
958         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
959       }
960     }
961   }
962 
963   if (idx) {
964     *idx = 0;
965     if (*nz) {
966       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
967       for (i=0; i<M; i++) { /* for each block in the block row */
968         idx_i = *idx + i*bs;
969         itmp  = bs*aj[ai[bn] + i];
970         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
971       }
972     }
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
979 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
980 {
981   PetscErrorCode ierr;
982 
983   PetscFunctionBegin;
984   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
985   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatTranspose_SeqBAIJ"
991 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
992 {
993   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
994   Mat            C;
995   PetscErrorCode ierr;
996   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
997   PetscInt       *rows,*cols,bs2=a->bs2;
998   PetscScalar    *array;
999 
1000   PetscFunctionBegin;
1001   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1002   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1003   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1004 
1005 #if defined(PETSC_USE_MAT_SINGLE)
1006   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1007   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1008 #else
1009   array = a->a;
1010 #endif
1011 
1012   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1013   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
1014   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1015   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1016   ierr = PetscFree(col);CHKERRQ(ierr);
1017   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1018   cols = rows + bs;
1019   for (i=0; i<mbs; i++) {
1020     cols[0] = i*bs;
1021     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1022     len = ai[i+1] - ai[i];
1023     for (j=0; j<len; j++) {
1024       rows[0] = (*aj++)*bs;
1025       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1026       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1027       array += bs2;
1028     }
1029   }
1030   ierr = PetscFree(rows);CHKERRQ(ierr);
1031 #if defined(PETSC_USE_MAT_SINGLE)
1032   ierr = PetscFree(array);
1033 #endif
1034 
1035   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1036   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037 
1038   if (B) {
1039     *B = C;
1040   } else {
1041     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1042   }
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1048 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1049 {
1050   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1051   PetscErrorCode ierr;
1052   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1053   int            fd;
1054   PetscScalar    *aa;
1055   FILE           *file;
1056 
1057   PetscFunctionBegin;
1058   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1059   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1060   col_lens[0] = MAT_FILE_COOKIE;
1061 
1062   col_lens[1] = A->m;
1063   col_lens[2] = A->n;
1064   col_lens[3] = a->nz*bs2;
1065 
1066   /* store lengths of each row and write (including header) to file */
1067   count = 0;
1068   for (i=0; i<a->mbs; i++) {
1069     for (j=0; j<bs; j++) {
1070       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1071     }
1072   }
1073   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1074   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1075 
1076   /* store column indices (zero start index) */
1077   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1078   count = 0;
1079   for (i=0; i<a->mbs; i++) {
1080     for (j=0; j<bs; j++) {
1081       for (k=a->i[i]; k<a->i[i+1]; k++) {
1082         for (l=0; l<bs; l++) {
1083           jj[count++] = bs*a->j[k] + l;
1084         }
1085       }
1086     }
1087   }
1088   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1089   ierr = PetscFree(jj);CHKERRQ(ierr);
1090 
1091   /* store nonzero values */
1092   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1093   count = 0;
1094   for (i=0; i<a->mbs; i++) {
1095     for (j=0; j<bs; j++) {
1096       for (k=a->i[i]; k<a->i[i+1]; k++) {
1097         for (l=0; l<bs; l++) {
1098           aa[count++] = a->a[bs2*k + l*bs + j];
1099         }
1100       }
1101     }
1102   }
1103   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1104   ierr = PetscFree(aa);CHKERRQ(ierr);
1105 
1106   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1107   if (file) {
1108     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1109   }
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1115 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1116 {
1117   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1118   PetscErrorCode    ierr;
1119   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1120   PetscViewerFormat format;
1121 
1122   PetscFunctionBegin;
1123   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1124   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1125     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1126   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1127     Mat aij;
1128     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1129     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1130     ierr = MatDestroy(aij);CHKERRQ(ierr);
1131   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1132      PetscFunctionReturn(0);
1133   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1134     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1135     for (i=0; i<a->mbs; i++) {
1136       for (j=0; j<bs; j++) {
1137         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1138         for (k=a->i[i]; k<a->i[i+1]; k++) {
1139           for (l=0; l<bs; l++) {
1140 #if defined(PETSC_USE_COMPLEX)
1141             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1142               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1143                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1144             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1145               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1146                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1147             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1149             }
1150 #else
1151             if (a->a[bs2*k + l*bs + j] != 0.0) {
1152               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1153             }
1154 #endif
1155           }
1156         }
1157         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1158       }
1159     }
1160     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1161   } else {
1162     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1163     for (i=0; i<a->mbs; i++) {
1164       for (j=0; j<bs; j++) {
1165         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1166         for (k=a->i[i]; k<a->i[i+1]; k++) {
1167           for (l=0; l<bs; l++) {
1168 #if defined(PETSC_USE_COMPLEX)
1169             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1170               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1171                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1172             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1173               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1174                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1175             } else {
1176               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1177             }
1178 #else
1179             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1180 #endif
1181           }
1182         }
1183         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1184       }
1185     }
1186     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1187   }
1188   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1194 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1195 {
1196   Mat            A = (Mat) Aa;
1197   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1198   PetscErrorCode ierr;
1199   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1200   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1201   MatScalar      *aa;
1202   PetscViewer    viewer;
1203 
1204   PetscFunctionBegin;
1205 
1206   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1207   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1208 
1209   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1210 
1211   /* loop over matrix elements drawing boxes */
1212   color = PETSC_DRAW_BLUE;
1213   for (i=0,row=0; i<mbs; i++,row+=bs) {
1214     for (j=a->i[i]; j<a->i[i+1]; j++) {
1215       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1216       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1217       aa = a->a + j*bs2;
1218       for (k=0; k<bs; k++) {
1219         for (l=0; l<bs; l++) {
1220           if (PetscRealPart(*aa++) >=  0.) continue;
1221           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1222         }
1223       }
1224     }
1225   }
1226   color = PETSC_DRAW_CYAN;
1227   for (i=0,row=0; i<mbs; i++,row+=bs) {
1228     for (j=a->i[i]; j<a->i[i+1]; j++) {
1229       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1230       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1231       aa = a->a + j*bs2;
1232       for (k=0; k<bs; k++) {
1233         for (l=0; l<bs; l++) {
1234           if (PetscRealPart(*aa++) != 0.) continue;
1235           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1236         }
1237       }
1238     }
1239   }
1240 
1241   color = PETSC_DRAW_RED;
1242   for (i=0,row=0; i<mbs; i++,row+=bs) {
1243     for (j=a->i[i]; j<a->i[i+1]; j++) {
1244       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1245       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1246       aa = a->a + j*bs2;
1247       for (k=0; k<bs; k++) {
1248         for (l=0; l<bs; l++) {
1249           if (PetscRealPart(*aa++) <= 0.) continue;
1250           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1251         }
1252       }
1253     }
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1260 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1261 {
1262   PetscErrorCode ierr;
1263   PetscReal      xl,yl,xr,yr,w,h;
1264   PetscDraw      draw;
1265   PetscTruth     isnull;
1266 
1267   PetscFunctionBegin;
1268 
1269   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1270   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1271 
1272   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1273   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1274   xr += w;    yr += h;  xl = -w;     yl = -h;
1275   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1276   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1277   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatView_SeqBAIJ"
1283 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1284 {
1285   PetscErrorCode ierr;
1286   PetscTruth     iascii,isbinary,isdraw;
1287 
1288   PetscFunctionBegin;
1289   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1290   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1291   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1292   if (iascii){
1293     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1294   } else if (isbinary) {
1295     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1296   } else if (isdraw) {
1297     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1298   } else {
1299     Mat B;
1300     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1301     ierr = MatView(B,viewer);CHKERRQ(ierr);
1302     ierr = MatDestroy(B);CHKERRQ(ierr);
1303   }
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1310 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1311 {
1312   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1313   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1314   PetscInt    *ai = a->i,*ailen = a->ilen;
1315   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1316   MatScalar   *ap,*aa = a->a,zero = 0.0;
1317 
1318   PetscFunctionBegin;
1319   for (k=0; k<m; k++) { /* loop over rows */
1320     row  = im[k]; brow = row/bs;
1321     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1322     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1323     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1324     nrow = ailen[brow];
1325     for (l=0; l<n; l++) { /* loop over columns */
1326       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1327       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1328       col  = in[l] ;
1329       bcol = col/bs;
1330       cidx = col%bs;
1331       ridx = row%bs;
1332       high = nrow;
1333       low  = 0; /* assume unsorted */
1334       while (high-low > 5) {
1335         t = (low+high)/2;
1336         if (rp[t] > bcol) high = t;
1337         else             low  = t;
1338       }
1339       for (i=low; i<high; i++) {
1340         if (rp[i] > bcol) break;
1341         if (rp[i] == bcol) {
1342           *v++ = ap[bs2*i+bs*cidx+ridx];
1343           goto finished;
1344         }
1345       }
1346       *v++ = zero;
1347       finished:;
1348     }
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #if defined(PETSC_USE_MAT_SINGLE)
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1356 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1357 {
1358   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1359   PetscErrorCode ierr;
1360   PetscInt       i,N = m*n*b->bs2;
1361   MatScalar      *vsingle;
1362 
1363   PetscFunctionBegin;
1364   if (N > b->setvalueslen) {
1365     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1366     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1367     b->setvalueslen  = N;
1368   }
1369   vsingle = b->setvaluescopy;
1370   for (i=0; i<N; i++) {
1371     vsingle[i] = v[i];
1372   }
1373   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 #endif
1377 
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1381 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1382 {
1383   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1384   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1385   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1386   PetscErrorCode  ierr;
1387   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1388   PetscTruth      roworiented=a->roworiented;
1389   const MatScalar *value = v;
1390   MatScalar       *ap,*aa = a->a,*bap;
1391 
1392   PetscFunctionBegin;
1393   if (roworiented) {
1394     stepval = (n-1)*bs;
1395   } else {
1396     stepval = (m-1)*bs;
1397   }
1398   for (k=0; k<m; k++) { /* loop over added rows */
1399     row  = im[k];
1400     if (row < 0) continue;
1401 #if defined(PETSC_USE_DEBUG)
1402     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1403 #endif
1404     rp   = aj + ai[row];
1405     ap   = aa + bs2*ai[row];
1406     rmax = imax[row];
1407     nrow = ailen[row];
1408     low  = 0;
1409     high = nrow;
1410     for (l=0; l<n; l++) { /* loop over added columns */
1411       if (in[l] < 0) continue;
1412 #if defined(PETSC_USE_DEBUG)
1413       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1414 #endif
1415       col = in[l];
1416       if (roworiented) {
1417         value = v + k*(stepval+bs)*bs + l*bs;
1418       } else {
1419         value = v + l*(stepval+bs)*bs + k*bs;
1420       }
1421       if (col < lastcol) low = 0; else high = nrow;
1422       lastcol = col;
1423       while (high-low > 7) {
1424         t = (low+high)/2;
1425         if (rp[t] > col) high = t;
1426         else             low  = t;
1427       }
1428       for (i=low; i<high; i++) {
1429         if (rp[i] > col) break;
1430         if (rp[i] == col) {
1431           bap  = ap +  bs2*i;
1432           if (roworiented) {
1433             if (is == ADD_VALUES) {
1434               for (ii=0; ii<bs; ii++,value+=stepval) {
1435                 for (jj=ii; jj<bs2; jj+=bs) {
1436                   bap[jj] += *value++;
1437                 }
1438               }
1439             } else {
1440               for (ii=0; ii<bs; ii++,value+=stepval) {
1441                 for (jj=ii; jj<bs2; jj+=bs) {
1442                   bap[jj] = *value++;
1443                 }
1444               }
1445             }
1446           } else {
1447             if (is == ADD_VALUES) {
1448               for (ii=0; ii<bs; ii++,value+=stepval) {
1449                 for (jj=0; jj<bs; jj++) {
1450                   *bap++ += *value++;
1451                 }
1452               }
1453             } else {
1454               for (ii=0; ii<bs; ii++,value+=stepval) {
1455                 for (jj=0; jj<bs; jj++) {
1456                   *bap++  = *value++;
1457                 }
1458               }
1459             }
1460           }
1461           goto noinsert2;
1462         }
1463       }
1464       if (nonew == 1) goto noinsert2;
1465       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1466       if (nrow >= rmax) {
1467         /* there is no extra room in row, therefore enlarge */
1468         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1469         MatScalar *new_a;
1470 
1471         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1472 
1473         /* malloc new storage space */
1474         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1475 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1476         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1477         new_i   = new_j + new_nz;
1478 
1479         /* copy over old data into new slots */
1480         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1481         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1482         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1483         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1484         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1485         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1486         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1487         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1488         /* free up old matrix storage */
1489         ierr = PetscFree(a->a);CHKERRQ(ierr);
1490         if (!a->singlemalloc) {
1491           ierr = PetscFree(a->i);CHKERRQ(ierr);
1492           ierr = PetscFree(a->j);CHKERRQ(ierr);
1493         }
1494         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1495         a->singlemalloc = PETSC_TRUE;
1496 
1497         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1498         rmax = imax[row] = imax[row] + CHUNKSIZE;
1499         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1500         a->maxnz += bs2*CHUNKSIZE;
1501         a->reallocs++;
1502         a->nz++;
1503       }
1504       N = nrow++ - 1;
1505       /* shift up all the later entries in this row */
1506       for (ii=N; ii>=i; ii--) {
1507         rp[ii+1] = rp[ii];
1508         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1509       }
1510       if (N >= i) {
1511         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1512       }
1513       rp[i] = col;
1514       bap   = ap +  bs2*i;
1515       if (roworiented) {
1516         for (ii=0; ii<bs; ii++,value+=stepval) {
1517           for (jj=ii; jj<bs2; jj+=bs) {
1518             bap[jj] = *value++;
1519           }
1520         }
1521       } else {
1522         for (ii=0; ii<bs; ii++,value+=stepval) {
1523           for (jj=0; jj<bs; jj++) {
1524             *bap++  = *value++;
1525           }
1526         }
1527       }
1528       noinsert2:;
1529       low = i;
1530     }
1531     ailen[row] = nrow;
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1538 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1539 {
1540   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1541   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1542   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1543   PetscErrorCode ierr;
1544   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1545   MatScalar      *aa = a->a,*ap;
1546   PetscReal      ratio=0.6;
1547 
1548   PetscFunctionBegin;
1549   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1550 
1551   if (m) rmax = ailen[0];
1552   for (i=1; i<mbs; i++) {
1553     /* move each row back by the amount of empty slots (fshift) before it*/
1554     fshift += imax[i-1] - ailen[i-1];
1555     rmax   = PetscMax(rmax,ailen[i]);
1556     if (fshift) {
1557       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1558       N = ailen[i];
1559       for (j=0; j<N; j++) {
1560         ip[j-fshift] = ip[j];
1561         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1562       }
1563     }
1564     ai[i] = ai[i-1] + ailen[i-1];
1565   }
1566   if (mbs) {
1567     fshift += imax[mbs-1] - ailen[mbs-1];
1568     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1569   }
1570   /* reset ilen and imax for each row */
1571   for (i=0; i<mbs; i++) {
1572     ailen[i] = imax[i] = ai[i+1] - ai[i];
1573   }
1574   a->nz = ai[mbs];
1575 
1576   /* diagonals may have moved, so kill the diagonal pointers */
1577   a->idiagvalid = PETSC_FALSE;
1578   if (fshift && a->diag) {
1579     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1580     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1581     a->diag = 0;
1582   }
1583   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1584   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1585   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1586   a->reallocs          = 0;
1587   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1588 
1589   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1590   if (a->compressedrow.use){
1591     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1592   }
1593 
1594   A->same_nonzero = PETSC_TRUE;
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 /*
1599    This function returns an array of flags which indicate the locations of contiguous
1600    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1601    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1602    Assume: sizes should be long enough to hold all the values.
1603 */
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1606 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1607 {
1608   PetscInt   i,j,k,row;
1609   PetscTruth flg;
1610 
1611   PetscFunctionBegin;
1612   for (i=0,j=0; i<n; j++) {
1613     row = idx[i];
1614     if (row%bs!=0) { /* Not the begining of a block */
1615       sizes[j] = 1;
1616       i++;
1617     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1618       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1619       i++;
1620     } else { /* Begining of the block, so check if the complete block exists */
1621       flg = PETSC_TRUE;
1622       for (k=1; k<bs; k++) {
1623         if (row+k != idx[i+k]) { /* break in the block */
1624           flg = PETSC_FALSE;
1625           break;
1626         }
1627       }
1628       if (flg) { /* No break in the bs */
1629         sizes[j] = bs;
1630         i+= bs;
1631       } else {
1632         sizes[j] = 1;
1633         i++;
1634       }
1635     }
1636   }
1637   *bs_max = j;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1643 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1644 {
1645   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1646   PetscErrorCode ierr;
1647   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1648   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1649   PetscScalar    zero = 0.0;
1650   MatScalar      *aa;
1651 
1652   PetscFunctionBegin;
1653   /* Make a copy of the IS and  sort it */
1654   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1655   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1656 
1657   /* allocate memory for rows,sizes */
1658   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1659   sizes = rows + is_n;
1660 
1661   /* copy IS values to rows, and sort them */
1662   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1663   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1664   if (baij->keepzeroedrows) {
1665     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1666     bs_max = is_n;
1667     A->same_nonzero = PETSC_TRUE;
1668   } else {
1669     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1670     A->same_nonzero = PETSC_FALSE;
1671   }
1672   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1673 
1674   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1675     row   = rows[j];
1676     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1677     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1678     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1679     if (sizes[i] == bs && !baij->keepzeroedrows) {
1680       if (diag) {
1681         if (baij->ilen[row/bs] > 0) {
1682           baij->ilen[row/bs]       = 1;
1683           baij->j[baij->i[row/bs]] = row/bs;
1684           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1685         }
1686         /* Now insert all the diagonal values for this bs */
1687         for (k=0; k<bs; k++) {
1688           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1689         }
1690       } else { /* (!diag) */
1691         baij->ilen[row/bs] = 0;
1692       } /* end (!diag) */
1693     } else { /* (sizes[i] != bs) */
1694 #if defined (PETSC_USE_DEBUG)
1695       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1696 #endif
1697       for (k=0; k<count; k++) {
1698         aa[0] =  zero;
1699         aa    += bs;
1700       }
1701       if (diag) {
1702         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1703       }
1704     }
1705   }
1706 
1707   ierr = PetscFree(rows);CHKERRQ(ierr);
1708   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1714 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1715 {
1716   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1717   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1718   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1719   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1720   PetscErrorCode ierr;
1721   PetscInt       ridx,cidx,bs2=a->bs2;
1722   PetscTruth     roworiented=a->roworiented;
1723   MatScalar      *ap,value,*aa=a->a,*bap;
1724 
1725   PetscFunctionBegin;
1726   for (k=0; k<m; k++) { /* loop over added rows */
1727     row  = im[k]; brow = row/bs;
1728     if (row < 0) continue;
1729 #if defined(PETSC_USE_DEBUG)
1730     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1731 #endif
1732     rp   = aj + ai[brow];
1733     ap   = aa + bs2*ai[brow];
1734     rmax = imax[brow];
1735     nrow = ailen[brow];
1736     low  = 0;
1737     high = nrow;
1738     for (l=0; l<n; l++) { /* loop over added columns */
1739       if (in[l] < 0) continue;
1740 #if defined(PETSC_USE_DEBUG)
1741       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1742 #endif
1743       col = in[l]; bcol = col/bs;
1744       ridx = row % bs; cidx = col % bs;
1745       if (roworiented) {
1746         value = v[l + k*n];
1747       } else {
1748         value = v[k + l*m];
1749       }
1750       if (col < lastcol) low = 0; else high = nrow;
1751       lastcol = col;
1752       while (high-low > 7) {
1753         t = (low+high)/2;
1754         if (rp[t] > bcol) high = t;
1755         else              low  = t;
1756       }
1757       for (i=low; i<high; i++) {
1758         if (rp[i] > bcol) break;
1759         if (rp[i] == bcol) {
1760           bap  = ap +  bs2*i + bs*cidx + ridx;
1761           if (is == ADD_VALUES) *bap += value;
1762           else                  *bap  = value;
1763           goto noinsert1;
1764         }
1765       }
1766       if (nonew == 1) goto noinsert1;
1767       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1768       if (nrow >= rmax) {
1769         /* there is no extra room in row, therefore enlarge */
1770         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1771         MatScalar *new_a;
1772 
1773         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1774 
1775         /* Malloc new storage space */
1776         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1777 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1778         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1779         new_i   = new_j + new_nz;
1780 
1781         /* copy over old data into new slots */
1782         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1783         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1784         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1785         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1786         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1787         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1788         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1789         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1790         /* free up old matrix storage */
1791         ierr = PetscFree(a->a);CHKERRQ(ierr);
1792         if (!a->singlemalloc) {
1793           ierr = PetscFree(a->i);CHKERRQ(ierr);
1794           ierr = PetscFree(a->j);CHKERRQ(ierr);
1795         }
1796         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1797         a->singlemalloc = PETSC_TRUE;
1798 
1799         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1800         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1801         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1802         a->maxnz += bs2*CHUNKSIZE;
1803         a->reallocs++;
1804         a->nz++;
1805       }
1806       N = nrow++ - 1;
1807       /* shift up all the later entries in this row */
1808       for (ii=N; ii>=i; ii--) {
1809         rp[ii+1] = rp[ii];
1810         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1811       }
1812       if (N>=i) {
1813         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1814       }
1815       rp[i]                      = bcol;
1816       ap[bs2*i + bs*cidx + ridx] = value;
1817       noinsert1:;
1818       low = i;
1819     }
1820     ailen[brow] = nrow;
1821   }
1822   A->same_nonzero = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1829 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1830 {
1831   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1832   Mat            outA;
1833   PetscErrorCode ierr;
1834   PetscTruth     row_identity,col_identity;
1835 
1836   PetscFunctionBegin;
1837   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1838   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1839   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1840   if (!row_identity || !col_identity) {
1841     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1842   }
1843 
1844   outA          = inA;
1845   inA->factor   = FACTOR_LU;
1846 
1847   if (!a->diag) {
1848     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1849   }
1850 
1851   a->row        = row;
1852   a->col        = col;
1853   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1854   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1855 
1856   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1857   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1858   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1859 
1860   /*
1861       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1862       for ILU(0) factorization with natural ordering
1863   */
1864   if (inA->bs < 8) {
1865     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1866   } else {
1867     if (!a->solve_work) {
1868       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1869       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1870     }
1871   }
1872 
1873   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1874 
1875   PetscFunctionReturn(0);
1876 }
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1879 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1880 {
1881   static PetscTruth called = PETSC_FALSE;
1882   MPI_Comm          comm = A->comm;
1883   PetscErrorCode    ierr;
1884 
1885   PetscFunctionBegin;
1886   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1887   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1888   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 EXTERN_C_BEGIN
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1895 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1896 {
1897   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1898   PetscInt    i,nz,nbs;
1899 
1900   PetscFunctionBegin;
1901   nz  = baij->maxnz/baij->bs2;
1902   nbs = baij->nbs;
1903   for (i=0; i<nz; i++) {
1904     baij->j[i] = indices[i];
1905   }
1906   baij->nz = nz;
1907   for (i=0; i<nbs; i++) {
1908     baij->ilen[i] = baij->imax[i];
1909   }
1910 
1911   PetscFunctionReturn(0);
1912 }
1913 EXTERN_C_END
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1917 /*@
1918     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1919        in the matrix.
1920 
1921   Input Parameters:
1922 +  mat - the SeqBAIJ matrix
1923 -  indices - the column indices
1924 
1925   Level: advanced
1926 
1927   Notes:
1928     This can be called if you have precomputed the nonzero structure of the
1929   matrix and want to provide it to the matrix object to improve the performance
1930   of the MatSetValues() operation.
1931 
1932     You MUST have set the correct numbers of nonzeros per row in the call to
1933   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1934 
1935     MUST be called before any calls to MatSetValues();
1936 
1937 @*/
1938 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1939 {
1940   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1941 
1942   PetscFunctionBegin;
1943   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1944   PetscValidPointer(indices,2);
1945   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1946   if (f) {
1947     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1948   } else {
1949     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1950   }
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1956 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1957 {
1958   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1959   PetscErrorCode ierr;
1960   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1961   PetscReal      atmp;
1962   PetscScalar    *x,zero = 0.0;
1963   MatScalar      *aa;
1964   PetscInt       ncols,brow,krow,kcol;
1965 
1966   PetscFunctionBegin;
1967   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1968   bs   = A->bs;
1969   aa   = a->a;
1970   ai   = a->i;
1971   aj   = a->j;
1972   mbs = a->mbs;
1973 
1974   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1975   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1976   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1977   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1978   for (i=0; i<mbs; i++) {
1979     ncols = ai[1] - ai[0]; ai++;
1980     brow  = bs*i;
1981     for (j=0; j<ncols; j++){
1982       /* bcol = bs*(*aj); */
1983       for (kcol=0; kcol<bs; kcol++){
1984         for (krow=0; krow<bs; krow++){
1985           atmp = PetscAbsScalar(*aa); aa++;
1986           row = brow + krow;    /* row index */
1987           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1988           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1989         }
1990       }
1991       aj++;
1992     }
1993   }
1994   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2000 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2001 {
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2011 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2012 {
2013   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2014   PetscFunctionBegin;
2015   *array = a->a;
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2021 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2022 {
2023   PetscFunctionBegin;
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #include "petscblaslapack.h"
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2030 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
2031 {
2032   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2033   PetscErrorCode ierr;
2034   PetscInt       i,bs=Y->bs,j,bs2;
2035   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
2036 
2037   PetscFunctionBegin;
2038   if (str == SAME_NONZERO_PATTERN) {
2039     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2040   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2041     if (y->xtoy && y->XtoY != X) {
2042       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2043       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2044     }
2045     if (!y->xtoy) { /* get xtoy */
2046       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2047       y->XtoY = X;
2048     }
2049     bs2 = bs*bs;
2050     for (i=0; i<x->nz; i++) {
2051       j = 0;
2052       while (j < bs2){
2053         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2054         j++;
2055       }
2056     }
2057     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2058   } else {
2059     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 /* -------------------------------------------------------------------*/
2065 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2066        MatGetRow_SeqBAIJ,
2067        MatRestoreRow_SeqBAIJ,
2068        MatMult_SeqBAIJ_N,
2069 /* 4*/ MatMultAdd_SeqBAIJ_N,
2070        MatMultTranspose_SeqBAIJ,
2071        MatMultTransposeAdd_SeqBAIJ,
2072        MatSolve_SeqBAIJ_N,
2073        0,
2074        0,
2075 /*10*/ 0,
2076        MatLUFactor_SeqBAIJ,
2077        0,
2078        0,
2079        MatTranspose_SeqBAIJ,
2080 /*15*/ MatGetInfo_SeqBAIJ,
2081        MatEqual_SeqBAIJ,
2082        MatGetDiagonal_SeqBAIJ,
2083        MatDiagonalScale_SeqBAIJ,
2084        MatNorm_SeqBAIJ,
2085 /*20*/ 0,
2086        MatAssemblyEnd_SeqBAIJ,
2087        0,
2088        MatSetOption_SeqBAIJ,
2089        MatZeroEntries_SeqBAIJ,
2090 /*25*/ MatZeroRows_SeqBAIJ,
2091        MatLUFactorSymbolic_SeqBAIJ,
2092        MatLUFactorNumeric_SeqBAIJ_N,
2093        MatCholeskyFactorSymbolic_SeqBAIJ,
2094        MatCholeskyFactorNumeric_SeqBAIJ_N,
2095 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2096        MatILUFactorSymbolic_SeqBAIJ,
2097        MatICCFactorSymbolic_SeqBAIJ,
2098        MatGetArray_SeqBAIJ,
2099        MatRestoreArray_SeqBAIJ,
2100 /*35*/ MatDuplicate_SeqBAIJ,
2101        0,
2102        0,
2103        MatILUFactor_SeqBAIJ,
2104        0,
2105 /*40*/ MatAXPY_SeqBAIJ,
2106        MatGetSubMatrices_SeqBAIJ,
2107        MatIncreaseOverlap_SeqBAIJ,
2108        MatGetValues_SeqBAIJ,
2109        0,
2110 /*45*/ MatPrintHelp_SeqBAIJ,
2111        MatScale_SeqBAIJ,
2112        0,
2113        0,
2114        0,
2115 /*50*/ 0,
2116        MatGetRowIJ_SeqBAIJ,
2117        MatRestoreRowIJ_SeqBAIJ,
2118        0,
2119        0,
2120 /*55*/ 0,
2121        0,
2122        0,
2123        0,
2124        MatSetValuesBlocked_SeqBAIJ,
2125 /*60*/ MatGetSubMatrix_SeqBAIJ,
2126        MatDestroy_SeqBAIJ,
2127        MatView_SeqBAIJ,
2128        MatGetPetscMaps_Petsc,
2129        0,
2130 /*65*/ 0,
2131        0,
2132        0,
2133        0,
2134        0,
2135 /*70*/ MatGetRowMax_SeqBAIJ,
2136        MatConvert_Basic,
2137        0,
2138        0,
2139        0,
2140 /*75*/ 0,
2141        0,
2142        0,
2143        0,
2144        0,
2145 /*80*/ 0,
2146        0,
2147        0,
2148        0,
2149        MatLoad_SeqBAIJ,
2150 /*85*/ 0,
2151        0,
2152        0,
2153        0,
2154        0,
2155 /*90*/ 0,
2156        0,
2157        0,
2158        0,
2159        0,
2160 /*95*/ 0,
2161        0,
2162        0,
2163        0};
2164 
2165 EXTERN_C_BEGIN
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2168 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2169 {
2170   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2171   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   if (aij->nonew != 1) {
2176     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2177   }
2178 
2179   /* allocate space for values if not already there */
2180   if (!aij->saved_values) {
2181     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2182   }
2183 
2184   /* copy values over */
2185   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 EXTERN_C_END
2189 
2190 EXTERN_C_BEGIN
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2193 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2194 {
2195   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2196   PetscErrorCode ierr;
2197   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2198 
2199   PetscFunctionBegin;
2200   if (aij->nonew != 1) {
2201     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2202   }
2203   if (!aij->saved_values) {
2204     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2205   }
2206 
2207   /* copy values over */
2208   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 EXTERN_C_END
2212 
2213 EXTERN_C_BEGIN
2214 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2215 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2216 EXTERN_C_END
2217 
2218 EXTERN_C_BEGIN
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2221 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2222 {
2223   Mat_SeqBAIJ    *b;
2224   PetscErrorCode ierr;
2225   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2226   PetscTruth     flg;
2227 
2228   PetscFunctionBegin;
2229 
2230   B->preallocated = PETSC_TRUE;
2231   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2232   if (nnz && newbs != bs) {
2233     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2234   }
2235   bs   = newbs;
2236 
2237   mbs  = B->m/bs;
2238   nbs  = B->n/bs;
2239   bs2  = bs*bs;
2240 
2241   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2242     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2243   }
2244 
2245   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2246   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2247   if (nnz) {
2248     for (i=0; i<mbs; i++) {
2249       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2250       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2251     }
2252   }
2253 
2254   b       = (Mat_SeqBAIJ*)B->data;
2255   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2256   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2257   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2258   if (!flg) {
2259     switch (bs) {
2260     case 1:
2261       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2262       B->ops->mult            = MatMult_SeqBAIJ_1;
2263       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2264       break;
2265     case 2:
2266       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2267       B->ops->mult            = MatMult_SeqBAIJ_2;
2268       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2269       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2270       break;
2271     case 3:
2272       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2273       B->ops->mult            = MatMult_SeqBAIJ_3;
2274       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2275       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2276       break;
2277     case 4:
2278       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2279       B->ops->mult            = MatMult_SeqBAIJ_4;
2280       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2281       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2282       break;
2283     case 5:
2284       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2285       B->ops->mult            = MatMult_SeqBAIJ_5;
2286       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2287       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2288       break;
2289     case 6:
2290       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2291       B->ops->mult            = MatMult_SeqBAIJ_6;
2292       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2293       break;
2294     case 7:
2295       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2296       B->ops->mult            = MatMult_SeqBAIJ_7;
2297       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2298       break;
2299     default:
2300       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2301       B->ops->mult            = MatMult_SeqBAIJ_N;
2302       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2303       break;
2304     }
2305   }
2306   B->bs      = bs;
2307   b->mbs     = mbs;
2308   b->nbs     = nbs;
2309   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2310   if (!nnz) {
2311     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2312     else if (nz <= 0)        nz = 1;
2313     for (i=0; i<mbs; i++) b->imax[i] = nz;
2314     nz = nz*mbs;
2315   } else {
2316     nz = 0;
2317     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2318   }
2319 
2320   /* allocate the matrix space */
2321   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2322   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2323   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2324   b->j  = (PetscInt*)(b->a + nz*bs2);
2325   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2326   b->i  = b->j + nz;
2327   b->singlemalloc = PETSC_TRUE;
2328 
2329   b->i[0] = 0;
2330   for (i=1; i<mbs+1; i++) {
2331     b->i[i] = b->i[i-1] + b->imax[i-1];
2332   }
2333 
2334   /* b->ilen will count nonzeros in each block row so far. */
2335   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2336   ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2337   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2338 
2339   B->bs               = bs;
2340   b->bs2              = bs2;
2341   b->mbs              = mbs;
2342   b->nz               = 0;
2343   b->maxnz            = nz*bs2;
2344   B->info.nz_unneeded = (PetscReal)b->maxnz;
2345   PetscFunctionReturn(0);
2346 }
2347 EXTERN_C_END
2348 
2349 /*MC
2350    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2351    block sparse compressed row format.
2352 
2353    Options Database Keys:
2354 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2355 
2356   Level: beginner
2357 
2358 .seealso: MatCreateSeqBAIJ
2359 M*/
2360 
2361 EXTERN_C_BEGIN
2362 #undef __FUNCT__
2363 #define __FUNCT__ "MatCreate_SeqBAIJ"
2364 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2365 {
2366   PetscErrorCode ierr;
2367   PetscMPIInt    size;
2368   Mat_SeqBAIJ    *b;
2369 
2370   PetscFunctionBegin;
2371   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2372   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2373 
2374   B->m = B->M = PetscMax(B->m,B->M);
2375   B->n = B->N = PetscMax(B->n,B->N);
2376   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2377   B->data = (void*)b;
2378   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2379   B->factor           = 0;
2380   B->mapping          = 0;
2381   b->row              = 0;
2382   b->col              = 0;
2383   b->icol             = 0;
2384   b->reallocs         = 0;
2385   b->saved_values     = 0;
2386 #if defined(PETSC_USE_MAT_SINGLE)
2387   b->setvalueslen     = 0;
2388   b->setvaluescopy    = PETSC_NULL;
2389 #endif
2390 
2391   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2392   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2393 
2394   b->sorted           = PETSC_FALSE;
2395   b->roworiented      = PETSC_TRUE;
2396   b->nonew            = 0;
2397   b->diag             = 0;
2398   b->solve_work       = 0;
2399   b->mult_work        = 0;
2400   B->spptr            = 0;
2401   B->info.nz_unneeded = (PetscReal)b->maxnz;
2402   b->keepzeroedrows   = PETSC_FALSE;
2403   b->xtoy              = 0;
2404   b->XtoY              = 0;
2405   b->compressedrow.use     = PETSC_FALSE;
2406   b->compressedrow.nrows   = 0;
2407   b->compressedrow.i       = PETSC_NULL;
2408   b->compressedrow.rindex  = PETSC_NULL;
2409   b->compressedrow.checked = PETSC_FALSE;
2410   B->same_nonzero          = PETSC_FALSE;
2411 
2412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2413                                      "MatInvertBlockDiagonal_SeqBAIJ",
2414                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2415   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2416                                      "MatStoreValues_SeqBAIJ",
2417                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2418   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2419                                      "MatRetrieveValues_SeqBAIJ",
2420                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2421   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2422                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2423                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2424   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2425                                      "MatConvert_SeqBAIJ_SeqAIJ",
2426                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2427 #if !defined(PETSC_USE_64BIT_INT)
2428   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2429                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2430                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2431 #endif
2432   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2433                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2434                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 EXTERN_C_END
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2441 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2442 {
2443   Mat            C;
2444   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2445   PetscErrorCode ierr;
2446   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2447 
2448   PetscFunctionBegin;
2449   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2450 
2451   *B = 0;
2452   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2453   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2454   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2455   c    = (Mat_SeqBAIJ*)C->data;
2456 
2457   C->M   = A->M;
2458   C->N   = A->N;
2459   C->bs  = A->bs;
2460   c->bs2 = a->bs2;
2461   c->mbs = a->mbs;
2462   c->nbs = a->nbs;
2463 
2464   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2465   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2466   for (i=0; i<mbs; i++) {
2467     c->imax[i] = a->imax[i];
2468     c->ilen[i] = a->ilen[i];
2469   }
2470 
2471   /* allocate the matrix space */
2472   c->singlemalloc = PETSC_TRUE;
2473   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2474   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2475   c->j = (PetscInt*)(c->a + nz*bs2);
2476   c->i = c->j + nz;
2477   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2478   if (mbs > 0) {
2479     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2480     if (cpvalues == MAT_COPY_VALUES) {
2481       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2482     } else {
2483       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2484     }
2485   }
2486 
2487   ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2488   c->sorted      = a->sorted;
2489   c->roworiented = a->roworiented;
2490   c->nonew       = a->nonew;
2491 
2492   if (a->diag) {
2493     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2494     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2495     for (i=0; i<mbs; i++) {
2496       c->diag[i] = a->diag[i];
2497     }
2498   } else c->diag        = 0;
2499   c->nz                 = a->nz;
2500   c->maxnz              = a->maxnz;
2501   c->solve_work         = 0;
2502   c->mult_work          = 0;
2503   C->preallocated       = PETSC_TRUE;
2504   C->assembled          = PETSC_TRUE;
2505 
2506   c->compressedrow.use     = a->compressedrow.use;
2507   c->compressedrow.nrows   = a->compressedrow.nrows;
2508   c->compressedrow.checked = a->compressedrow.checked;
2509   if ( a->compressedrow.checked && a->compressedrow.use){
2510     i = a->compressedrow.nrows;
2511     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2512     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2513     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2514     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2515   } else {
2516     c->compressedrow.use    = PETSC_FALSE;
2517     c->compressedrow.i      = PETSC_NULL;
2518     c->compressedrow.rindex = PETSC_NULL;
2519   }
2520   C->same_nonzero = A->same_nonzero;
2521   *B = C;
2522   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatLoad_SeqBAIJ"
2528 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2529 {
2530   Mat_SeqBAIJ    *a;
2531   Mat            B;
2532   PetscErrorCode ierr;
2533   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2534   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2535   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2536   PetscInt       *masked,nmask,tmp,bs2,ishift;
2537   PetscMPIInt    size;
2538   int            fd;
2539   PetscScalar    *aa;
2540   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2541 
2542   PetscFunctionBegin;
2543   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2544   bs2  = bs*bs;
2545 
2546   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2547   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2548   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2549   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2550   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2551   M = header[1]; N = header[2]; nz = header[3];
2552 
2553   if (header[3] < 0) {
2554     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2555   }
2556 
2557   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2558 
2559   /*
2560      This code adds extra rows to make sure the number of rows is
2561     divisible by the blocksize
2562   */
2563   mbs        = M/bs;
2564   extra_rows = bs - M + bs*(mbs);
2565   if (extra_rows == bs) extra_rows = 0;
2566   else                  mbs++;
2567   if (extra_rows) {
2568     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2569   }
2570 
2571   /* read in row lengths */
2572   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2573   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2574   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2575 
2576   /* read in column indices */
2577   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2578   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2579   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2580 
2581   /* loop over row lengths determining block row lengths */
2582   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2583   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2584   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2585   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2586   masked   = mask + mbs;
2587   rowcount = 0; nzcount = 0;
2588   for (i=0; i<mbs; i++) {
2589     nmask = 0;
2590     for (j=0; j<bs; j++) {
2591       kmax = rowlengths[rowcount];
2592       for (k=0; k<kmax; k++) {
2593         tmp = jj[nzcount++]/bs;
2594         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2595       }
2596       rowcount++;
2597     }
2598     browlengths[i] += nmask;
2599     /* zero out the mask elements we set */
2600     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2601   }
2602 
2603   /* create our matrix */
2604   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2605   ierr = MatSetType(B,type);CHKERRQ(ierr);
2606   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2607   a = (Mat_SeqBAIJ*)B->data;
2608 
2609   /* set matrix "i" values */
2610   a->i[0] = 0;
2611   for (i=1; i<= mbs; i++) {
2612     a->i[i]      = a->i[i-1] + browlengths[i-1];
2613     a->ilen[i-1] = browlengths[i-1];
2614   }
2615   a->nz         = 0;
2616   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2617 
2618   /* read in nonzero values */
2619   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2620   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2621   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2622 
2623   /* set "a" and "j" values into matrix */
2624   nzcount = 0; jcount = 0;
2625   for (i=0; i<mbs; i++) {
2626     nzcountb = nzcount;
2627     nmask    = 0;
2628     for (j=0; j<bs; j++) {
2629       kmax = rowlengths[i*bs+j];
2630       for (k=0; k<kmax; k++) {
2631         tmp = jj[nzcount++]/bs;
2632 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2633       }
2634     }
2635     /* sort the masked values */
2636     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2637 
2638     /* set "j" values into matrix */
2639     maskcount = 1;
2640     for (j=0; j<nmask; j++) {
2641       a->j[jcount++]  = masked[j];
2642       mask[masked[j]] = maskcount++;
2643     }
2644     /* set "a" values into matrix */
2645     ishift = bs2*a->i[i];
2646     for (j=0; j<bs; j++) {
2647       kmax = rowlengths[i*bs+j];
2648       for (k=0; k<kmax; k++) {
2649         tmp       = jj[nzcountb]/bs ;
2650         block     = mask[tmp] - 1;
2651         point     = jj[nzcountb] - bs*tmp;
2652         idx       = ishift + bs2*block + j + bs*point;
2653         a->a[idx] = (MatScalar)aa[nzcountb++];
2654       }
2655     }
2656     /* zero out the mask elements we set */
2657     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2658   }
2659   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2660 
2661   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2662   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2663   ierr = PetscFree(aa);CHKERRQ(ierr);
2664   ierr = PetscFree(jj);CHKERRQ(ierr);
2665   ierr = PetscFree(mask);CHKERRQ(ierr);
2666 
2667   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2668   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2669   ierr = MatView_Private(B);CHKERRQ(ierr);
2670 
2671   *A = B;
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 #undef __FUNCT__
2676 #define __FUNCT__ "MatCreateSeqBAIJ"
2677 /*@C
2678    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2679    compressed row) format.  For good matrix assembly performance the
2680    user should preallocate the matrix storage by setting the parameter nz
2681    (or the array nnz).  By setting these parameters accurately, performance
2682    during matrix assembly can be increased by more than a factor of 50.
2683 
2684    Collective on MPI_Comm
2685 
2686    Input Parameters:
2687 +  comm - MPI communicator, set to PETSC_COMM_SELF
2688 .  bs - size of block
2689 .  m - number of rows
2690 .  n - number of columns
2691 .  nz - number of nonzero blocks  per block row (same for all rows)
2692 -  nnz - array containing the number of nonzero blocks in the various block rows
2693          (possibly different for each block row) or PETSC_NULL
2694 
2695    Output Parameter:
2696 .  A - the matrix
2697 
2698    Options Database Keys:
2699 .   -mat_no_unroll - uses code that does not unroll the loops in the
2700                      block calculations (much slower)
2701 .    -mat_block_size - size of the blocks to use
2702 
2703    Level: intermediate
2704 
2705    Notes:
2706    The number of rows and columns must be divisible by blocksize.
2707 
2708    If the nnz parameter is given then the nz parameter is ignored
2709 
2710    A nonzero block is any block that as 1 or more nonzeros in it
2711 
2712    The block AIJ format is fully compatible with standard Fortran 77
2713    storage.  That is, the stored row and column indices can begin at
2714    either one (as in Fortran) or zero.  See the users' manual for details.
2715 
2716    Specify the preallocated storage with either nz or nnz (not both).
2717    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2718    allocation.  For additional details, see the users manual chapter on
2719    matrices.
2720 
2721 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2722 @*/
2723 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2724 {
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2729   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2730   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2736 /*@C
2737    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2738    per row in the matrix. For good matrix assembly performance the
2739    user should preallocate the matrix storage by setting the parameter nz
2740    (or the array nnz).  By setting these parameters accurately, performance
2741    during matrix assembly can be increased by more than a factor of 50.
2742 
2743    Collective on MPI_Comm
2744 
2745    Input Parameters:
2746 +  A - the matrix
2747 .  bs - size of block
2748 .  nz - number of block nonzeros per block row (same for all rows)
2749 -  nnz - array containing the number of block nonzeros in the various block rows
2750          (possibly different for each block row) or PETSC_NULL
2751 
2752    Options Database Keys:
2753 .   -mat_no_unroll - uses code that does not unroll the loops in the
2754                      block calculations (much slower)
2755 .    -mat_block_size - size of the blocks to use
2756 
2757    Level: intermediate
2758 
2759    Notes:
2760    If the nnz parameter is given then the nz parameter is ignored
2761 
2762    The block AIJ format is fully compatible with standard Fortran 77
2763    storage.  That is, the stored row and column indices can begin at
2764    either one (as in Fortran) or zero.  See the users' manual for details.
2765 
2766    Specify the preallocated storage with either nz or nnz (not both).
2767    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2768    allocation.  For additional details, see the users manual chapter on
2769    matrices.
2770 
2771 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2772 @*/
2773 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2774 {
2775   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2776 
2777   PetscFunctionBegin;
2778   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2779   if (f) {
2780     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2781   }
2782   PetscFunctionReturn(0);
2783 }
2784 
2785