xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 43516a2dbae745ae94faf364ea7a6942f760589a)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@C
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr,(*f)(Mat);
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
29   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31 
32   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33   if (f) {
34     ierr = (*f)(mat);CHKERRQ(ierr);
35   } else {
36     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 EXTERN_C_BEGIN
42 #undef __FUNCT__
43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45 {
46   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
47   PetscErrorCode ierr;
48   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
49   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
50 
51   PetscFunctionBegin;
52   if (a->idiagvalid) PetscFunctionReturn(0);
53   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
54   diag_offset = a->diag;
55   if (!a->idiag) {
56     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
57   }
58   diag  = a->idiag;
59   mdiag = a->idiag+bs*bs*mbs;
60   /* factor and invert each block */
61   switch (bs){
62     case 2:
63       for (i=0; i<mbs; i++) {
64         odiag   = v + 4*diag_offset[i];
65         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
66 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
67 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
68 	diag    += 4;
69 	mdiag   += 4;
70       }
71       break;
72     case 3:
73       for (i=0; i<mbs; i++) {
74         odiag    = v + 9*diag_offset[i];
75         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
76         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
77         diag[8]  = odiag[8];
78         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
80         mdiag[8] = odiag[8];
81 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
82 	diag    += 9;
83 	mdiag   += 9;
84       }
85       break;
86     case 4:
87       for (i=0; i<mbs; i++) {
88         odiag  = v + 16*diag_offset[i];
89         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
90         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
91 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
92 	diag  += 16;
93 	mdiag += 16;
94       }
95       break;
96     case 5:
97       for (i=0; i<mbs; i++) {
98         odiag = v + 25*diag_offset[i];
99         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
100         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
101 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
102 	diag  += 25;
103 	mdiag += 25;
104       }
105       break;
106     default:
107       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
108   }
109   a->idiagvalid = PETSC_TRUE;
110   PetscFunctionReturn(0);
111 }
112 EXTERN_C_END
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
116 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
117 {
118   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
119   PetscScalar        *x,x1,x2,s1,s2;
120   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
121   PetscErrorCode     ierr;
122   PetscInt           m = a->mbs,i,i2,nz,idx;
123   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
124 
125   PetscFunctionBegin;
126   its = its*lits;
127   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
128   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
129   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
130   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
131   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
132 
133   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
134 
135   diag  = a->diag;
136   idiag = a->idiag;
137   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
138   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
139 
140   if (flag & SOR_ZERO_INITIAL_GUESS) {
141     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
142       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
143       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
144       i2     = 2;
145       idiag += 4;
146       for (i=1; i<m; i++) {
147 	v     = aa + 4*ai[i];
148 	vi    = aj + ai[i];
149 	nz    = diag[i] - ai[i];
150 	s1    = b[i2]; s2 = b[i2+1];
151 	while (nz--) {
152 	  idx  = 2*(*vi++);
153 	  x1   = x[idx]; x2 = x[1+idx];
154 	  s1  -= v[0]*x1 + v[2]*x2;
155 	  s2  -= v[1]*x1 + v[3]*x2;
156 	  v   += 4;
157 	}
158 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
159 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
160         idiag   += 4;
161         i2      += 2;
162       }
163       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
164       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
165     }
166     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
167         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
168       i2    = 0;
169       mdiag = a->idiag+4*a->mbs;
170       for (i=0; i<m; i++) {
171         x1      = x[i2]; x2 = x[i2+1];
172         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
173         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
174         mdiag  += 4;
175         i2     += 2;
176       }
177       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
178     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
179       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
180     }
181     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
182       idiag   = a->idiag+4*a->mbs - 4;
183       i2      = 2*m - 2;
184       x1      = x[i2]; x2 = x[i2+1];
185       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
186       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
187       idiag -= 4;
188       i2    -= 2;
189       for (i=m-2; i>=0; i--) {
190 	v     = aa + 4*(diag[i]+1);
191 	vi    = aj + diag[i] + 1;
192 	nz    = ai[i+1] - diag[i] - 1;
193 	s1    = x[i2]; s2 = x[i2+1];
194 	while (nz--) {
195 	  idx  = 2*(*vi++);
196 	  x1   = x[idx]; x2 = x[1+idx];
197 	  s1  -= v[0]*x1 + v[2]*x2;
198 	  s2  -= v[1]*x1 + v[3]*x2;
199 	  v   += 4;
200 	}
201 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
202 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
203         idiag   -= 4;
204         i2      -= 2;
205       }
206       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
207     }
208   } else {
209     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
210   }
211   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
212   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
213   PetscFunctionReturn(0);
214 }
215 
216 #undef __FUNCT__
217 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
218 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
219 {
220   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
221   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
222   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
223   PetscErrorCode     ierr;
224   PetscInt           m = a->mbs,i,i2,nz,idx;
225   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
226 
227   PetscFunctionBegin;
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
246       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
247       i2     = 3;
248       idiag += 9;
249       for (i=1; i<m; i++) {
250 	v     = aa + 9*ai[i];
251 	vi    = aj + ai[i];
252 	nz    = diag[i] - ai[i];
253 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
254 	while (nz--) {
255 	  idx  = 3*(*vi++);
256 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
257 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
258 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
259 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
260 	  v   += 9;
261 	}
262 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
263 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
264 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
265         idiag   += 9;
266         i2      += 3;
267       }
268       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
269       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
270     }
271     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
272         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
273       i2    = 0;
274       mdiag = a->idiag+9*a->mbs;
275       for (i=0; i<m; i++) {
276         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
277         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
278         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
279         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
280         mdiag  += 9;
281         i2     += 3;
282       }
283       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+9*a->mbs - 9;
289       i2      = 3*m - 3;
290       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
291       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
292       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
293       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
294       idiag -= 9;
295       i2    -= 3;
296       for (i=m-2; i>=0; i--) {
297 	v     = aa + 9*(diag[i]+1);
298 	vi    = aj + diag[i] + 1;
299 	nz    = ai[i+1] - diag[i] - 1;
300 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
301 	while (nz--) {
302 	  idx  = 3*(*vi++);
303 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
304 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
305 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
306 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
307 	  v   += 9;
308 	}
309 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
310 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
311 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
312         idiag   -= 9;
313         i2      -= 3;
314       }
315       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
316     }
317   } else {
318     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
319   }
320   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
321   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
322   PetscFunctionReturn(0);
323 }
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
327 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
328 {
329   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
330   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
331   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
332   PetscErrorCode     ierr;
333   PetscInt           m = a->mbs,i,i2,nz,idx;
334   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
335 
336   PetscFunctionBegin;
337   its = its*lits;
338   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
339   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
340   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
341   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
342   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
343 
344   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
345 
346   diag  = a->diag;
347   idiag = a->idiag;
348   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
349   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
350 
351   if (flag & SOR_ZERO_INITIAL_GUESS) {
352     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
353       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
354       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
355       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
356       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
357       i2     = 4;
358       idiag += 16;
359       for (i=1; i<m; i++) {
360 	v     = aa + 16*ai[i];
361 	vi    = aj + ai[i];
362 	nz    = diag[i] - ai[i];
363 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
364 	while (nz--) {
365 	  idx  = 4*(*vi++);
366 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
367 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
368 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
369 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
370 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
371 	  v   += 16;
372 	}
373 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
374 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
375 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
376 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
377         idiag   += 16;
378         i2      += 4;
379       }
380       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
381       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
382     }
383     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
384         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
385       i2    = 0;
386       mdiag = a->idiag+16*a->mbs;
387       for (i=0; i<m; i++) {
388         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
389         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
390         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
391         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
392         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
393         mdiag  += 16;
394         i2     += 4;
395       }
396       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
397     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
398       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
399     }
400     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
401       idiag   = a->idiag+16*a->mbs - 16;
402       i2      = 4*m - 4;
403       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
404       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
405       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
406       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
407       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
408       idiag -= 16;
409       i2    -= 4;
410       for (i=m-2; i>=0; i--) {
411 	v     = aa + 16*(diag[i]+1);
412 	vi    = aj + diag[i] + 1;
413 	nz    = ai[i+1] - diag[i] - 1;
414 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
415 	while (nz--) {
416 	  idx  = 4*(*vi++);
417 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
418 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
419 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
420 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
421 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
422 	  v   += 16;
423 	}
424 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
425 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
426 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
427 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
428         idiag   -= 16;
429         i2      -= 4;
430       }
431       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
432     }
433   } else {
434     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
435   }
436   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
437   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
443 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
444 {
445   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
446   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
447   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
448   PetscErrorCode     ierr;
449   PetscInt           m = a->mbs,i,i2,nz,idx;
450   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
451 
452   PetscFunctionBegin;
453   its = its*lits;
454   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
455   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
456   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
457   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
458   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
459 
460   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
461 
462   diag  = a->diag;
463   idiag = a->idiag;
464   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
465   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
466 
467   if (flag & SOR_ZERO_INITIAL_GUESS) {
468     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
469       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
470       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
471       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
472       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
473       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
474       i2     = 5;
475       idiag += 25;
476       for (i=1; i<m; i++) {
477 	v     = aa + 25*ai[i];
478 	vi    = aj + ai[i];
479 	nz    = diag[i] - ai[i];
480 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
481 	while (nz--) {
482 	  idx  = 5*(*vi++);
483 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
484 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
485 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
486 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
487 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
488 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
489 	  v   += 25;
490 	}
491 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
492 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
493 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
494 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
495 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
496         idiag   += 25;
497         i2      += 5;
498       }
499       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
500       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
501     }
502     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
503         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
504       i2    = 0;
505       mdiag = a->idiag+25*a->mbs;
506       for (i=0; i<m; i++) {
507         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
508         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
509         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
510         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
511         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
512         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
513         mdiag  += 25;
514         i2     += 5;
515       }
516       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
517     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
518       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
519     }
520     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
521       idiag   = a->idiag+25*a->mbs - 25;
522       i2      = 5*m - 5;
523       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
524       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
525       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
526       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
527       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
528       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
529       idiag -= 25;
530       i2    -= 5;
531       for (i=m-2; i>=0; i--) {
532 	v     = aa + 25*(diag[i]+1);
533 	vi    = aj + diag[i] + 1;
534 	nz    = ai[i+1] - diag[i] - 1;
535 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
536 	while (nz--) {
537 	  idx  = 5*(*vi++);
538 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
539 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
540 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
541 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
542 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
543 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
544 	  v   += 25;
545 	}
546 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
547 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
548 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
549 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
550 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
551         idiag   -= 25;
552         i2      -= 5;
553       }
554       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
555     }
556   } else {
557     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
558   }
559   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
560   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
561   PetscFunctionReturn(0);
562 }
563 
564 /*
565     Special version for Fun3d sequential benchmark
566 */
567 #if defined(PETSC_HAVE_FORTRAN_CAPS)
568 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
569 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
570 #define matsetvaluesblocked4_ matsetvaluesblocked4
571 #endif
572 
573 EXTERN_C_BEGIN
574 #undef __FUNCT__
575 #define __FUNCT__ "matsetvaluesblocked4_"
576 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
577 {
578   Mat               A = *AA;
579   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
580   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
581   PetscInt          *ai=a->i,*ailen=a->ilen;
582   PetscInt          *aj=a->j,stepval;
583   const PetscScalar *value = v;
584   MatScalar         *ap,*aa = a->a,*bap;
585 
586   PetscFunctionBegin;
587   stepval = (n-1)*4;
588   for (k=0; k<m; k++) { /* loop over added rows */
589     row  = im[k];
590     rp   = aj + ai[row];
591     ap   = aa + 16*ai[row];
592     nrow = ailen[row];
593     low  = 0;
594     for (l=0; l<n; l++) { /* loop over added columns */
595       col = in[l];
596       value = v + k*(stepval+4)*4 + l*4;
597       low = 0; high = nrow;
598       while (high-low > 7) {
599         t = (low+high)/2;
600         if (rp[t] > col) high = t;
601         else             low  = t;
602       }
603       for (i=low; i<high; i++) {
604         if (rp[i] > col) break;
605         if (rp[i] == col) {
606           bap  = ap +  16*i;
607           for (ii=0; ii<4; ii++,value+=stepval) {
608             for (jj=ii; jj<16; jj+=4) {
609               bap[jj] += *value++;
610             }
611           }
612           goto noinsert2;
613         }
614       }
615       N = nrow++ - 1;
616       /* shift up all the later entries in this row */
617       for (ii=N; ii>=i; ii--) {
618         rp[ii+1] = rp[ii];
619         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
620       }
621       if (N >= i) {
622         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
623       }
624       rp[i] = col;
625       bap   = ap +  16*i;
626       for (ii=0; ii<4; ii++,value+=stepval) {
627         for (jj=ii; jj<16; jj+=4) {
628           bap[jj] = *value++;
629         }
630       }
631       noinsert2:;
632       low = i;
633     }
634     ailen[row] = nrow;
635   }
636 }
637 EXTERN_C_END
638 
639 #if defined(PETSC_HAVE_FORTRAN_CAPS)
640 #define matsetvalues4_ MATSETVALUES4
641 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
642 #define matsetvalues4_ matsetvalues4
643 #endif
644 
645 EXTERN_C_BEGIN
646 #undef __FUNCT__
647 #define __FUNCT__ "MatSetValues4_"
648 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
649 {
650   Mat         A = *AA;
651   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
652   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
653   PetscInt    *ai=a->i,*ailen=a->ilen;
654   PetscInt    *aj=a->j,brow,bcol;
655   PetscInt    ridx,cidx;
656   MatScalar   *ap,value,*aa=a->a,*bap;
657 
658   PetscFunctionBegin;
659   for (k=0; k<m; k++) { /* loop over added rows */
660     row  = im[k]; brow = row/4;
661     rp   = aj + ai[brow];
662     ap   = aa + 16*ai[brow];
663     nrow = ailen[brow];
664     low  = 0;
665     for (l=0; l<n; l++) { /* loop over added columns */
666       col = in[l]; bcol = col/4;
667       ridx = row % 4; cidx = col % 4;
668       value = v[l + k*n];
669       low = 0; high = nrow;
670       while (high-low > 7) {
671         t = (low+high)/2;
672         if (rp[t] > bcol) high = t;
673         else              low  = t;
674       }
675       for (i=low; i<high; i++) {
676         if (rp[i] > bcol) break;
677         if (rp[i] == bcol) {
678           bap  = ap +  16*i + 4*cidx + ridx;
679           *bap += value;
680           goto noinsert1;
681         }
682       }
683       N = nrow++ - 1;
684       /* shift up all the later entries in this row */
685       for (ii=N; ii>=i; ii--) {
686         rp[ii+1] = rp[ii];
687         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
688       }
689       if (N>=i) {
690         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
691       }
692       rp[i]                    = bcol;
693       ap[16*i + 4*cidx + ridx] = value;
694       noinsert1:;
695       low = i;
696     }
697     ailen[brow] = nrow;
698   }
699 }
700 EXTERN_C_END
701 
702 /*  UGLY, ugly, ugly
703    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
704    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
705    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
706    converts the entries into single precision and then calls ..._MatScalar() to put them
707    into the single precision data structures.
708 */
709 #if defined(PETSC_USE_MAT_SINGLE)
710 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
711 #else
712 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
713 #endif
714 
715 #define CHUNKSIZE  10
716 
717 /*
718      Checks for missing diagonals
719 */
720 #undef __FUNCT__
721 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
722 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
723 {
724   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
725   PetscErrorCode ierr;
726   PetscInt       *diag,*jj = a->j,i;
727 
728   PetscFunctionBegin;
729   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
730   diag = a->diag;
731   for (i=0; i<a->mbs; i++) {
732     if (jj[diag[i]] != i) {
733       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
734     }
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
741 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
742 {
743   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
744   PetscErrorCode ierr;
745   PetscInt       i,j,*diag,m = a->mbs;
746 
747   PetscFunctionBegin;
748   if (a->diag) PetscFunctionReturn(0);
749 
750   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
751   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
752   for (i=0; i<m; i++) {
753     diag[i] = a->i[i+1];
754     for (j=a->i[i]; j<a->i[i+1]; j++) {
755       if (a->j[j] == i) {
756         diag[i] = j;
757         break;
758       }
759     }
760   }
761   a->diag = diag;
762   PetscFunctionReturn(0);
763 }
764 
765 
766 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
770 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
771 {
772   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
773   PetscErrorCode ierr;
774   PetscInt       n = a->mbs,i;
775 
776   PetscFunctionBegin;
777   *nn = n;
778   if (!ia) PetscFunctionReturn(0);
779   if (symmetric) {
780     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
781   } else if (oshift == 1) {
782     /* temporarily add 1 to i and j indices */
783     PetscInt nz = a->i[n];
784     for (i=0; i<nz; i++) a->j[i]++;
785     for (i=0; i<n+1; i++) a->i[i]++;
786     *ia = a->i; *ja = a->j;
787   } else {
788     *ia = a->i; *ja = a->j;
789   }
790 
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
796 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
797 {
798   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
799   PetscErrorCode ierr;
800   PetscInt       i,n = a->mbs;
801 
802   PetscFunctionBegin;
803   if (!ia) PetscFunctionReturn(0);
804   if (symmetric) {
805     ierr = PetscFree(*ia);CHKERRQ(ierr);
806     ierr = PetscFree(*ja);CHKERRQ(ierr);
807   } else if (oshift == 1) {
808     PetscInt nz = a->i[n]-1;
809     for (i=0; i<nz; i++) a->j[i]--;
810     for (i=0; i<n+1; i++) a->i[i]--;
811   }
812   PetscFunctionReturn(0);
813 }
814 
815 #undef __FUNCT__
816 #define __FUNCT__ "MatDestroy_SeqBAIJ"
817 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
818 {
819   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
820   PetscErrorCode ierr;
821 
822   PetscFunctionBegin;
823 #if defined(PETSC_USE_LOG)
824   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
825 #endif
826   ierr = PetscFree(a->a);CHKERRQ(ierr);
827   if (!a->singlemalloc) {
828     ierr = PetscFree(a->i);CHKERRQ(ierr);
829     ierr = PetscFree(a->j);CHKERRQ(ierr);
830   }
831   if (a->row) {
832     ierr = ISDestroy(a->row);CHKERRQ(ierr);
833   }
834   if (a->col) {
835     ierr = ISDestroy(a->col);CHKERRQ(ierr);
836   }
837   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
838   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
839   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
840   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
841   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
842   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
843   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
844   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
845 #if defined(PETSC_USE_MAT_SINGLE)
846   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
847 #endif
848   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
849   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
850 
851   ierr = PetscFree(a);CHKERRQ(ierr);
852 
853   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "MatSetOption_SeqBAIJ"
865 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
866 {
867   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   switch (op) {
872   case MAT_ROW_ORIENTED:
873     a->roworiented    = PETSC_TRUE;
874     break;
875   case MAT_COLUMN_ORIENTED:
876     a->roworiented    = PETSC_FALSE;
877     break;
878   case MAT_COLUMNS_SORTED:
879     a->sorted         = PETSC_TRUE;
880     break;
881   case MAT_COLUMNS_UNSORTED:
882     a->sorted         = PETSC_FALSE;
883     break;
884   case MAT_KEEP_ZEROED_ROWS:
885     a->keepzeroedrows = PETSC_TRUE;
886     break;
887   case MAT_NO_NEW_NONZERO_LOCATIONS:
888     a->nonew          = 1;
889     break;
890   case MAT_NEW_NONZERO_LOCATION_ERR:
891     a->nonew          = -1;
892     break;
893   case MAT_NEW_NONZERO_ALLOCATION_ERR:
894     a->nonew          = -2;
895     break;
896   case MAT_YES_NEW_NONZERO_LOCATIONS:
897     a->nonew          = 0;
898     break;
899   case MAT_ROWS_SORTED:
900   case MAT_ROWS_UNSORTED:
901   case MAT_YES_NEW_DIAGONALS:
902   case MAT_IGNORE_OFF_PROC_ENTRIES:
903   case MAT_USE_HASH_TABLE:
904     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
905     break;
906   case MAT_NO_NEW_DIAGONALS:
907     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
908   case MAT_SYMMETRIC:
909   case MAT_STRUCTURALLY_SYMMETRIC:
910   case MAT_NOT_SYMMETRIC:
911   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
912   case MAT_HERMITIAN:
913   case MAT_NOT_HERMITIAN:
914   case MAT_SYMMETRY_ETERNAL:
915   case MAT_NOT_SYMMETRY_ETERNAL:
916     break;
917   default:
918     SETERRQ(PETSC_ERR_SUP,"unknown option");
919   }
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatGetRow_SeqBAIJ"
925 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
926 {
927   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
928   PetscErrorCode ierr;
929   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
930   MatScalar      *aa,*aa_i;
931   PetscScalar    *v_i;
932 
933   PetscFunctionBegin;
934   bs  = A->bs;
935   ai  = a->i;
936   aj  = a->j;
937   aa  = a->a;
938   bs2 = a->bs2;
939 
940   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
941 
942   bn  = row/bs;   /* Block number */
943   bp  = row % bs; /* Block Position */
944   M   = ai[bn+1] - ai[bn];
945   *nz = bs*M;
946 
947   if (v) {
948     *v = 0;
949     if (*nz) {
950       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
951       for (i=0; i<M; i++) { /* for each block in the block row */
952         v_i  = *v + i*bs;
953         aa_i = aa + bs2*(ai[bn] + i);
954         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
955       }
956     }
957   }
958 
959   if (idx) {
960     *idx = 0;
961     if (*nz) {
962       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
963       for (i=0; i<M; i++) { /* for each block in the block row */
964         idx_i = *idx + i*bs;
965         itmp  = bs*aj[ai[bn] + i];
966         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
967       }
968     }
969   }
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
975 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
976 {
977   PetscErrorCode ierr;
978 
979   PetscFunctionBegin;
980   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
981   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatTranspose_SeqBAIJ"
987 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
988 {
989   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
990   Mat            C;
991   PetscErrorCode ierr;
992   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
993   PetscInt       *rows,*cols,bs2=a->bs2;
994   PetscScalar    *array;
995 
996   PetscFunctionBegin;
997   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
998   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
999   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1000 
1001 #if defined(PETSC_USE_MAT_SINGLE)
1002   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1003   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1004 #else
1005   array = a->a;
1006 #endif
1007 
1008   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1009   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
1010   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1011   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1012   ierr = PetscFree(col);CHKERRQ(ierr);
1013   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1014   cols = rows + bs;
1015   for (i=0; i<mbs; i++) {
1016     cols[0] = i*bs;
1017     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1018     len = ai[i+1] - ai[i];
1019     for (j=0; j<len; j++) {
1020       rows[0] = (*aj++)*bs;
1021       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1022       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1023       array += bs2;
1024     }
1025   }
1026   ierr = PetscFree(rows);CHKERRQ(ierr);
1027 #if defined(PETSC_USE_MAT_SINGLE)
1028   ierr = PetscFree(array);
1029 #endif
1030 
1031   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1033 
1034   if (B) {
1035     *B = C;
1036   } else {
1037     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1044 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1045 {
1046   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1047   PetscErrorCode ierr;
1048   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1049   int            fd;
1050   PetscScalar    *aa;
1051   FILE           *file;
1052 
1053   PetscFunctionBegin;
1054   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1055   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1056   col_lens[0] = MAT_FILE_COOKIE;
1057 
1058   col_lens[1] = A->m;
1059   col_lens[2] = A->n;
1060   col_lens[3] = a->nz*bs2;
1061 
1062   /* store lengths of each row and write (including header) to file */
1063   count = 0;
1064   for (i=0; i<a->mbs; i++) {
1065     for (j=0; j<bs; j++) {
1066       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1067     }
1068   }
1069   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1070   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1071 
1072   /* store column indices (zero start index) */
1073   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1074   count = 0;
1075   for (i=0; i<a->mbs; i++) {
1076     for (j=0; j<bs; j++) {
1077       for (k=a->i[i]; k<a->i[i+1]; k++) {
1078         for (l=0; l<bs; l++) {
1079           jj[count++] = bs*a->j[k] + l;
1080         }
1081       }
1082     }
1083   }
1084   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1085   ierr = PetscFree(jj);CHKERRQ(ierr);
1086 
1087   /* store nonzero values */
1088   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1089   count = 0;
1090   for (i=0; i<a->mbs; i++) {
1091     for (j=0; j<bs; j++) {
1092       for (k=a->i[i]; k<a->i[i+1]; k++) {
1093         for (l=0; l<bs; l++) {
1094           aa[count++] = a->a[bs2*k + l*bs + j];
1095         }
1096       }
1097     }
1098   }
1099   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1100   ierr = PetscFree(aa);CHKERRQ(ierr);
1101 
1102   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1103   if (file) {
1104     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1111 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1112 {
1113   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1114   PetscErrorCode    ierr;
1115   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1116   PetscViewerFormat format;
1117 
1118   PetscFunctionBegin;
1119   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1120   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1121     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1122   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1123     Mat aij;
1124     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1125     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1126     ierr = MatDestroy(aij);CHKERRQ(ierr);
1127   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1128      PetscFunctionReturn(0);
1129   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1130     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1131     for (i=0; i<a->mbs; i++) {
1132       for (j=0; j<bs; j++) {
1133         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1134         for (k=a->i[i]; k<a->i[i+1]; k++) {
1135           for (l=0; l<bs; l++) {
1136 #if defined(PETSC_USE_COMPLEX)
1137             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1138               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1139                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1140             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1141               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1142                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1143             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             }
1146 #else
1147             if (a->a[bs2*k + l*bs + j] != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1149             }
1150 #endif
1151           }
1152         }
1153         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1154       }
1155     }
1156     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1157   } else {
1158     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1159     for (i=0; i<a->mbs; i++) {
1160       for (j=0; j<bs; j++) {
1161         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1162         for (k=a->i[i]; k<a->i[i+1]; k++) {
1163           for (l=0; l<bs; l++) {
1164 #if defined(PETSC_USE_COMPLEX)
1165             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1166               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1167                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1168             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1169               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1170                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1171             } else {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1173             }
1174 #else
1175             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1176 #endif
1177           }
1178         }
1179         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1180       }
1181     }
1182     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1183   }
1184   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1190 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1191 {
1192   Mat            A = (Mat) Aa;
1193   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1194   PetscErrorCode ierr;
1195   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1196   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1197   MatScalar      *aa;
1198   PetscViewer    viewer;
1199 
1200   PetscFunctionBegin;
1201 
1202   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1203   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1204 
1205   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1206 
1207   /* loop over matrix elements drawing boxes */
1208   color = PETSC_DRAW_BLUE;
1209   for (i=0,row=0; i<mbs; i++,row+=bs) {
1210     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1212       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1213       aa = a->a + j*bs2;
1214       for (k=0; k<bs; k++) {
1215         for (l=0; l<bs; l++) {
1216           if (PetscRealPart(*aa++) >=  0.) continue;
1217           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1218         }
1219       }
1220     }
1221   }
1222   color = PETSC_DRAW_CYAN;
1223   for (i=0,row=0; i<mbs; i++,row+=bs) {
1224     for (j=a->i[i]; j<a->i[i+1]; j++) {
1225       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1226       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1227       aa = a->a + j*bs2;
1228       for (k=0; k<bs; k++) {
1229         for (l=0; l<bs; l++) {
1230           if (PetscRealPart(*aa++) != 0.) continue;
1231           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1232         }
1233       }
1234     }
1235   }
1236 
1237   color = PETSC_DRAW_RED;
1238   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1241       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242       aa = a->a + j*bs2;
1243       for (k=0; k<bs; k++) {
1244         for (l=0; l<bs; l++) {
1245           if (PetscRealPart(*aa++) <= 0.) continue;
1246           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1247         }
1248       }
1249     }
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1256 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1257 {
1258   PetscErrorCode ierr;
1259   PetscReal      xl,yl,xr,yr,w,h;
1260   PetscDraw      draw;
1261   PetscTruth     isnull;
1262 
1263   PetscFunctionBegin;
1264 
1265   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1266   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1267 
1268   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1269   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1270   xr += w;    yr += h;  xl = -w;     yl = -h;
1271   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1272   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1273   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatView_SeqBAIJ"
1279 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1280 {
1281   PetscErrorCode ierr;
1282   PetscTruth     iascii,isbinary,isdraw;
1283 
1284   PetscFunctionBegin;
1285   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1286   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1287   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1288   if (iascii){
1289     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1290   } else if (isbinary) {
1291     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1292   } else if (isdraw) {
1293     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1294   } else {
1295     Mat B;
1296     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1297     ierr = MatView(B,viewer);CHKERRQ(ierr);
1298     ierr = MatDestroy(B);CHKERRQ(ierr);
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1306 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1307 {
1308   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1309   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1310   PetscInt    *ai = a->i,*ailen = a->ilen;
1311   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1312   MatScalar   *ap,*aa = a->a,zero = 0.0;
1313 
1314   PetscFunctionBegin;
1315   for (k=0; k<m; k++) { /* loop over rows */
1316     row  = im[k]; brow = row/bs;
1317     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1318     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1319     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1320     nrow = ailen[brow];
1321     for (l=0; l<n; l++) { /* loop over columns */
1322       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1323       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1324       col  = in[l] ;
1325       bcol = col/bs;
1326       cidx = col%bs;
1327       ridx = row%bs;
1328       high = nrow;
1329       low  = 0; /* assume unsorted */
1330       while (high-low > 5) {
1331         t = (low+high)/2;
1332         if (rp[t] > bcol) high = t;
1333         else             low  = t;
1334       }
1335       for (i=low; i<high; i++) {
1336         if (rp[i] > bcol) break;
1337         if (rp[i] == bcol) {
1338           *v++ = ap[bs2*i+bs*cidx+ridx];
1339           goto finished;
1340         }
1341       }
1342       *v++ = zero;
1343       finished:;
1344     }
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #if defined(PETSC_USE_MAT_SINGLE)
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1352 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1353 {
1354   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1355   PetscErrorCode ierr;
1356   PetscInt       i,N = m*n*b->bs2;
1357   MatScalar      *vsingle;
1358 
1359   PetscFunctionBegin;
1360   if (N > b->setvalueslen) {
1361     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1362     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1363     b->setvalueslen  = N;
1364   }
1365   vsingle = b->setvaluescopy;
1366   for (i=0; i<N; i++) {
1367     vsingle[i] = v[i];
1368   }
1369   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 #endif
1373 
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1377 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1378 {
1379   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1380   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1381   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1382   PetscErrorCode  ierr;
1383   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1384   PetscTruth      roworiented=a->roworiented;
1385   const MatScalar *value = v;
1386   MatScalar       *ap,*aa = a->a,*bap;
1387 
1388   PetscFunctionBegin;
1389   if (roworiented) {
1390     stepval = (n-1)*bs;
1391   } else {
1392     stepval = (m-1)*bs;
1393   }
1394   for (k=0; k<m; k++) { /* loop over added rows */
1395     row  = im[k];
1396     if (row < 0) continue;
1397 #if defined(PETSC_USE_DEBUG)
1398     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1399 #endif
1400     rp   = aj + ai[row];
1401     ap   = aa + bs2*ai[row];
1402     rmax = imax[row];
1403     nrow = ailen[row];
1404     low  = 0;
1405     high = nrow;
1406     for (l=0; l<n; l++) { /* loop over added columns */
1407       if (in[l] < 0) continue;
1408 #if defined(PETSC_USE_DEBUG)
1409       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1410 #endif
1411       col = in[l];
1412       if (roworiented) {
1413         value = v + k*(stepval+bs)*bs + l*bs;
1414       } else {
1415         value = v + l*(stepval+bs)*bs + k*bs;
1416       }
1417       if (col < lastcol) low = 0; else high = nrow;
1418       lastcol = col;
1419       while (high-low > 7) {
1420         t = (low+high)/2;
1421         if (rp[t] > col) high = t;
1422         else             low  = t;
1423       }
1424       for (i=low; i<high; i++) {
1425         if (rp[i] > col) break;
1426         if (rp[i] == col) {
1427           bap  = ap +  bs2*i;
1428           if (roworiented) {
1429             if (is == ADD_VALUES) {
1430               for (ii=0; ii<bs; ii++,value+=stepval) {
1431                 for (jj=ii; jj<bs2; jj+=bs) {
1432                   bap[jj] += *value++;
1433                 }
1434               }
1435             } else {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] = *value++;
1439                 }
1440               }
1441             }
1442           } else {
1443             if (is == ADD_VALUES) {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=0; jj<bs; jj++) {
1446                   *bap++ += *value++;
1447                 }
1448               }
1449             } else {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++  = *value++;
1453                 }
1454               }
1455             }
1456           }
1457           goto noinsert2;
1458         }
1459       }
1460       if (nonew == 1) goto noinsert2;
1461       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1462       if (nrow >= rmax) {
1463         /* there is no extra room in row, therefore enlarge */
1464         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1465         MatScalar *new_a;
1466 
1467         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1468 
1469         /* malloc new storage space */
1470         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1471 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1472         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1473         new_i   = new_j + new_nz;
1474 
1475         /* copy over old data into new slots */
1476         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1477         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1478         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1479         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1480         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1481         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1482         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1483         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1484         /* free up old matrix storage */
1485         ierr = PetscFree(a->a);CHKERRQ(ierr);
1486         if (!a->singlemalloc) {
1487           ierr = PetscFree(a->i);CHKERRQ(ierr);
1488           ierr = PetscFree(a->j);CHKERRQ(ierr);
1489         }
1490         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1491         a->singlemalloc = PETSC_TRUE;
1492 
1493         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1494         rmax = imax[row] = imax[row] + CHUNKSIZE;
1495         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1496         a->maxnz += bs2*CHUNKSIZE;
1497         a->reallocs++;
1498         a->nz++;
1499       }
1500       N = nrow++ - 1;
1501       /* shift up all the later entries in this row */
1502       for (ii=N; ii>=i; ii--) {
1503         rp[ii+1] = rp[ii];
1504         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1505       }
1506       if (N >= i) {
1507         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1508       }
1509       rp[i] = col;
1510       bap   = ap +  bs2*i;
1511       if (roworiented) {
1512         for (ii=0; ii<bs; ii++,value+=stepval) {
1513           for (jj=ii; jj<bs2; jj+=bs) {
1514             bap[jj] = *value++;
1515           }
1516         }
1517       } else {
1518         for (ii=0; ii<bs; ii++,value+=stepval) {
1519           for (jj=0; jj<bs; jj++) {
1520             *bap++  = *value++;
1521           }
1522         }
1523       }
1524       noinsert2:;
1525       low = i;
1526     }
1527     ailen[row] = nrow;
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1534 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1535 {
1536   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1537   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1538   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1539   PetscErrorCode ierr;
1540   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1541   MatScalar      *aa = a->a,*ap;
1542   PetscReal      ratio=0.6;
1543 
1544   PetscFunctionBegin;
1545   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1546 
1547   if (m) rmax = ailen[0];
1548   for (i=1; i<mbs; i++) {
1549     /* move each row back by the amount of empty slots (fshift) before it*/
1550     fshift += imax[i-1] - ailen[i-1];
1551     rmax   = PetscMax(rmax,ailen[i]);
1552     if (fshift) {
1553       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1554       N = ailen[i];
1555       for (j=0; j<N; j++) {
1556         ip[j-fshift] = ip[j];
1557         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1558       }
1559     }
1560     ai[i] = ai[i-1] + ailen[i-1];
1561   }
1562   if (mbs) {
1563     fshift += imax[mbs-1] - ailen[mbs-1];
1564     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1565   }
1566   /* reset ilen and imax for each row */
1567   for (i=0; i<mbs; i++) {
1568     ailen[i] = imax[i] = ai[i+1] - ai[i];
1569   }
1570   a->nz = ai[mbs];
1571 
1572   /* diagonals may have moved, so kill the diagonal pointers */
1573   a->idiagvalid = PETSC_FALSE;
1574   if (fshift && a->diag) {
1575     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1576     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1577     a->diag = 0;
1578   }
1579   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1580   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1581   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1582   a->reallocs          = 0;
1583   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1584 
1585   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1586   if (a->compressedrow.use){
1587     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1588   }
1589 
1590   A->same_nonzero = PETSC_TRUE;
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 /*
1595    This function returns an array of flags which indicate the locations of contiguous
1596    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1597    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1598    Assume: sizes should be long enough to hold all the values.
1599 */
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1602 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1603 {
1604   PetscInt   i,j,k,row;
1605   PetscTruth flg;
1606 
1607   PetscFunctionBegin;
1608   for (i=0,j=0; i<n; j++) {
1609     row = idx[i];
1610     if (row%bs!=0) { /* Not the begining of a block */
1611       sizes[j] = 1;
1612       i++;
1613     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1614       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1615       i++;
1616     } else { /* Begining of the block, so check if the complete block exists */
1617       flg = PETSC_TRUE;
1618       for (k=1; k<bs; k++) {
1619         if (row+k != idx[i+k]) { /* break in the block */
1620           flg = PETSC_FALSE;
1621           break;
1622         }
1623       }
1624       if (flg) { /* No break in the bs */
1625         sizes[j] = bs;
1626         i+= bs;
1627       } else {
1628         sizes[j] = 1;
1629         i++;
1630       }
1631     }
1632   }
1633   *bs_max = j;
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1639 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1640 {
1641   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1642   PetscErrorCode ierr;
1643   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1644   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1645   PetscScalar    zero = 0.0;
1646   MatScalar      *aa;
1647 
1648   PetscFunctionBegin;
1649   /* Make a copy of the IS and  sort it */
1650   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1651   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1652 
1653   /* allocate memory for rows,sizes */
1654   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1655   sizes = rows + is_n;
1656 
1657   /* copy IS values to rows, and sort them */
1658   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1659   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1660   if (baij->keepzeroedrows) {
1661     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1662     bs_max = is_n;
1663     A->same_nonzero = PETSC_TRUE;
1664   } else {
1665     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1666     A->same_nonzero = PETSC_FALSE;
1667   }
1668   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1669 
1670   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1671     row   = rows[j];
1672     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1673     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1674     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1675     if (sizes[i] == bs && !baij->keepzeroedrows) {
1676       if (diag) {
1677         if (baij->ilen[row/bs] > 0) {
1678           baij->ilen[row/bs]       = 1;
1679           baij->j[baij->i[row/bs]] = row/bs;
1680           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1681         }
1682         /* Now insert all the diagonal values for this bs */
1683         for (k=0; k<bs; k++) {
1684           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1685         }
1686       } else { /* (!diag) */
1687         baij->ilen[row/bs] = 0;
1688       } /* end (!diag) */
1689     } else { /* (sizes[i] != bs) */
1690 #if defined (PETSC_USE_DEBUG)
1691       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1692 #endif
1693       for (k=0; k<count; k++) {
1694         aa[0] =  zero;
1695         aa    += bs;
1696       }
1697       if (diag) {
1698         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1699       }
1700     }
1701   }
1702 
1703   ierr = PetscFree(rows);CHKERRQ(ierr);
1704   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1710 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1711 {
1712   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1713   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1714   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1715   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1716   PetscErrorCode ierr;
1717   PetscInt       ridx,cidx,bs2=a->bs2;
1718   PetscTruth     roworiented=a->roworiented;
1719   MatScalar      *ap,value,*aa=a->a,*bap;
1720 
1721   PetscFunctionBegin;
1722   for (k=0; k<m; k++) { /* loop over added rows */
1723     row  = im[k]; brow = row/bs;
1724     if (row < 0) continue;
1725 #if defined(PETSC_USE_DEBUG)
1726     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1727 #endif
1728     rp   = aj + ai[brow];
1729     ap   = aa + bs2*ai[brow];
1730     rmax = imax[brow];
1731     nrow = ailen[brow];
1732     low  = 0;
1733     high = nrow;
1734     for (l=0; l<n; l++) { /* loop over added columns */
1735       if (in[l] < 0) continue;
1736 #if defined(PETSC_USE_DEBUG)
1737       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1738 #endif
1739       col = in[l]; bcol = col/bs;
1740       ridx = row % bs; cidx = col % bs;
1741       if (roworiented) {
1742         value = v[l + k*n];
1743       } else {
1744         value = v[k + l*m];
1745       }
1746       if (col < lastcol) low = 0; else high = nrow;
1747       lastcol = col;
1748       while (high-low > 7) {
1749         t = (low+high)/2;
1750         if (rp[t] > bcol) high = t;
1751         else              low  = t;
1752       }
1753       for (i=low; i<high; i++) {
1754         if (rp[i] > bcol) break;
1755         if (rp[i] == bcol) {
1756           bap  = ap +  bs2*i + bs*cidx + ridx;
1757           if (is == ADD_VALUES) *bap += value;
1758           else                  *bap  = value;
1759           goto noinsert1;
1760         }
1761       }
1762       if (nonew == 1) goto noinsert1;
1763       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1764       if (nrow >= rmax) {
1765         /* there is no extra room in row, therefore enlarge */
1766         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1767         MatScalar *new_a;
1768 
1769         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1770 
1771         /* Malloc new storage space */
1772         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1773 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1774         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1775         new_i   = new_j + new_nz;
1776 
1777         /* copy over old data into new slots */
1778         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1779         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1780         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1781         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1782         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1783         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1784         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1785         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1786         /* free up old matrix storage */
1787         ierr = PetscFree(a->a);CHKERRQ(ierr);
1788         if (!a->singlemalloc) {
1789           ierr = PetscFree(a->i);CHKERRQ(ierr);
1790           ierr = PetscFree(a->j);CHKERRQ(ierr);
1791         }
1792         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1793         a->singlemalloc = PETSC_TRUE;
1794 
1795         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1796         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1797         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1798         a->maxnz += bs2*CHUNKSIZE;
1799         a->reallocs++;
1800         a->nz++;
1801       }
1802       N = nrow++ - 1;
1803       /* shift up all the later entries in this row */
1804       for (ii=N; ii>=i; ii--) {
1805         rp[ii+1] = rp[ii];
1806         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1807       }
1808       if (N>=i) {
1809         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1810       }
1811       rp[i]                      = bcol;
1812       ap[bs2*i + bs*cidx + ridx] = value;
1813       noinsert1:;
1814       low = i;
1815     }
1816     ailen[brow] = nrow;
1817   }
1818   A->same_nonzero = PETSC_FALSE;
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1825 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1826 {
1827   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1828   Mat            outA;
1829   PetscErrorCode ierr;
1830   PetscTruth     row_identity,col_identity;
1831 
1832   PetscFunctionBegin;
1833   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1834   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1835   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1836   if (!row_identity || !col_identity) {
1837     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1838   }
1839 
1840   outA          = inA;
1841   inA->factor   = FACTOR_LU;
1842 
1843   if (!a->diag) {
1844     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1845   }
1846 
1847   a->row        = row;
1848   a->col        = col;
1849   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1850   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1851 
1852   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1853   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1854   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1855 
1856   /*
1857       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1858       for ILU(0) factorization with natural ordering
1859   */
1860   if (inA->bs < 8) {
1861     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1862   } else {
1863     if (!a->solve_work) {
1864       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1865       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1866     }
1867   }
1868 
1869   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1870 
1871   PetscFunctionReturn(0);
1872 }
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1875 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1876 {
1877   static PetscTruth called = PETSC_FALSE;
1878   MPI_Comm          comm = A->comm;
1879   PetscErrorCode    ierr;
1880 
1881   PetscFunctionBegin;
1882   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1883   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1884   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 EXTERN_C_BEGIN
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1891 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1892 {
1893   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1894   PetscInt    i,nz,nbs;
1895 
1896   PetscFunctionBegin;
1897   nz  = baij->maxnz/baij->bs2;
1898   nbs = baij->nbs;
1899   for (i=0; i<nz; i++) {
1900     baij->j[i] = indices[i];
1901   }
1902   baij->nz = nz;
1903   for (i=0; i<nbs; i++) {
1904     baij->ilen[i] = baij->imax[i];
1905   }
1906 
1907   PetscFunctionReturn(0);
1908 }
1909 EXTERN_C_END
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1913 /*@
1914     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1915        in the matrix.
1916 
1917   Input Parameters:
1918 +  mat - the SeqBAIJ matrix
1919 -  indices - the column indices
1920 
1921   Level: advanced
1922 
1923   Notes:
1924     This can be called if you have precomputed the nonzero structure of the
1925   matrix and want to provide it to the matrix object to improve the performance
1926   of the MatSetValues() operation.
1927 
1928     You MUST have set the correct numbers of nonzeros per row in the call to
1929   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1930 
1931     MUST be called before any calls to MatSetValues();
1932 
1933 @*/
1934 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1935 {
1936   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1937 
1938   PetscFunctionBegin;
1939   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1940   PetscValidPointer(indices,2);
1941   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1942   if (f) {
1943     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1944   } else {
1945     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1952 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1953 {
1954   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1955   PetscErrorCode ierr;
1956   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1957   PetscReal      atmp;
1958   PetscScalar    *x,zero = 0.0;
1959   MatScalar      *aa;
1960   PetscInt       ncols,brow,krow,kcol;
1961 
1962   PetscFunctionBegin;
1963   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1964   bs   = A->bs;
1965   aa   = a->a;
1966   ai   = a->i;
1967   aj   = a->j;
1968   mbs = a->mbs;
1969 
1970   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1971   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1972   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1973   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1974   for (i=0; i<mbs; i++) {
1975     ncols = ai[1] - ai[0]; ai++;
1976     brow  = bs*i;
1977     for (j=0; j<ncols; j++){
1978       /* bcol = bs*(*aj); */
1979       for (kcol=0; kcol<bs; kcol++){
1980         for (krow=0; krow<bs; krow++){
1981           atmp = PetscAbsScalar(*aa); aa++;
1982           row = brow + krow;    /* row index */
1983           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1984           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1985         }
1986       }
1987       aj++;
1988     }
1989   }
1990   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1996 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1997 {
1998   PetscErrorCode ierr;
1999 
2000   PetscFunctionBegin;
2001   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2007 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2008 {
2009   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2010   PetscFunctionBegin;
2011   *array = a->a;
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 #undef __FUNCT__
2016 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2017 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2018 {
2019   PetscFunctionBegin;
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #include "petscblaslapack.h"
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2026 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
2027 {
2028   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2029   PetscErrorCode ierr;
2030   PetscInt       i,bs=Y->bs,j,bs2;
2031   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
2032 
2033   PetscFunctionBegin;
2034   if (str == SAME_NONZERO_PATTERN) {
2035     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2036   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2037     if (y->xtoy && y->XtoY != X) {
2038       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2039       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2040     }
2041     if (!y->xtoy) { /* get xtoy */
2042       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2043       y->XtoY = X;
2044     }
2045     bs2 = bs*bs;
2046     for (i=0; i<x->nz; i++) {
2047       j = 0;
2048       while (j < bs2){
2049         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2050         j++;
2051       }
2052     }
2053     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2054   } else {
2055     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2056   }
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 /* -------------------------------------------------------------------*/
2061 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2062        MatGetRow_SeqBAIJ,
2063        MatRestoreRow_SeqBAIJ,
2064        MatMult_SeqBAIJ_N,
2065 /* 4*/ MatMultAdd_SeqBAIJ_N,
2066        MatMultTranspose_SeqBAIJ,
2067        MatMultTransposeAdd_SeqBAIJ,
2068        MatSolve_SeqBAIJ_N,
2069        0,
2070        0,
2071 /*10*/ 0,
2072        MatLUFactor_SeqBAIJ,
2073        0,
2074        0,
2075        MatTranspose_SeqBAIJ,
2076 /*15*/ MatGetInfo_SeqBAIJ,
2077        MatEqual_SeqBAIJ,
2078        MatGetDiagonal_SeqBAIJ,
2079        MatDiagonalScale_SeqBAIJ,
2080        MatNorm_SeqBAIJ,
2081 /*20*/ 0,
2082        MatAssemblyEnd_SeqBAIJ,
2083        0,
2084        MatSetOption_SeqBAIJ,
2085        MatZeroEntries_SeqBAIJ,
2086 /*25*/ MatZeroRows_SeqBAIJ,
2087        MatLUFactorSymbolic_SeqBAIJ,
2088        MatLUFactorNumeric_SeqBAIJ_N,
2089        MatCholeskyFactorSymbolic_SeqBAIJ,
2090        MatCholeskyFactorNumeric_SeqBAIJ_N,
2091 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2092        MatILUFactorSymbolic_SeqBAIJ,
2093        MatICCFactorSymbolic_SeqBAIJ,
2094        MatGetArray_SeqBAIJ,
2095        MatRestoreArray_SeqBAIJ,
2096 /*35*/ MatDuplicate_SeqBAIJ,
2097        0,
2098        0,
2099        MatILUFactor_SeqBAIJ,
2100        0,
2101 /*40*/ MatAXPY_SeqBAIJ,
2102        MatGetSubMatrices_SeqBAIJ,
2103        MatIncreaseOverlap_SeqBAIJ,
2104        MatGetValues_SeqBAIJ,
2105        0,
2106 /*45*/ MatPrintHelp_SeqBAIJ,
2107        MatScale_SeqBAIJ,
2108        0,
2109        0,
2110        0,
2111 /*50*/ 0,
2112        MatGetRowIJ_SeqBAIJ,
2113        MatRestoreRowIJ_SeqBAIJ,
2114        0,
2115        0,
2116 /*55*/ 0,
2117        0,
2118        0,
2119        0,
2120        MatSetValuesBlocked_SeqBAIJ,
2121 /*60*/ MatGetSubMatrix_SeqBAIJ,
2122        MatDestroy_SeqBAIJ,
2123        MatView_SeqBAIJ,
2124        MatGetPetscMaps_Petsc,
2125        0,
2126 /*65*/ 0,
2127        0,
2128        0,
2129        0,
2130        0,
2131 /*70*/ MatGetRowMax_SeqBAIJ,
2132        MatConvert_Basic,
2133        0,
2134        0,
2135        0,
2136 /*75*/ 0,
2137        0,
2138        0,
2139        0,
2140        0,
2141 /*80*/ 0,
2142        0,
2143        0,
2144        0,
2145        MatLoad_SeqBAIJ,
2146 /*85*/ 0,
2147        0,
2148        0,
2149        0,
2150        0,
2151 /*90*/ 0,
2152        0,
2153        0,
2154        0,
2155        0,
2156 /*95*/ 0,
2157        0,
2158        0,
2159        0};
2160 
2161 EXTERN_C_BEGIN
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2164 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2165 {
2166   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2167   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2168   PetscErrorCode ierr;
2169 
2170   PetscFunctionBegin;
2171   if (aij->nonew != 1) {
2172     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2173   }
2174 
2175   /* allocate space for values if not already there */
2176   if (!aij->saved_values) {
2177     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2178   }
2179 
2180   /* copy values over */
2181   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 EXTERN_C_END
2185 
2186 EXTERN_C_BEGIN
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2189 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2190 {
2191   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2192   PetscErrorCode ierr;
2193   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2194 
2195   PetscFunctionBegin;
2196   if (aij->nonew != 1) {
2197     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2198   }
2199   if (!aij->saved_values) {
2200     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2201   }
2202 
2203   /* copy values over */
2204   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2205   PetscFunctionReturn(0);
2206 }
2207 EXTERN_C_END
2208 
2209 EXTERN_C_BEGIN
2210 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2211 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2212 EXTERN_C_END
2213 
2214 EXTERN_C_BEGIN
2215 #undef __FUNCT__
2216 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2217 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2218 {
2219   Mat_SeqBAIJ    *b;
2220   PetscErrorCode ierr;
2221   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2222   PetscTruth     flg;
2223 
2224   PetscFunctionBegin;
2225 
2226   B->preallocated = PETSC_TRUE;
2227   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2228   if (nnz && newbs != bs) {
2229     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2230   }
2231   bs   = newbs;
2232 
2233   mbs  = B->m/bs;
2234   nbs  = B->n/bs;
2235   bs2  = bs*bs;
2236 
2237   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2238     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2239   }
2240 
2241   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2242   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2243   if (nnz) {
2244     for (i=0; i<mbs; i++) {
2245       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2246       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2247     }
2248   }
2249 
2250   b       = (Mat_SeqBAIJ*)B->data;
2251   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2252   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2253   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2254   if (!flg) {
2255     switch (bs) {
2256     case 1:
2257       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2258       B->ops->mult            = MatMult_SeqBAIJ_1;
2259       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2260       break;
2261     case 2:
2262       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2263       B->ops->mult            = MatMult_SeqBAIJ_2;
2264       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2265       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2266       break;
2267     case 3:
2268       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2269       B->ops->mult            = MatMult_SeqBAIJ_3;
2270       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2271       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2272       break;
2273     case 4:
2274       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2275       B->ops->mult            = MatMult_SeqBAIJ_4;
2276       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2277       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2278       break;
2279     case 5:
2280       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2281       B->ops->mult            = MatMult_SeqBAIJ_5;
2282       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2283       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2284       break;
2285     case 6:
2286       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2287       B->ops->mult            = MatMult_SeqBAIJ_6;
2288       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2289       break;
2290     case 7:
2291       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2292       B->ops->mult            = MatMult_SeqBAIJ_7;
2293       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2294       break;
2295     default:
2296       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2297       B->ops->mult            = MatMult_SeqBAIJ_N;
2298       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2299       break;
2300     }
2301   }
2302   B->bs      = bs;
2303   b->mbs     = mbs;
2304   b->nbs     = nbs;
2305   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2306   if (!nnz) {
2307     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2308     else if (nz <= 0)        nz = 1;
2309     for (i=0; i<mbs; i++) b->imax[i] = nz;
2310     nz = nz*mbs;
2311   } else {
2312     nz = 0;
2313     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2314   }
2315 
2316   /* allocate the matrix space */
2317   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2318   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2319   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2320   b->j  = (PetscInt*)(b->a + nz*bs2);
2321   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2322   b->i  = b->j + nz;
2323   b->singlemalloc = PETSC_TRUE;
2324 
2325   b->i[0] = 0;
2326   for (i=1; i<mbs+1; i++) {
2327     b->i[i] = b->i[i-1] + b->imax[i-1];
2328   }
2329 
2330   /* b->ilen will count nonzeros in each block row so far. */
2331   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2332   ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2333   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2334 
2335   B->bs               = bs;
2336   b->bs2              = bs2;
2337   b->mbs              = mbs;
2338   b->nz               = 0;
2339   b->maxnz            = nz*bs2;
2340   B->info.nz_unneeded = (PetscReal)b->maxnz;
2341   PetscFunctionReturn(0);
2342 }
2343 EXTERN_C_END
2344 
2345 /*MC
2346    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2347    block sparse compressed row format.
2348 
2349    Options Database Keys:
2350 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2351 
2352   Level: beginner
2353 
2354 .seealso: MatCreateSeqBAIJ
2355 M*/
2356 
2357 EXTERN_C_BEGIN
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatCreate_SeqBAIJ"
2360 PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2361 {
2362   PetscErrorCode ierr;
2363   PetscMPIInt    size;
2364   Mat_SeqBAIJ    *b;
2365 
2366   PetscFunctionBegin;
2367   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2368   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2369 
2370   B->m = B->M = PetscMax(B->m,B->M);
2371   B->n = B->N = PetscMax(B->n,B->N);
2372   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2373   B->data = (void*)b;
2374   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2375   B->factor           = 0;
2376   B->mapping          = 0;
2377   b->row              = 0;
2378   b->col              = 0;
2379   b->icol             = 0;
2380   b->reallocs         = 0;
2381   b->saved_values     = 0;
2382 #if defined(PETSC_USE_MAT_SINGLE)
2383   b->setvalueslen     = 0;
2384   b->setvaluescopy    = PETSC_NULL;
2385 #endif
2386 
2387   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2388   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2389 
2390   b->sorted           = PETSC_FALSE;
2391   b->roworiented      = PETSC_TRUE;
2392   b->nonew            = 0;
2393   b->diag             = 0;
2394   b->solve_work       = 0;
2395   b->mult_work        = 0;
2396   B->spptr            = 0;
2397   B->info.nz_unneeded = (PetscReal)b->maxnz;
2398   b->keepzeroedrows   = PETSC_FALSE;
2399   b->xtoy              = 0;
2400   b->XtoY              = 0;
2401   b->compressedrow.use     = PETSC_FALSE;
2402   b->compressedrow.nrows   = 0;
2403   b->compressedrow.i       = PETSC_NULL;
2404   b->compressedrow.rindex  = PETSC_NULL;
2405   b->compressedrow.checked = PETSC_FALSE;
2406   B->same_nonzero          = PETSC_FALSE;
2407 
2408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2409                                      "MatInvertBlockDiagonal_SeqBAIJ",
2410                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2412                                      "MatStoreValues_SeqBAIJ",
2413                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2414   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2415                                      "MatRetrieveValues_SeqBAIJ",
2416                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2417   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2418                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2419                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2420   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2421                                      "MatConvert_SeqBAIJ_SeqAIJ",
2422                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2423 #if !defined(PETSC_USE_64BIT_INT)
2424   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2425                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2426                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2427 #endif
2428   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2429                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2430                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2431   PetscFunctionReturn(0);
2432 }
2433 EXTERN_C_END
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2437 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2438 {
2439   Mat            C;
2440   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2441   PetscErrorCode ierr;
2442   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2443 
2444   PetscFunctionBegin;
2445   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2446 
2447   *B = 0;
2448   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2449   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2450   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2451   c    = (Mat_SeqBAIJ*)C->data;
2452 
2453   C->M   = A->M;
2454   C->N   = A->N;
2455   C->bs  = A->bs;
2456   c->bs2 = a->bs2;
2457   c->mbs = a->mbs;
2458   c->nbs = a->nbs;
2459 
2460   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2461   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2462   for (i=0; i<mbs; i++) {
2463     c->imax[i] = a->imax[i];
2464     c->ilen[i] = a->ilen[i];
2465   }
2466 
2467   /* allocate the matrix space */
2468   c->singlemalloc = PETSC_TRUE;
2469   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2470   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2471   c->j = (PetscInt*)(c->a + nz*bs2);
2472   c->i = c->j + nz;
2473   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2474   if (mbs > 0) {
2475     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2476     if (cpvalues == MAT_COPY_VALUES) {
2477       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2478     } else {
2479       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2480     }
2481   }
2482 
2483   ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2484   c->sorted      = a->sorted;
2485   c->roworiented = a->roworiented;
2486   c->nonew       = a->nonew;
2487 
2488   if (a->diag) {
2489     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2490     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2491     for (i=0; i<mbs; i++) {
2492       c->diag[i] = a->diag[i];
2493     }
2494   } else c->diag        = 0;
2495   c->nz                 = a->nz;
2496   c->maxnz              = a->maxnz;
2497   c->solve_work         = 0;
2498   c->mult_work          = 0;
2499   C->preallocated       = PETSC_TRUE;
2500   C->assembled          = PETSC_TRUE;
2501 
2502   c->compressedrow.use     = a->compressedrow.use;
2503   c->compressedrow.nrows   = a->compressedrow.nrows;
2504   c->compressedrow.checked = a->compressedrow.checked;
2505   if ( a->compressedrow.checked && a->compressedrow.use){
2506     i = a->compressedrow.nrows;
2507     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2508     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2509     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2510     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2511   } else {
2512     c->compressedrow.use    = PETSC_FALSE;
2513     c->compressedrow.i      = PETSC_NULL;
2514     c->compressedrow.rindex = PETSC_NULL;
2515   }
2516   C->same_nonzero = A->same_nonzero;
2517   *B = C;
2518   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatLoad_SeqBAIJ"
2524 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2525 {
2526   Mat_SeqBAIJ    *a;
2527   Mat            B;
2528   PetscErrorCode ierr;
2529   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2530   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2531   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2532   PetscInt       *masked,nmask,tmp,bs2,ishift;
2533   PetscMPIInt    size;
2534   int            fd;
2535   PetscScalar    *aa;
2536   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2540   bs2  = bs*bs;
2541 
2542   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2543   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2544   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2545   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2546   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2547   M = header[1]; N = header[2]; nz = header[3];
2548 
2549   if (header[3] < 0) {
2550     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2551   }
2552 
2553   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2554 
2555   /*
2556      This code adds extra rows to make sure the number of rows is
2557     divisible by the blocksize
2558   */
2559   mbs        = M/bs;
2560   extra_rows = bs - M + bs*(mbs);
2561   if (extra_rows == bs) extra_rows = 0;
2562   else                  mbs++;
2563   if (extra_rows) {
2564     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2565   }
2566 
2567   /* read in row lengths */
2568   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2569   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2570   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2571 
2572   /* read in column indices */
2573   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2574   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2575   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2576 
2577   /* loop over row lengths determining block row lengths */
2578   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2579   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2580   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2581   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2582   masked   = mask + mbs;
2583   rowcount = 0; nzcount = 0;
2584   for (i=0; i<mbs; i++) {
2585     nmask = 0;
2586     for (j=0; j<bs; j++) {
2587       kmax = rowlengths[rowcount];
2588       for (k=0; k<kmax; k++) {
2589         tmp = jj[nzcount++]/bs;
2590         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2591       }
2592       rowcount++;
2593     }
2594     browlengths[i] += nmask;
2595     /* zero out the mask elements we set */
2596     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2597   }
2598 
2599   /* create our matrix */
2600   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2601   ierr = MatSetType(B,type);CHKERRQ(ierr);
2602   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2603   a = (Mat_SeqBAIJ*)B->data;
2604 
2605   /* set matrix "i" values */
2606   a->i[0] = 0;
2607   for (i=1; i<= mbs; i++) {
2608     a->i[i]      = a->i[i-1] + browlengths[i-1];
2609     a->ilen[i-1] = browlengths[i-1];
2610   }
2611   a->nz         = 0;
2612   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2613 
2614   /* read in nonzero values */
2615   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2616   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2617   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2618 
2619   /* set "a" and "j" values into matrix */
2620   nzcount = 0; jcount = 0;
2621   for (i=0; i<mbs; i++) {
2622     nzcountb = nzcount;
2623     nmask    = 0;
2624     for (j=0; j<bs; j++) {
2625       kmax = rowlengths[i*bs+j];
2626       for (k=0; k<kmax; k++) {
2627         tmp = jj[nzcount++]/bs;
2628 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2629       }
2630     }
2631     /* sort the masked values */
2632     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2633 
2634     /* set "j" values into matrix */
2635     maskcount = 1;
2636     for (j=0; j<nmask; j++) {
2637       a->j[jcount++]  = masked[j];
2638       mask[masked[j]] = maskcount++;
2639     }
2640     /* set "a" values into matrix */
2641     ishift = bs2*a->i[i];
2642     for (j=0; j<bs; j++) {
2643       kmax = rowlengths[i*bs+j];
2644       for (k=0; k<kmax; k++) {
2645         tmp       = jj[nzcountb]/bs ;
2646         block     = mask[tmp] - 1;
2647         point     = jj[nzcountb] - bs*tmp;
2648         idx       = ishift + bs2*block + j + bs*point;
2649         a->a[idx] = (MatScalar)aa[nzcountb++];
2650       }
2651     }
2652     /* zero out the mask elements we set */
2653     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2654   }
2655   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2656 
2657   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2658   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2659   ierr = PetscFree(aa);CHKERRQ(ierr);
2660   ierr = PetscFree(jj);CHKERRQ(ierr);
2661   ierr = PetscFree(mask);CHKERRQ(ierr);
2662 
2663   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2664   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2665   ierr = MatView_Private(B);CHKERRQ(ierr);
2666 
2667   *A = B;
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 #undef __FUNCT__
2672 #define __FUNCT__ "MatCreateSeqBAIJ"
2673 /*@C
2674    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2675    compressed row) format.  For good matrix assembly performance the
2676    user should preallocate the matrix storage by setting the parameter nz
2677    (or the array nnz).  By setting these parameters accurately, performance
2678    during matrix assembly can be increased by more than a factor of 50.
2679 
2680    Collective on MPI_Comm
2681 
2682    Input Parameters:
2683 +  comm - MPI communicator, set to PETSC_COMM_SELF
2684 .  bs - size of block
2685 .  m - number of rows
2686 .  n - number of columns
2687 .  nz - number of nonzero blocks  per block row (same for all rows)
2688 -  nnz - array containing the number of nonzero blocks in the various block rows
2689          (possibly different for each block row) or PETSC_NULL
2690 
2691    Output Parameter:
2692 .  A - the matrix
2693 
2694    Options Database Keys:
2695 .   -mat_no_unroll - uses code that does not unroll the loops in the
2696                      block calculations (much slower)
2697 .    -mat_block_size - size of the blocks to use
2698 
2699    Level: intermediate
2700 
2701    Notes:
2702    The number of rows and columns must be divisible by blocksize.
2703 
2704    If the nnz parameter is given then the nz parameter is ignored
2705 
2706    A nonzero block is any block that as 1 or more nonzeros in it
2707 
2708    The block AIJ format is fully compatible with standard Fortran 77
2709    storage.  That is, the stored row and column indices can begin at
2710    either one (as in Fortran) or zero.  See the users' manual for details.
2711 
2712    Specify the preallocated storage with either nz or nnz (not both).
2713    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2714    allocation.  For additional details, see the users manual chapter on
2715    matrices.
2716 
2717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2718 @*/
2719 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2720 {
2721   PetscErrorCode ierr;
2722 
2723   PetscFunctionBegin;
2724   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2725   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2726   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2732 /*@C
2733    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2734    per row in the matrix. For good matrix assembly performance the
2735    user should preallocate the matrix storage by setting the parameter nz
2736    (or the array nnz).  By setting these parameters accurately, performance
2737    during matrix assembly can be increased by more than a factor of 50.
2738 
2739    Collective on MPI_Comm
2740 
2741    Input Parameters:
2742 +  A - the matrix
2743 .  bs - size of block
2744 .  nz - number of block nonzeros per block row (same for all rows)
2745 -  nnz - array containing the number of block nonzeros in the various block rows
2746          (possibly different for each block row) or PETSC_NULL
2747 
2748    Options Database Keys:
2749 .   -mat_no_unroll - uses code that does not unroll the loops in the
2750                      block calculations (much slower)
2751 .    -mat_block_size - size of the blocks to use
2752 
2753    Level: intermediate
2754 
2755    Notes:
2756    If the nnz parameter is given then the nz parameter is ignored
2757 
2758    The block AIJ format is fully compatible with standard Fortran 77
2759    storage.  That is, the stored row and column indices can begin at
2760    either one (as in Fortran) or zero.  See the users' manual for details.
2761 
2762    Specify the preallocated storage with either nz or nnz (not both).
2763    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2764    allocation.  For additional details, see the users manual chapter on
2765    matrices.
2766 
2767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2768 @*/
2769 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2770 {
2771   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2772 
2773   PetscFunctionBegin;
2774   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2775   if (f) {
2776     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2777   }
2778   PetscFunctionReturn(0);
2779 }
2780 
2781