xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a96a251defc07734f733d1abf27b5765e87fdc8b)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   if (!a->singlemalloc) {
831     ierr = PetscFree(a->a);CHKERRQ(ierr);
832     ierr = PetscFree(a->i);CHKERRQ(ierr);
833     ierr = PetscFree(a->j);CHKERRQ(ierr);
834   } else {
835     ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);
836   }
837   if (a->row) {
838     ierr = ISDestroy(a->row);CHKERRQ(ierr);
839   }
840   if (a->col) {
841     ierr = ISDestroy(a->col);CHKERRQ(ierr);
842   }
843   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
844   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
845   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
846   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
847   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
848   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
849   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
850   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
851 #if defined(PETSC_USE_MAT_SINGLE)
852   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
853 #endif
854   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
855   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
856 
857   ierr = PetscFree(a);CHKERRQ(ierr);
858 
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
865   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatSetOption_SeqBAIJ"
871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
872 {
873   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   switch (op) {
878   case MAT_ROW_ORIENTED:
879     a->roworiented    = PETSC_TRUE;
880     break;
881   case MAT_COLUMN_ORIENTED:
882     a->roworiented    = PETSC_FALSE;
883     break;
884   case MAT_COLUMNS_SORTED:
885     a->sorted         = PETSC_TRUE;
886     break;
887   case MAT_COLUMNS_UNSORTED:
888     a->sorted         = PETSC_FALSE;
889     break;
890   case MAT_KEEP_ZEROED_ROWS:
891     a->keepzeroedrows = PETSC_TRUE;
892     break;
893   case MAT_NO_NEW_NONZERO_LOCATIONS:
894     a->nonew          = 1;
895     break;
896   case MAT_NEW_NONZERO_LOCATION_ERR:
897     a->nonew          = -1;
898     break;
899   case MAT_NEW_NONZERO_ALLOCATION_ERR:
900     a->nonew          = -2;
901     break;
902   case MAT_YES_NEW_NONZERO_LOCATIONS:
903     a->nonew          = 0;
904     break;
905   case MAT_ROWS_SORTED:
906   case MAT_ROWS_UNSORTED:
907   case MAT_YES_NEW_DIAGONALS:
908   case MAT_IGNORE_OFF_PROC_ENTRIES:
909   case MAT_USE_HASH_TABLE:
910     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
911     break;
912   case MAT_NO_NEW_DIAGONALS:
913     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
914   case MAT_SYMMETRIC:
915   case MAT_STRUCTURALLY_SYMMETRIC:
916   case MAT_NOT_SYMMETRIC:
917   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
918   case MAT_HERMITIAN:
919   case MAT_NOT_HERMITIAN:
920   case MAT_SYMMETRY_ETERNAL:
921   case MAT_NOT_SYMMETRY_ETERNAL:
922     break;
923   default:
924     SETERRQ(PETSC_ERR_SUP,"unknown option");
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatGetRow_SeqBAIJ"
931 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
932 {
933   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
934   PetscErrorCode ierr;
935   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
936   MatScalar      *aa,*aa_i;
937   PetscScalar    *v_i;
938 
939   PetscFunctionBegin;
940   bs  = A->bs;
941   ai  = a->i;
942   aj  = a->j;
943   aa  = a->a;
944   bs2 = a->bs2;
945 
946   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
947 
948   bn  = row/bs;   /* Block number */
949   bp  = row % bs; /* Block Position */
950   M   = ai[bn+1] - ai[bn];
951   *nz = bs*M;
952 
953   if (v) {
954     *v = 0;
955     if (*nz) {
956       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
957       for (i=0; i<M; i++) { /* for each block in the block row */
958         v_i  = *v + i*bs;
959         aa_i = aa + bs2*(ai[bn] + i);
960         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
961       }
962     }
963   }
964 
965   if (idx) {
966     *idx = 0;
967     if (*nz) {
968       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
969       for (i=0; i<M; i++) { /* for each block in the block row */
970         idx_i = *idx + i*bs;
971         itmp  = bs*aj[ai[bn] + i];
972         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
973       }
974     }
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
981 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
982 {
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
987   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatTranspose_SeqBAIJ"
993 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
994 {
995   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
996   Mat            C;
997   PetscErrorCode ierr;
998   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
999   PetscInt       *rows,*cols,bs2=a->bs2;
1000   PetscScalar    *array;
1001 
1002   PetscFunctionBegin;
1003   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1004   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1005   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1006 
1007 #if defined(PETSC_USE_MAT_SINGLE)
1008   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1009   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1010 #else
1011   array = a->a;
1012 #endif
1013 
1014   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1015   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
1016   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1017   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1018   ierr = PetscFree(col);CHKERRQ(ierr);
1019   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1020   cols = rows + bs;
1021   for (i=0; i<mbs; i++) {
1022     cols[0] = i*bs;
1023     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1024     len = ai[i+1] - ai[i];
1025     for (j=0; j<len; j++) {
1026       rows[0] = (*aj++)*bs;
1027       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1028       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1029       array += bs2;
1030     }
1031   }
1032   ierr = PetscFree(rows);CHKERRQ(ierr);
1033 #if defined(PETSC_USE_MAT_SINGLE)
1034   ierr = PetscFree(array);
1035 #endif
1036 
1037   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039 
1040   if (B) {
1041     *B = C;
1042   } else {
1043     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1044   }
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1050 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1051 {
1052   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1053   PetscErrorCode ierr;
1054   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1055   int            fd;
1056   PetscScalar    *aa;
1057   FILE           *file;
1058 
1059   PetscFunctionBegin;
1060   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1061   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1062   col_lens[0] = MAT_FILE_COOKIE;
1063 
1064   col_lens[1] = A->m;
1065   col_lens[2] = A->n;
1066   col_lens[3] = a->nz*bs2;
1067 
1068   /* store lengths of each row and write (including header) to file */
1069   count = 0;
1070   for (i=0; i<a->mbs; i++) {
1071     for (j=0; j<bs; j++) {
1072       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1073     }
1074   }
1075   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1076   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1077 
1078   /* store column indices (zero start index) */
1079   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1080   count = 0;
1081   for (i=0; i<a->mbs; i++) {
1082     for (j=0; j<bs; j++) {
1083       for (k=a->i[i]; k<a->i[i+1]; k++) {
1084         for (l=0; l<bs; l++) {
1085           jj[count++] = bs*a->j[k] + l;
1086         }
1087       }
1088     }
1089   }
1090   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1091   ierr = PetscFree(jj);CHKERRQ(ierr);
1092 
1093   /* store nonzero values */
1094   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1095   count = 0;
1096   for (i=0; i<a->mbs; i++) {
1097     for (j=0; j<bs; j++) {
1098       for (k=a->i[i]; k<a->i[i+1]; k++) {
1099         for (l=0; l<bs; l++) {
1100           aa[count++] = a->a[bs2*k + l*bs + j];
1101         }
1102       }
1103     }
1104   }
1105   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1106   ierr = PetscFree(aa);CHKERRQ(ierr);
1107 
1108   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1109   if (file) {
1110     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1111   }
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1117 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1118 {
1119   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1120   PetscErrorCode    ierr;
1121   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1122   PetscViewerFormat format;
1123 
1124   PetscFunctionBegin;
1125   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1126   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1127     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1128   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1129     Mat aij;
1130     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1131     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1132     ierr = MatDestroy(aij);CHKERRQ(ierr);
1133   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1134      PetscFunctionReturn(0);
1135   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1136     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1137     for (i=0; i<a->mbs; i++) {
1138       for (j=0; j<bs; j++) {
1139         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1140         for (k=a->i[i]; k<a->i[i+1]; k++) {
1141           for (l=0; l<bs; l++) {
1142 #if defined(PETSC_USE_COMPLEX)
1143             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1145                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1146             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1147               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1148                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1149             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1150               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1151             }
1152 #else
1153             if (a->a[bs2*k + l*bs + j] != 0.0) {
1154               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1155             }
1156 #endif
1157           }
1158         }
1159         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1160       }
1161     }
1162     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1163   } else {
1164     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1165     for (i=0; i<a->mbs; i++) {
1166       for (j=0; j<bs; j++) {
1167         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1168         for (k=a->i[i]; k<a->i[i+1]; k++) {
1169           for (l=0; l<bs; l++) {
1170 #if defined(PETSC_USE_COMPLEX)
1171             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1173                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1174             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1175               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1176                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1177             } else {
1178               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1179             }
1180 #else
1181             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1182 #endif
1183           }
1184         }
1185         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1186       }
1187     }
1188     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1189   }
1190   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1196 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1197 {
1198   Mat            A = (Mat) Aa;
1199   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1200   PetscErrorCode ierr;
1201   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1202   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1203   MatScalar      *aa;
1204   PetscViewer    viewer;
1205 
1206   PetscFunctionBegin;
1207 
1208   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1209   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1210 
1211   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1212 
1213   /* loop over matrix elements drawing boxes */
1214   color = PETSC_DRAW_BLUE;
1215   for (i=0,row=0; i<mbs; i++,row+=bs) {
1216     for (j=a->i[i]; j<a->i[i+1]; j++) {
1217       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1218       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1219       aa = a->a + j*bs2;
1220       for (k=0; k<bs; k++) {
1221         for (l=0; l<bs; l++) {
1222           if (PetscRealPart(*aa++) >=  0.) continue;
1223           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1224         }
1225       }
1226     }
1227   }
1228   color = PETSC_DRAW_CYAN;
1229   for (i=0,row=0; i<mbs; i++,row+=bs) {
1230     for (j=a->i[i]; j<a->i[i+1]; j++) {
1231       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1232       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1233       aa = a->a + j*bs2;
1234       for (k=0; k<bs; k++) {
1235         for (l=0; l<bs; l++) {
1236           if (PetscRealPart(*aa++) != 0.) continue;
1237           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1238         }
1239       }
1240     }
1241   }
1242 
1243   color = PETSC_DRAW_RED;
1244   for (i=0,row=0; i<mbs; i++,row+=bs) {
1245     for (j=a->i[i]; j<a->i[i+1]; j++) {
1246       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1247       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1248       aa = a->a + j*bs2;
1249       for (k=0; k<bs; k++) {
1250         for (l=0; l<bs; l++) {
1251           if (PetscRealPart(*aa++) <= 0.) continue;
1252           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1253         }
1254       }
1255     }
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1262 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1263 {
1264   PetscErrorCode ierr;
1265   PetscReal      xl,yl,xr,yr,w,h;
1266   PetscDraw      draw;
1267   PetscTruth     isnull;
1268 
1269   PetscFunctionBegin;
1270 
1271   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1272   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1273 
1274   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1275   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1276   xr += w;    yr += h;  xl = -w;     yl = -h;
1277   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1278   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1279   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_SeqBAIJ"
1285 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1286 {
1287   PetscErrorCode ierr;
1288   PetscTruth     iascii,isbinary,isdraw;
1289 
1290   PetscFunctionBegin;
1291   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1294   if (iascii){
1295     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1296   } else if (isbinary) {
1297     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1298   } else if (isdraw) {
1299     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1300   } else {
1301     Mat B;
1302     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1303     ierr = MatView(B,viewer);CHKERRQ(ierr);
1304     ierr = MatDestroy(B);CHKERRQ(ierr);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1312 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1313 {
1314   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1315   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1316   PetscInt    *ai = a->i,*ailen = a->ilen;
1317   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1318   MatScalar   *ap,*aa = a->a,zero = 0.0;
1319 
1320   PetscFunctionBegin;
1321   for (k=0; k<m; k++) { /* loop over rows */
1322     row  = im[k]; brow = row/bs;
1323     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1324     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1325     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1326     nrow = ailen[brow];
1327     for (l=0; l<n; l++) { /* loop over columns */
1328       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1329       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1330       col  = in[l] ;
1331       bcol = col/bs;
1332       cidx = col%bs;
1333       ridx = row%bs;
1334       high = nrow;
1335       low  = 0; /* assume unsorted */
1336       while (high-low > 5) {
1337         t = (low+high)/2;
1338         if (rp[t] > bcol) high = t;
1339         else             low  = t;
1340       }
1341       for (i=low; i<high; i++) {
1342         if (rp[i] > bcol) break;
1343         if (rp[i] == bcol) {
1344           *v++ = ap[bs2*i+bs*cidx+ridx];
1345           goto finished;
1346         }
1347       }
1348       *v++ = zero;
1349       finished:;
1350     }
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #if defined(PETSC_USE_MAT_SINGLE)
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1358 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1359 {
1360   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1361   PetscErrorCode ierr;
1362   PetscInt       i,N = m*n*b->bs2;
1363   MatScalar      *vsingle;
1364 
1365   PetscFunctionBegin;
1366   if (N > b->setvalueslen) {
1367     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1368     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1369     b->setvalueslen  = N;
1370   }
1371   vsingle = b->setvaluescopy;
1372   for (i=0; i<N; i++) {
1373     vsingle[i] = v[i];
1374   }
1375   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 #endif
1379 
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1383 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1384 {
1385   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1386   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1387   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1388   PetscErrorCode  ierr;
1389   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1390   PetscTruth      roworiented=a->roworiented;
1391   const MatScalar *value = v;
1392   MatScalar       *ap,*aa = a->a,*bap;
1393 
1394   PetscFunctionBegin;
1395   if (roworiented) {
1396     stepval = (n-1)*bs;
1397   } else {
1398     stepval = (m-1)*bs;
1399   }
1400   for (k=0; k<m; k++) { /* loop over added rows */
1401     row  = im[k];
1402     if (row < 0) continue;
1403 #if defined(PETSC_USE_DEBUG)
1404     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1405 #endif
1406     rp   = aj + ai[row];
1407     ap   = aa + bs2*ai[row];
1408     rmax = imax[row];
1409     nrow = ailen[row];
1410     low  = 0;
1411     high = nrow;
1412     for (l=0; l<n; l++) { /* loop over added columns */
1413       if (in[l] < 0) continue;
1414 #if defined(PETSC_USE_DEBUG)
1415       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1416 #endif
1417       col = in[l];
1418       if (roworiented) {
1419         value = v + k*(stepval+bs)*bs + l*bs;
1420       } else {
1421         value = v + l*(stepval+bs)*bs + k*bs;
1422       }
1423       if (col < lastcol) low = 0; else high = nrow;
1424       lastcol = col;
1425       while (high-low > 7) {
1426         t = (low+high)/2;
1427         if (rp[t] > col) high = t;
1428         else             low  = t;
1429       }
1430       for (i=low; i<high; i++) {
1431         if (rp[i] > col) break;
1432         if (rp[i] == col) {
1433           bap  = ap +  bs2*i;
1434           if (roworiented) {
1435             if (is == ADD_VALUES) {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] += *value++;
1439                 }
1440               }
1441             } else {
1442               for (ii=0; ii<bs; ii++,value+=stepval) {
1443                 for (jj=ii; jj<bs2; jj+=bs) {
1444                   bap[jj] = *value++;
1445                 }
1446               }
1447             }
1448           } else {
1449             if (is == ADD_VALUES) {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++ += *value++;
1453                 }
1454               }
1455             } else {
1456               for (ii=0; ii<bs; ii++,value+=stepval) {
1457                 for (jj=0; jj<bs; jj++) {
1458                   *bap++  = *value++;
1459                 }
1460               }
1461             }
1462           }
1463           goto noinsert2;
1464         }
1465       }
1466       if (nonew == 1) goto noinsert2;
1467       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1468       if (nrow >= rmax) {
1469         /* there is no extra room in row, therefore enlarge */
1470         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1471         MatScalar *new_a;
1472 
1473         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1474 
1475         /* malloc new storage space */
1476         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);
1477 
1478         /* copy over old data into new slots */
1479         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1480         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1481         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1482         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1483         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1484         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1485         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1486         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1487         /* free up old matrix storage */
1488        if (!a->singlemalloc) {
1489           ierr = PetscFree(a->a);CHKERRQ(ierr);
1490           ierr = PetscFree(a->i);CHKERRQ(ierr);
1491           ierr = PetscFree(a->j);CHKERRQ(ierr);
1492         } else {
1493           ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);
1494         }
1495         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1496         a->singlemalloc = PETSC_TRUE;
1497 
1498         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1499         rmax = imax[row] = imax[row] + CHUNKSIZE;
1500         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1501         a->maxnz += bs2*CHUNKSIZE;
1502         a->reallocs++;
1503         a->nz++;
1504       }
1505       N = nrow++ - 1;
1506       /* shift up all the later entries in this row */
1507       for (ii=N; ii>=i; ii--) {
1508         rp[ii+1] = rp[ii];
1509         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1510       }
1511       if (N >= i) {
1512         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1513       }
1514       rp[i] = col;
1515       bap   = ap +  bs2*i;
1516       if (roworiented) {
1517         for (ii=0; ii<bs; ii++,value+=stepval) {
1518           for (jj=ii; jj<bs2; jj+=bs) {
1519             bap[jj] = *value++;
1520           }
1521         }
1522       } else {
1523         for (ii=0; ii<bs; ii++,value+=stepval) {
1524           for (jj=0; jj<bs; jj++) {
1525             *bap++  = *value++;
1526           }
1527         }
1528       }
1529       noinsert2:;
1530       low = i;
1531     }
1532     ailen[row] = nrow;
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1539 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1540 {
1541   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1542   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1543   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1544   PetscErrorCode ierr;
1545   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1546   MatScalar      *aa = a->a,*ap;
1547   PetscReal      ratio=0.6;
1548 
1549   PetscFunctionBegin;
1550   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1551 
1552   if (m) rmax = ailen[0];
1553   for (i=1; i<mbs; i++) {
1554     /* move each row back by the amount of empty slots (fshift) before it*/
1555     fshift += imax[i-1] - ailen[i-1];
1556     rmax   = PetscMax(rmax,ailen[i]);
1557     if (fshift) {
1558       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1559       N = ailen[i];
1560       for (j=0; j<N; j++) {
1561         ip[j-fshift] = ip[j];
1562         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1563       }
1564     }
1565     ai[i] = ai[i-1] + ailen[i-1];
1566   }
1567   if (mbs) {
1568     fshift += imax[mbs-1] - ailen[mbs-1];
1569     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1570   }
1571   /* reset ilen and imax for each row */
1572   for (i=0; i<mbs; i++) {
1573     ailen[i] = imax[i] = ai[i+1] - ai[i];
1574   }
1575   a->nz = ai[mbs];
1576 
1577   /* diagonals may have moved, so kill the diagonal pointers */
1578   a->idiagvalid = PETSC_FALSE;
1579   if (fshift && a->diag) {
1580     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1581     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1582     a->diag = 0;
1583   }
1584   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1585   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1586   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1587   a->reallocs          = 0;
1588   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1589 
1590   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1591   if (a->compressedrow.use){
1592     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1593   }
1594 
1595   A->same_nonzero = PETSC_TRUE;
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 /*
1600    This function returns an array of flags which indicate the locations of contiguous
1601    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1602    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1603    Assume: sizes should be long enough to hold all the values.
1604 */
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1607 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1608 {
1609   PetscInt   i,j,k,row;
1610   PetscTruth flg;
1611 
1612   PetscFunctionBegin;
1613   for (i=0,j=0; i<n; j++) {
1614     row = idx[i];
1615     if (row%bs!=0) { /* Not the begining of a block */
1616       sizes[j] = 1;
1617       i++;
1618     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1619       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1620       i++;
1621     } else { /* Begining of the block, so check if the complete block exists */
1622       flg = PETSC_TRUE;
1623       for (k=1; k<bs; k++) {
1624         if (row+k != idx[i+k]) { /* break in the block */
1625           flg = PETSC_FALSE;
1626           break;
1627         }
1628       }
1629       if (flg) { /* No break in the bs */
1630         sizes[j] = bs;
1631         i+= bs;
1632       } else {
1633         sizes[j] = 1;
1634         i++;
1635       }
1636     }
1637   }
1638   *bs_max = j;
1639   PetscFunctionReturn(0);
1640 }
1641 
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1644 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1645 {
1646   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1647   PetscErrorCode ierr;
1648   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1649   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1650   PetscScalar    zero = 0.0;
1651   MatScalar      *aa;
1652 
1653   PetscFunctionBegin;
1654   /* Make a copy of the IS and  sort it */
1655   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1656   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1657 
1658   /* allocate memory for rows,sizes */
1659   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1660   sizes = rows + is_n;
1661 
1662   /* copy IS values to rows, and sort them */
1663   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1664   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1665   if (baij->keepzeroedrows) {
1666     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1667     bs_max = is_n;
1668     A->same_nonzero = PETSC_TRUE;
1669   } else {
1670     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1671     A->same_nonzero = PETSC_FALSE;
1672   }
1673   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1674 
1675   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1676     row   = rows[j];
1677     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1678     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1679     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1680     if (sizes[i] == bs && !baij->keepzeroedrows) {
1681       if (diag) {
1682         if (baij->ilen[row/bs] > 0) {
1683           baij->ilen[row/bs]       = 1;
1684           baij->j[baij->i[row/bs]] = row/bs;
1685           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1686         }
1687         /* Now insert all the diagonal values for this bs */
1688         for (k=0; k<bs; k++) {
1689           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1690         }
1691       } else { /* (!diag) */
1692         baij->ilen[row/bs] = 0;
1693       } /* end (!diag) */
1694     } else { /* (sizes[i] != bs) */
1695 #if defined (PETSC_USE_DEBUG)
1696       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1697 #endif
1698       for (k=0; k<count; k++) {
1699         aa[0] =  zero;
1700         aa    += bs;
1701       }
1702       if (diag) {
1703         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1704       }
1705     }
1706   }
1707 
1708   ierr = PetscFree(rows);CHKERRQ(ierr);
1709   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1715 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1716 {
1717   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1718   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1719   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1720   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1721   PetscErrorCode ierr;
1722   PetscInt       ridx,cidx,bs2=a->bs2;
1723   PetscTruth     roworiented=a->roworiented;
1724   MatScalar      *ap,value,*aa=a->a,*bap;
1725 
1726   PetscFunctionBegin;
1727   for (k=0; k<m; k++) { /* loop over added rows */
1728     row  = im[k]; brow = row/bs;
1729     if (row < 0) continue;
1730 #if defined(PETSC_USE_DEBUG)
1731     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1732 #endif
1733     rp   = aj + ai[brow];
1734     ap   = aa + bs2*ai[brow];
1735     rmax = imax[brow];
1736     nrow = ailen[brow];
1737     low  = 0;
1738     high = nrow;
1739     for (l=0; l<n; l++) { /* loop over added columns */
1740       if (in[l] < 0) continue;
1741 #if defined(PETSC_USE_DEBUG)
1742       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1743 #endif
1744       col = in[l]; bcol = col/bs;
1745       ridx = row % bs; cidx = col % bs;
1746       if (roworiented) {
1747         value = v[l + k*n];
1748       } else {
1749         value = v[k + l*m];
1750       }
1751       if (col < lastcol) low = 0; else high = nrow;
1752       lastcol = col;
1753       while (high-low > 7) {
1754         t = (low+high)/2;
1755         if (rp[t] > bcol) high = t;
1756         else              low  = t;
1757       }
1758       for (i=low; i<high; i++) {
1759         if (rp[i] > bcol) break;
1760         if (rp[i] == bcol) {
1761           bap  = ap +  bs2*i + bs*cidx + ridx;
1762           if (is == ADD_VALUES) *bap += value;
1763           else                  *bap  = value;
1764           goto noinsert1;
1765         }
1766       }
1767       if (nonew == 1) goto noinsert1;
1768       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1769       if (nrow >= rmax) {
1770         /* there is no extra room in row, therefore enlarge */
1771         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1772         MatScalar *new_a;
1773 
1774         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1775 
1776         /* Malloc new storage space */
1777         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);
1778 
1779         /* copy over old data into new slots */
1780         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1781         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1782         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1783         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1784         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1785         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1786         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1787         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1788         /* free up old matrix storage */
1789         /* free up old matrix storage */
1790        if (!a->singlemalloc) {
1791           ierr = PetscFree(a->a);CHKERRQ(ierr);
1792           ierr = PetscFree(a->i);CHKERRQ(ierr);
1793           ierr = PetscFree(a->j);CHKERRQ(ierr);
1794         } else {
1795           ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);
1796         }
1797         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1798         a->singlemalloc = PETSC_TRUE;
1799 
1800         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1801         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1802         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1803         a->maxnz += bs2*CHUNKSIZE;
1804         a->reallocs++;
1805         a->nz++;
1806       }
1807       N = nrow++ - 1;
1808       /* shift up all the later entries in this row */
1809       for (ii=N; ii>=i; ii--) {
1810         rp[ii+1] = rp[ii];
1811         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1812       }
1813       if (N>=i) {
1814         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1815       }
1816       rp[i]                      = bcol;
1817       ap[bs2*i + bs*cidx + ridx] = value;
1818       noinsert1:;
1819       low = i;
1820     }
1821     ailen[brow] = nrow;
1822   }
1823   A->same_nonzero = PETSC_FALSE;
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1830 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1831 {
1832   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1833   Mat            outA;
1834   PetscErrorCode ierr;
1835   PetscTruth     row_identity,col_identity;
1836 
1837   PetscFunctionBegin;
1838   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1839   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1840   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1841   if (!row_identity || !col_identity) {
1842     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1843   }
1844 
1845   outA          = inA;
1846   inA->factor   = FACTOR_LU;
1847 
1848   if (!a->diag) {
1849     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1850   }
1851 
1852   a->row        = row;
1853   a->col        = col;
1854   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1855   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1856 
1857   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1858   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1859   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1860 
1861   /*
1862       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1863       for ILU(0) factorization with natural ordering
1864   */
1865   if (inA->bs < 8) {
1866     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1867   } else {
1868     if (!a->solve_work) {
1869       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1870       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1871     }
1872   }
1873 
1874   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1875 
1876   PetscFunctionReturn(0);
1877 }
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1880 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1881 {
1882   static PetscTruth called = PETSC_FALSE;
1883   MPI_Comm          comm = A->comm;
1884   PetscErrorCode    ierr;
1885 
1886   PetscFunctionBegin;
1887   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1888   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1889   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 EXTERN_C_BEGIN
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1896 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1897 {
1898   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1899   PetscInt    i,nz,nbs;
1900 
1901   PetscFunctionBegin;
1902   nz  = baij->maxnz/baij->bs2;
1903   nbs = baij->nbs;
1904   for (i=0; i<nz; i++) {
1905     baij->j[i] = indices[i];
1906   }
1907   baij->nz = nz;
1908   for (i=0; i<nbs; i++) {
1909     baij->ilen[i] = baij->imax[i];
1910   }
1911 
1912   PetscFunctionReturn(0);
1913 }
1914 EXTERN_C_END
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1918 /*@
1919     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1920        in the matrix.
1921 
1922   Input Parameters:
1923 +  mat - the SeqBAIJ matrix
1924 -  indices - the column indices
1925 
1926   Level: advanced
1927 
1928   Notes:
1929     This can be called if you have precomputed the nonzero structure of the
1930   matrix and want to provide it to the matrix object to improve the performance
1931   of the MatSetValues() operation.
1932 
1933     You MUST have set the correct numbers of nonzeros per row in the call to
1934   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1935 
1936     MUST be called before any calls to MatSetValues();
1937 
1938 @*/
1939 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1940 {
1941   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1942 
1943   PetscFunctionBegin;
1944   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1945   PetscValidPointer(indices,2);
1946   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1947   if (f) {
1948     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1949   } else {
1950     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1951   }
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1957 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1958 {
1959   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1960   PetscErrorCode ierr;
1961   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1962   PetscReal      atmp;
1963   PetscScalar    *x,zero = 0.0;
1964   MatScalar      *aa;
1965   PetscInt       ncols,brow,krow,kcol;
1966 
1967   PetscFunctionBegin;
1968   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1969   bs   = A->bs;
1970   aa   = a->a;
1971   ai   = a->i;
1972   aj   = a->j;
1973   mbs = a->mbs;
1974 
1975   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1976   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1977   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1978   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1979   for (i=0; i<mbs; i++) {
1980     ncols = ai[1] - ai[0]; ai++;
1981     brow  = bs*i;
1982     for (j=0; j<ncols; j++){
1983       /* bcol = bs*(*aj); */
1984       for (kcol=0; kcol<bs; kcol++){
1985         for (krow=0; krow<bs; krow++){
1986           atmp = PetscAbsScalar(*aa); aa++;
1987           row = brow + krow;    /* row index */
1988           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1989           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1990         }
1991       }
1992       aj++;
1993     }
1994   }
1995   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2001 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2002 {
2003   PetscErrorCode ierr;
2004 
2005   PetscFunctionBegin;
2006   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2012 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2013 {
2014   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2015   PetscFunctionBegin;
2016   *array = a->a;
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2022 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2023 {
2024   PetscFunctionBegin;
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 #include "petscblaslapack.h"
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2031 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
2032 {
2033   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2034   PetscErrorCode ierr;
2035   PetscInt       i,bs=Y->bs,j,bs2;
2036   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
2037 
2038   PetscFunctionBegin;
2039   if (str == SAME_NONZERO_PATTERN) {
2040     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2041   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2042     if (y->xtoy && y->XtoY != X) {
2043       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2044       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2045     }
2046     if (!y->xtoy) { /* get xtoy */
2047       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2048       y->XtoY = X;
2049     }
2050     bs2 = bs*bs;
2051     for (i=0; i<x->nz; i++) {
2052       j = 0;
2053       while (j < bs2){
2054         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2055         j++;
2056       }
2057     }
2058     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2059   } else {
2060     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2061   }
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 /* -------------------------------------------------------------------*/
2066 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2067        MatGetRow_SeqBAIJ,
2068        MatRestoreRow_SeqBAIJ,
2069        MatMult_SeqBAIJ_N,
2070 /* 4*/ MatMultAdd_SeqBAIJ_N,
2071        MatMultTranspose_SeqBAIJ,
2072        MatMultTransposeAdd_SeqBAIJ,
2073        MatSolve_SeqBAIJ_N,
2074        0,
2075        0,
2076 /*10*/ 0,
2077        MatLUFactor_SeqBAIJ,
2078        0,
2079        0,
2080        MatTranspose_SeqBAIJ,
2081 /*15*/ MatGetInfo_SeqBAIJ,
2082        MatEqual_SeqBAIJ,
2083        MatGetDiagonal_SeqBAIJ,
2084        MatDiagonalScale_SeqBAIJ,
2085        MatNorm_SeqBAIJ,
2086 /*20*/ 0,
2087        MatAssemblyEnd_SeqBAIJ,
2088        0,
2089        MatSetOption_SeqBAIJ,
2090        MatZeroEntries_SeqBAIJ,
2091 /*25*/ MatZeroRows_SeqBAIJ,
2092        MatLUFactorSymbolic_SeqBAIJ,
2093        MatLUFactorNumeric_SeqBAIJ_N,
2094        MatCholeskyFactorSymbolic_SeqBAIJ,
2095        MatCholeskyFactorNumeric_SeqBAIJ_N,
2096 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2097        MatILUFactorSymbolic_SeqBAIJ,
2098        MatICCFactorSymbolic_SeqBAIJ,
2099        MatGetArray_SeqBAIJ,
2100        MatRestoreArray_SeqBAIJ,
2101 /*35*/ MatDuplicate_SeqBAIJ,
2102        0,
2103        0,
2104        MatILUFactor_SeqBAIJ,
2105        0,
2106 /*40*/ MatAXPY_SeqBAIJ,
2107        MatGetSubMatrices_SeqBAIJ,
2108        MatIncreaseOverlap_SeqBAIJ,
2109        MatGetValues_SeqBAIJ,
2110        0,
2111 /*45*/ MatPrintHelp_SeqBAIJ,
2112        MatScale_SeqBAIJ,
2113        0,
2114        0,
2115        0,
2116 /*50*/ 0,
2117        MatGetRowIJ_SeqBAIJ,
2118        MatRestoreRowIJ_SeqBAIJ,
2119        0,
2120        0,
2121 /*55*/ 0,
2122        0,
2123        0,
2124        0,
2125        MatSetValuesBlocked_SeqBAIJ,
2126 /*60*/ MatGetSubMatrix_SeqBAIJ,
2127        MatDestroy_SeqBAIJ,
2128        MatView_SeqBAIJ,
2129        MatGetPetscMaps_Petsc,
2130        0,
2131 /*65*/ 0,
2132        0,
2133        0,
2134        0,
2135        0,
2136 /*70*/ MatGetRowMax_SeqBAIJ,
2137        MatConvert_Basic,
2138        0,
2139        0,
2140        0,
2141 /*75*/ 0,
2142        0,
2143        0,
2144        0,
2145        0,
2146 /*80*/ 0,
2147        0,
2148        0,
2149        0,
2150        MatLoad_SeqBAIJ,
2151 /*85*/ 0,
2152        0,
2153        0,
2154        0,
2155        0,
2156 /*90*/ 0,
2157        0,
2158        0,
2159        0,
2160        0,
2161 /*95*/ 0,
2162        0,
2163        0,
2164        0};
2165 
2166 EXTERN_C_BEGIN
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2169 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2170 {
2171   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2172   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   if (aij->nonew != 1) {
2177     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2178   }
2179 
2180   /* allocate space for values if not already there */
2181   if (!aij->saved_values) {
2182     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2183   }
2184 
2185   /* copy values over */
2186   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 EXTERN_C_END
2190 
2191 EXTERN_C_BEGIN
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2194 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2195 {
2196   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2197   PetscErrorCode ierr;
2198   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2199 
2200   PetscFunctionBegin;
2201   if (aij->nonew != 1) {
2202     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2203   }
2204   if (!aij->saved_values) {
2205     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2206   }
2207 
2208   /* copy values over */
2209   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2210   PetscFunctionReturn(0);
2211 }
2212 EXTERN_C_END
2213 
2214 EXTERN_C_BEGIN
2215 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2216 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2217 EXTERN_C_END
2218 
2219 EXTERN_C_BEGIN
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2222 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2223 {
2224   Mat_SeqBAIJ    *b;
2225   PetscErrorCode ierr;
2226   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2227   PetscTruth     flg;
2228 
2229   PetscFunctionBegin;
2230 
2231   B->preallocated = PETSC_TRUE;
2232   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2233   if (nnz && newbs != bs) {
2234     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2235   }
2236   bs   = newbs;
2237 
2238   mbs  = B->m/bs;
2239   nbs  = B->n/bs;
2240   bs2  = bs*bs;
2241 
2242   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2243     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2244   }
2245 
2246   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2247   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2248   if (nnz) {
2249     for (i=0; i<mbs; i++) {
2250       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2251       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2252     }
2253   }
2254 
2255   b       = (Mat_SeqBAIJ*)B->data;
2256   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2257   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2258   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2259   if (!flg) {
2260     switch (bs) {
2261     case 1:
2262       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2263       B->ops->mult            = MatMult_SeqBAIJ_1;
2264       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2265       break;
2266     case 2:
2267       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2268       B->ops->mult            = MatMult_SeqBAIJ_2;
2269       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2270       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2271       break;
2272     case 3:
2273       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2274       B->ops->mult            = MatMult_SeqBAIJ_3;
2275       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2276       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2277       break;
2278     case 4:
2279       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2280       B->ops->mult            = MatMult_SeqBAIJ_4;
2281       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2282       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2283       break;
2284     case 5:
2285       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2286       B->ops->mult            = MatMult_SeqBAIJ_5;
2287       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2288       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2289       break;
2290     case 6:
2291       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2292       B->ops->mult            = MatMult_SeqBAIJ_6;
2293       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2294       break;
2295     case 7:
2296       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2297       B->ops->mult            = MatMult_SeqBAIJ_7;
2298       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2299       break;
2300     default:
2301       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2302       B->ops->mult            = MatMult_SeqBAIJ_N;
2303       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2304       break;
2305     }
2306   }
2307   B->bs      = bs;
2308   b->mbs     = mbs;
2309   b->nbs     = nbs;
2310   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2311   if (!nnz) {
2312     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2313     else if (nz <= 0)        nz = 1;
2314     for (i=0; i<mbs; i++) b->imax[i] = nz;
2315     nz = nz*mbs;
2316   } else {
2317     nz = 0;
2318     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2319   }
2320 
2321   /* allocate the matrix space */
2322   ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2323   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2324   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2325   b->singlemalloc = PETSC_TRUE;
2326 
2327   b->i[0] = 0;
2328   for (i=1; i<mbs+1; i++) {
2329     b->i[i] = b->i[i-1] + b->imax[i-1];
2330   }
2331 
2332   /* b->ilen will count nonzeros in each block row so far. */
2333   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2334   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2335 
2336   B->bs               = bs;
2337   b->bs2              = bs2;
2338   b->mbs              = mbs;
2339   b->nz               = 0;
2340   b->maxnz            = nz*bs2;
2341   B->info.nz_unneeded = (PetscReal)b->maxnz;
2342   PetscFunctionReturn(0);
2343 }
2344 EXTERN_C_END
2345 
2346 /*MC
2347    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2348    block sparse compressed row format.
2349 
2350    Options Database Keys:
2351 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2352 
2353   Level: beginner
2354 
2355 .seealso: MatCreateSeqBAIJ
2356 M*/
2357 
2358 EXTERN_C_BEGIN
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatCreate_SeqBAIJ"
2361 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2362 {
2363   PetscErrorCode ierr;
2364   PetscMPIInt    size;
2365   Mat_SeqBAIJ    *b;
2366 
2367   PetscFunctionBegin;
2368   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2369   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2370 
2371   B->m = B->M = PetscMax(B->m,B->M);
2372   B->n = B->N = PetscMax(B->n,B->N);
2373   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2374   B->data = (void*)b;
2375   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2376   B->factor           = 0;
2377   B->mapping          = 0;
2378   b->row              = 0;
2379   b->col              = 0;
2380   b->icol             = 0;
2381   b->reallocs         = 0;
2382   b->saved_values     = 0;
2383 #if defined(PETSC_USE_MAT_SINGLE)
2384   b->setvalueslen     = 0;
2385   b->setvaluescopy    = PETSC_NULL;
2386 #endif
2387 
2388   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2389   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2390 
2391   b->sorted           = PETSC_FALSE;
2392   b->roworiented      = PETSC_TRUE;
2393   b->nonew            = 0;
2394   b->diag             = 0;
2395   b->solve_work       = 0;
2396   b->mult_work        = 0;
2397   B->spptr            = 0;
2398   B->info.nz_unneeded = (PetscReal)b->maxnz;
2399   b->keepzeroedrows   = PETSC_FALSE;
2400   b->xtoy              = 0;
2401   b->XtoY              = 0;
2402   b->compressedrow.use     = PETSC_FALSE;
2403   b->compressedrow.nrows   = 0;
2404   b->compressedrow.i       = PETSC_NULL;
2405   b->compressedrow.rindex  = PETSC_NULL;
2406   b->compressedrow.checked = PETSC_FALSE;
2407   B->same_nonzero          = PETSC_FALSE;
2408 
2409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2410                                      "MatInvertBlockDiagonal_SeqBAIJ",
2411                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2413                                      "MatStoreValues_SeqBAIJ",
2414                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2415   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2416                                      "MatRetrieveValues_SeqBAIJ",
2417                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2418   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2419                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2420                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2421   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2422                                      "MatConvert_SeqBAIJ_SeqAIJ",
2423                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2424 #if !defined(PETSC_USE_64BIT_INT)
2425   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2426                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2427                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2428 #endif
2429   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2430                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2431                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 EXTERN_C_END
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2438 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2439 {
2440   Mat            C;
2441   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2442   PetscErrorCode ierr;
2443   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2444 
2445   PetscFunctionBegin;
2446   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2447 
2448   *B = 0;
2449   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2450   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2451   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2452   c    = (Mat_SeqBAIJ*)C->data;
2453 
2454   C->M   = A->M;
2455   C->N   = A->N;
2456   C->bs  = A->bs;
2457   c->bs2 = a->bs2;
2458   c->mbs = a->mbs;
2459   c->nbs = a->nbs;
2460 
2461   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2462   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2463   for (i=0; i<mbs; i++) {
2464     c->imax[i] = a->imax[i];
2465     c->ilen[i] = a->ilen[i];
2466   }
2467 
2468   /* allocate the matrix space */
2469   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2470   c->singlemalloc = PETSC_TRUE;
2471   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2472   if (mbs > 0) {
2473     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2474     if (cpvalues == MAT_COPY_VALUES) {
2475       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2476     } else {
2477       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2478     }
2479   }
2480   c->sorted      = a->sorted;
2481   c->roworiented = a->roworiented;
2482   c->nonew       = a->nonew;
2483 
2484   if (a->diag) {
2485     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2486     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2487     for (i=0; i<mbs; i++) {
2488       c->diag[i] = a->diag[i];
2489     }
2490   } else c->diag        = 0;
2491   c->nz                 = a->nz;
2492   c->maxnz              = a->maxnz;
2493   c->solve_work         = 0;
2494   c->mult_work          = 0;
2495   C->preallocated       = PETSC_TRUE;
2496   C->assembled          = PETSC_TRUE;
2497 
2498   c->compressedrow.use     = a->compressedrow.use;
2499   c->compressedrow.nrows   = a->compressedrow.nrows;
2500   c->compressedrow.checked = a->compressedrow.checked;
2501   if ( a->compressedrow.checked && a->compressedrow.use){
2502     i = a->compressedrow.nrows;
2503     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2504     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2505     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2506     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2507   } else {
2508     c->compressedrow.use    = PETSC_FALSE;
2509     c->compressedrow.i      = PETSC_NULL;
2510     c->compressedrow.rindex = PETSC_NULL;
2511   }
2512   C->same_nonzero = A->same_nonzero;
2513   *B = C;
2514   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatLoad_SeqBAIJ"
2520 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2521 {
2522   Mat_SeqBAIJ    *a;
2523   Mat            B;
2524   PetscErrorCode ierr;
2525   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2526   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2527   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2528   PetscInt       *masked,nmask,tmp,bs2,ishift;
2529   PetscMPIInt    size;
2530   int            fd;
2531   PetscScalar    *aa;
2532   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2533 
2534   PetscFunctionBegin;
2535   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2536   bs2  = bs*bs;
2537 
2538   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2539   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2540   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2541   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2542   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2543   M = header[1]; N = header[2]; nz = header[3];
2544 
2545   if (header[3] < 0) {
2546     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2547   }
2548 
2549   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2550 
2551   /*
2552      This code adds extra rows to make sure the number of rows is
2553     divisible by the blocksize
2554   */
2555   mbs        = M/bs;
2556   extra_rows = bs - M + bs*(mbs);
2557   if (extra_rows == bs) extra_rows = 0;
2558   else                  mbs++;
2559   if (extra_rows) {
2560     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2561   }
2562 
2563   /* read in row lengths */
2564   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2565   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2566   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2567 
2568   /* read in column indices */
2569   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2570   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2571   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2572 
2573   /* loop over row lengths determining block row lengths */
2574   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2575   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2576   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2577   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2578   masked   = mask + mbs;
2579   rowcount = 0; nzcount = 0;
2580   for (i=0; i<mbs; i++) {
2581     nmask = 0;
2582     for (j=0; j<bs; j++) {
2583       kmax = rowlengths[rowcount];
2584       for (k=0; k<kmax; k++) {
2585         tmp = jj[nzcount++]/bs;
2586         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2587       }
2588       rowcount++;
2589     }
2590     browlengths[i] += nmask;
2591     /* zero out the mask elements we set */
2592     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2593   }
2594 
2595   /* create our matrix */
2596   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2597   ierr = MatSetType(B,type);CHKERRQ(ierr);
2598   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2599   a = (Mat_SeqBAIJ*)B->data;
2600 
2601   /* set matrix "i" values */
2602   a->i[0] = 0;
2603   for (i=1; i<= mbs; i++) {
2604     a->i[i]      = a->i[i-1] + browlengths[i-1];
2605     a->ilen[i-1] = browlengths[i-1];
2606   }
2607   a->nz         = 0;
2608   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2609 
2610   /* read in nonzero values */
2611   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2612   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2613   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2614 
2615   /* set "a" and "j" values into matrix */
2616   nzcount = 0; jcount = 0;
2617   for (i=0; i<mbs; i++) {
2618     nzcountb = nzcount;
2619     nmask    = 0;
2620     for (j=0; j<bs; j++) {
2621       kmax = rowlengths[i*bs+j];
2622       for (k=0; k<kmax; k++) {
2623         tmp = jj[nzcount++]/bs;
2624 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2625       }
2626     }
2627     /* sort the masked values */
2628     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2629 
2630     /* set "j" values into matrix */
2631     maskcount = 1;
2632     for (j=0; j<nmask; j++) {
2633       a->j[jcount++]  = masked[j];
2634       mask[masked[j]] = maskcount++;
2635     }
2636     /* set "a" values into matrix */
2637     ishift = bs2*a->i[i];
2638     for (j=0; j<bs; j++) {
2639       kmax = rowlengths[i*bs+j];
2640       for (k=0; k<kmax; k++) {
2641         tmp       = jj[nzcountb]/bs ;
2642         block     = mask[tmp] - 1;
2643         point     = jj[nzcountb] - bs*tmp;
2644         idx       = ishift + bs2*block + j + bs*point;
2645         a->a[idx] = (MatScalar)aa[nzcountb++];
2646       }
2647     }
2648     /* zero out the mask elements we set */
2649     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2650   }
2651   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2652 
2653   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2654   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2655   ierr = PetscFree(aa);CHKERRQ(ierr);
2656   ierr = PetscFree(jj);CHKERRQ(ierr);
2657   ierr = PetscFree(mask);CHKERRQ(ierr);
2658 
2659   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2660   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2661   ierr = MatView_Private(B);CHKERRQ(ierr);
2662 
2663   *A = B;
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatCreateSeqBAIJ"
2669 /*@C
2670    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2671    compressed row) format.  For good matrix assembly performance the
2672    user should preallocate the matrix storage by setting the parameter nz
2673    (or the array nnz).  By setting these parameters accurately, performance
2674    during matrix assembly can be increased by more than a factor of 50.
2675 
2676    Collective on MPI_Comm
2677 
2678    Input Parameters:
2679 +  comm - MPI communicator, set to PETSC_COMM_SELF
2680 .  bs - size of block
2681 .  m - number of rows
2682 .  n - number of columns
2683 .  nz - number of nonzero blocks  per block row (same for all rows)
2684 -  nnz - array containing the number of nonzero blocks in the various block rows
2685          (possibly different for each block row) or PETSC_NULL
2686 
2687    Output Parameter:
2688 .  A - the matrix
2689 
2690    Options Database Keys:
2691 .   -mat_no_unroll - uses code that does not unroll the loops in the
2692                      block calculations (much slower)
2693 .    -mat_block_size - size of the blocks to use
2694 
2695    Level: intermediate
2696 
2697    Notes:
2698    The number of rows and columns must be divisible by blocksize.
2699 
2700    If the nnz parameter is given then the nz parameter is ignored
2701 
2702    A nonzero block is any block that as 1 or more nonzeros in it
2703 
2704    The block AIJ format is fully compatible with standard Fortran 77
2705    storage.  That is, the stored row and column indices can begin at
2706    either one (as in Fortran) or zero.  See the users' manual for details.
2707 
2708    Specify the preallocated storage with either nz or nnz (not both).
2709    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2710    allocation.  For additional details, see the users manual chapter on
2711    matrices.
2712 
2713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2714 @*/
2715 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2716 {
2717   PetscErrorCode ierr;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2721   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2722   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2728 /*@C
2729    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2730    per row in the matrix. For good matrix assembly performance the
2731    user should preallocate the matrix storage by setting the parameter nz
2732    (or the array nnz).  By setting these parameters accurately, performance
2733    during matrix assembly can be increased by more than a factor of 50.
2734 
2735    Collective on MPI_Comm
2736 
2737    Input Parameters:
2738 +  A - the matrix
2739 .  bs - size of block
2740 .  nz - number of block nonzeros per block row (same for all rows)
2741 -  nnz - array containing the number of block nonzeros in the various block rows
2742          (possibly different for each block row) or PETSC_NULL
2743 
2744    Options Database Keys:
2745 .   -mat_no_unroll - uses code that does not unroll the loops in the
2746                      block calculations (much slower)
2747 .    -mat_block_size - size of the blocks to use
2748 
2749    Level: intermediate
2750 
2751    Notes:
2752    If the nnz parameter is given then the nz parameter is ignored
2753 
2754    The block AIJ format is fully compatible with standard Fortran 77
2755    storage.  That is, the stored row and column indices can begin at
2756    either one (as in Fortran) or zero.  See the users' manual for details.
2757 
2758    Specify the preallocated storage with either nz or nnz (not both).
2759    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2760    allocation.  For additional details, see the users manual chapter on
2761    matrices.
2762 
2763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2764 @*/
2765 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2766 {
2767   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2768 
2769   PetscFunctionBegin;
2770   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2771   if (f) {
2772     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2773   }
2774   PetscFunctionReturn(0);
2775 }
2776 
2777