xref: /petsc/src/mat/impls/baij/seq/baij.c (revision df48e8d96c19df54efdde2c76f874a16fcafd8fe)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
831   if (a->row) {
832     ierr = ISDestroy(a->row);CHKERRQ(ierr);
833   }
834   if (a->col) {
835     ierr = ISDestroy(a->col);CHKERRQ(ierr);
836   }
837   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
838   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
839   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
840   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
841   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
842   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
843   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
844 #if defined(PETSC_USE_MAT_SINGLE)
845   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
846 #endif
847   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
848   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
849 
850   ierr = PetscFree(a);CHKERRQ(ierr);
851 
852   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatSetOption_SeqBAIJ"
864 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
865 {
866   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   switch (op) {
871   case MAT_ROW_ORIENTED:
872     a->roworiented    = PETSC_TRUE;
873     break;
874   case MAT_COLUMN_ORIENTED:
875     a->roworiented    = PETSC_FALSE;
876     break;
877   case MAT_COLUMNS_SORTED:
878     a->sorted         = PETSC_TRUE;
879     break;
880   case MAT_COLUMNS_UNSORTED:
881     a->sorted         = PETSC_FALSE;
882     break;
883   case MAT_KEEP_ZEROED_ROWS:
884     a->keepzeroedrows = PETSC_TRUE;
885     break;
886   case MAT_NO_NEW_NONZERO_LOCATIONS:
887     a->nonew          = 1;
888     break;
889   case MAT_NEW_NONZERO_LOCATION_ERR:
890     a->nonew          = -1;
891     break;
892   case MAT_NEW_NONZERO_ALLOCATION_ERR:
893     a->nonew          = -2;
894     break;
895   case MAT_YES_NEW_NONZERO_LOCATIONS:
896     a->nonew          = 0;
897     break;
898   case MAT_ROWS_SORTED:
899   case MAT_ROWS_UNSORTED:
900   case MAT_YES_NEW_DIAGONALS:
901   case MAT_IGNORE_OFF_PROC_ENTRIES:
902   case MAT_USE_HASH_TABLE:
903     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
904     break;
905   case MAT_NO_NEW_DIAGONALS:
906     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
907   case MAT_SYMMETRIC:
908   case MAT_STRUCTURALLY_SYMMETRIC:
909   case MAT_NOT_SYMMETRIC:
910   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
911   case MAT_HERMITIAN:
912   case MAT_NOT_HERMITIAN:
913   case MAT_SYMMETRY_ETERNAL:
914   case MAT_NOT_SYMMETRY_ETERNAL:
915     break;
916   default:
917     SETERRQ(PETSC_ERR_SUP,"unknown option");
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatGetRow_SeqBAIJ"
924 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
925 {
926   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
927   PetscErrorCode ierr;
928   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
929   MatScalar      *aa,*aa_i;
930   PetscScalar    *v_i;
931 
932   PetscFunctionBegin;
933   bs  = A->bs;
934   ai  = a->i;
935   aj  = a->j;
936   aa  = a->a;
937   bs2 = a->bs2;
938 
939   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
940 
941   bn  = row/bs;   /* Block number */
942   bp  = row % bs; /* Block Position */
943   M   = ai[bn+1] - ai[bn];
944   *nz = bs*M;
945 
946   if (v) {
947     *v = 0;
948     if (*nz) {
949       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
950       for (i=0; i<M; i++) { /* for each block in the block row */
951         v_i  = *v + i*bs;
952         aa_i = aa + bs2*(ai[bn] + i);
953         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
954       }
955     }
956   }
957 
958   if (idx) {
959     *idx = 0;
960     if (*nz) {
961       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
962       for (i=0; i<M; i++) { /* for each block in the block row */
963         idx_i = *idx + i*bs;
964         itmp  = bs*aj[ai[bn] + i];
965         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
966       }
967     }
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
974 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
975 {
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
980   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatTranspose_SeqBAIJ"
986 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
987 {
988   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
989   Mat            C;
990   PetscErrorCode ierr;
991   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
992   PetscInt       *rows,*cols,bs2=a->bs2;
993   PetscScalar    *array;
994 
995   PetscFunctionBegin;
996   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
997   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
998   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
999 
1000 #if defined(PETSC_USE_MAT_SINGLE)
1001   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1002   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1003 #else
1004   array = a->a;
1005 #endif
1006 
1007   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1008   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1009   ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr);
1010   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1011   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1012   ierr = PetscFree(col);CHKERRQ(ierr);
1013   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1014   cols = rows + bs;
1015   for (i=0; i<mbs; i++) {
1016     cols[0] = i*bs;
1017     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1018     len = ai[i+1] - ai[i];
1019     for (j=0; j<len; j++) {
1020       rows[0] = (*aj++)*bs;
1021       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1022       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1023       array += bs2;
1024     }
1025   }
1026   ierr = PetscFree(rows);CHKERRQ(ierr);
1027 #if defined(PETSC_USE_MAT_SINGLE)
1028   ierr = PetscFree(array);
1029 #endif
1030 
1031   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1033 
1034   if (B) {
1035     *B = C;
1036   } else {
1037     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1044 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1045 {
1046   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1047   PetscErrorCode ierr;
1048   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1049   int            fd;
1050   PetscScalar    *aa;
1051   FILE           *file;
1052 
1053   PetscFunctionBegin;
1054   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1055   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1056   col_lens[0] = MAT_FILE_COOKIE;
1057 
1058   col_lens[1] = A->m;
1059   col_lens[2] = A->n;
1060   col_lens[3] = a->nz*bs2;
1061 
1062   /* store lengths of each row and write (including header) to file */
1063   count = 0;
1064   for (i=0; i<a->mbs; i++) {
1065     for (j=0; j<bs; j++) {
1066       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1067     }
1068   }
1069   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1070   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1071 
1072   /* store column indices (zero start index) */
1073   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1074   count = 0;
1075   for (i=0; i<a->mbs; i++) {
1076     for (j=0; j<bs; j++) {
1077       for (k=a->i[i]; k<a->i[i+1]; k++) {
1078         for (l=0; l<bs; l++) {
1079           jj[count++] = bs*a->j[k] + l;
1080         }
1081       }
1082     }
1083   }
1084   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1085   ierr = PetscFree(jj);CHKERRQ(ierr);
1086 
1087   /* store nonzero values */
1088   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1089   count = 0;
1090   for (i=0; i<a->mbs; i++) {
1091     for (j=0; j<bs; j++) {
1092       for (k=a->i[i]; k<a->i[i+1]; k++) {
1093         for (l=0; l<bs; l++) {
1094           aa[count++] = a->a[bs2*k + l*bs + j];
1095         }
1096       }
1097     }
1098   }
1099   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1100   ierr = PetscFree(aa);CHKERRQ(ierr);
1101 
1102   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1103   if (file) {
1104     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1111 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1112 {
1113   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1114   PetscErrorCode    ierr;
1115   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1116   PetscViewerFormat format;
1117 
1118   PetscFunctionBegin;
1119   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1120   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1121     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1122   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1123     Mat aij;
1124     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1125     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1126     ierr = MatDestroy(aij);CHKERRQ(ierr);
1127   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1128      PetscFunctionReturn(0);
1129   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1130     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1131     for (i=0; i<a->mbs; i++) {
1132       for (j=0; j<bs; j++) {
1133         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1134         for (k=a->i[i]; k<a->i[i+1]; k++) {
1135           for (l=0; l<bs; l++) {
1136 #if defined(PETSC_USE_COMPLEX)
1137             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1138               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1139                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1140             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1141               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1142                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1143             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             }
1146 #else
1147             if (a->a[bs2*k + l*bs + j] != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1149             }
1150 #endif
1151           }
1152         }
1153         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1154       }
1155     }
1156     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1157   } else {
1158     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1159     for (i=0; i<a->mbs; i++) {
1160       for (j=0; j<bs; j++) {
1161         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1162         for (k=a->i[i]; k<a->i[i+1]; k++) {
1163           for (l=0; l<bs; l++) {
1164 #if defined(PETSC_USE_COMPLEX)
1165             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1166               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1167                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1168             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1169               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1170                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1171             } else {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1173             }
1174 #else
1175             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1176 #endif
1177           }
1178         }
1179         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1180       }
1181     }
1182     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1183   }
1184   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1190 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1191 {
1192   Mat            A = (Mat) Aa;
1193   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1194   PetscErrorCode ierr;
1195   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1196   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1197   MatScalar      *aa;
1198   PetscViewer    viewer;
1199 
1200   PetscFunctionBegin;
1201 
1202   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1203   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1204 
1205   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1206 
1207   /* loop over matrix elements drawing boxes */
1208   color = PETSC_DRAW_BLUE;
1209   for (i=0,row=0; i<mbs; i++,row+=bs) {
1210     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1212       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1213       aa = a->a + j*bs2;
1214       for (k=0; k<bs; k++) {
1215         for (l=0; l<bs; l++) {
1216           if (PetscRealPart(*aa++) >=  0.) continue;
1217           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1218         }
1219       }
1220     }
1221   }
1222   color = PETSC_DRAW_CYAN;
1223   for (i=0,row=0; i<mbs; i++,row+=bs) {
1224     for (j=a->i[i]; j<a->i[i+1]; j++) {
1225       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1226       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1227       aa = a->a + j*bs2;
1228       for (k=0; k<bs; k++) {
1229         for (l=0; l<bs; l++) {
1230           if (PetscRealPart(*aa++) != 0.) continue;
1231           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1232         }
1233       }
1234     }
1235   }
1236 
1237   color = PETSC_DRAW_RED;
1238   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1241       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242       aa = a->a + j*bs2;
1243       for (k=0; k<bs; k++) {
1244         for (l=0; l<bs; l++) {
1245           if (PetscRealPart(*aa++) <= 0.) continue;
1246           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1247         }
1248       }
1249     }
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1256 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1257 {
1258   PetscErrorCode ierr;
1259   PetscReal      xl,yl,xr,yr,w,h;
1260   PetscDraw      draw;
1261   PetscTruth     isnull;
1262 
1263   PetscFunctionBegin;
1264 
1265   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1266   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1267 
1268   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1269   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1270   xr += w;    yr += h;  xl = -w;     yl = -h;
1271   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1272   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1273   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatView_SeqBAIJ"
1279 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1280 {
1281   PetscErrorCode ierr;
1282   PetscTruth     iascii,isbinary,isdraw;
1283 
1284   PetscFunctionBegin;
1285   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1286   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1287   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1288   if (iascii){
1289     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1290   } else if (isbinary) {
1291     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1292   } else if (isdraw) {
1293     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1294   } else {
1295     Mat B;
1296     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1297     ierr = MatView(B,viewer);CHKERRQ(ierr);
1298     ierr = MatDestroy(B);CHKERRQ(ierr);
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1306 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1307 {
1308   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1309   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1310   PetscInt    *ai = a->i,*ailen = a->ilen;
1311   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1312   MatScalar   *ap,*aa = a->a,zero = 0.0;
1313 
1314   PetscFunctionBegin;
1315   for (k=0; k<m; k++) { /* loop over rows */
1316     row  = im[k]; brow = row/bs;
1317     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1318     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1319     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1320     nrow = ailen[brow];
1321     for (l=0; l<n; l++) { /* loop over columns */
1322       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1323       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1324       col  = in[l] ;
1325       bcol = col/bs;
1326       cidx = col%bs;
1327       ridx = row%bs;
1328       high = nrow;
1329       low  = 0; /* assume unsorted */
1330       while (high-low > 5) {
1331         t = (low+high)/2;
1332         if (rp[t] > bcol) high = t;
1333         else             low  = t;
1334       }
1335       for (i=low; i<high; i++) {
1336         if (rp[i] > bcol) break;
1337         if (rp[i] == bcol) {
1338           *v++ = ap[bs2*i+bs*cidx+ridx];
1339           goto finished;
1340         }
1341       }
1342       *v++ = zero;
1343       finished:;
1344     }
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #if defined(PETSC_USE_MAT_SINGLE)
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1352 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1353 {
1354   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1355   PetscErrorCode ierr;
1356   PetscInt       i,N = m*n*b->bs2;
1357   MatScalar      *vsingle;
1358 
1359   PetscFunctionBegin;
1360   if (N > b->setvalueslen) {
1361     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1362     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1363     b->setvalueslen  = N;
1364   }
1365   vsingle = b->setvaluescopy;
1366   for (i=0; i<N; i++) {
1367     vsingle[i] = v[i];
1368   }
1369   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 #endif
1373 
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1377 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1378 {
1379   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1380   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1381   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1382   PetscErrorCode  ierr;
1383   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1384   PetscTruth      roworiented=a->roworiented;
1385   const MatScalar *value = v;
1386   MatScalar       *ap,*aa = a->a,*bap;
1387 
1388   PetscFunctionBegin;
1389   if (roworiented) {
1390     stepval = (n-1)*bs;
1391   } else {
1392     stepval = (m-1)*bs;
1393   }
1394   for (k=0; k<m; k++) { /* loop over added rows */
1395     row  = im[k];
1396     if (row < 0) continue;
1397 #if defined(PETSC_USE_DEBUG)
1398     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1399 #endif
1400     rp   = aj + ai[row];
1401     ap   = aa + bs2*ai[row];
1402     rmax = imax[row];
1403     nrow = ailen[row];
1404     low  = 0;
1405     high = nrow;
1406     for (l=0; l<n; l++) { /* loop over added columns */
1407       if (in[l] < 0) continue;
1408 #if defined(PETSC_USE_DEBUG)
1409       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1410 #endif
1411       col = in[l];
1412       if (roworiented) {
1413         value = v + k*(stepval+bs)*bs + l*bs;
1414       } else {
1415         value = v + l*(stepval+bs)*bs + k*bs;
1416       }
1417       if (col < lastcol) low = 0; else high = nrow;
1418       lastcol = col;
1419       while (high-low > 7) {
1420         t = (low+high)/2;
1421         if (rp[t] > col) high = t;
1422         else             low  = t;
1423       }
1424       for (i=low; i<high; i++) {
1425         if (rp[i] > col) break;
1426         if (rp[i] == col) {
1427           bap  = ap +  bs2*i;
1428           if (roworiented) {
1429             if (is == ADD_VALUES) {
1430               for (ii=0; ii<bs; ii++,value+=stepval) {
1431                 for (jj=ii; jj<bs2; jj+=bs) {
1432                   bap[jj] += *value++;
1433                 }
1434               }
1435             } else {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] = *value++;
1439                 }
1440               }
1441             }
1442           } else {
1443             if (is == ADD_VALUES) {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=0; jj<bs; jj++) {
1446                   *bap++ += *value++;
1447                 }
1448               }
1449             } else {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++  = *value++;
1453                 }
1454               }
1455             }
1456           }
1457           goto noinsert2;
1458         }
1459       }
1460       if (nonew == 1) goto noinsert2;
1461       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1462       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1463       N = nrow++ - 1;
1464       /* shift up all the later entries in this row */
1465       for (ii=N; ii>=i; ii--) {
1466         rp[ii+1] = rp[ii];
1467         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1468       }
1469       if (N >= i) {
1470         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1471       }
1472       rp[i] = col;
1473       bap   = ap +  bs2*i;
1474       if (roworiented) {
1475         for (ii=0; ii<bs; ii++,value+=stepval) {
1476           for (jj=ii; jj<bs2; jj+=bs) {
1477             bap[jj] = *value++;
1478           }
1479         }
1480       } else {
1481         for (ii=0; ii<bs; ii++,value+=stepval) {
1482           for (jj=0; jj<bs; jj++) {
1483             *bap++  = *value++;
1484           }
1485         }
1486       }
1487       noinsert2:;
1488       low = i;
1489     }
1490     ailen[row] = nrow;
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1497 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1498 {
1499   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1500   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1501   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1502   PetscErrorCode ierr;
1503   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1504   MatScalar      *aa = a->a,*ap;
1505   PetscReal      ratio=0.6;
1506 
1507   PetscFunctionBegin;
1508   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1509 
1510   if (m) rmax = ailen[0];
1511   for (i=1; i<mbs; i++) {
1512     /* move each row back by the amount of empty slots (fshift) before it*/
1513     fshift += imax[i-1] - ailen[i-1];
1514     rmax   = PetscMax(rmax,ailen[i]);
1515     if (fshift) {
1516       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1517       N = ailen[i];
1518       for (j=0; j<N; j++) {
1519         ip[j-fshift] = ip[j];
1520         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1521       }
1522     }
1523     ai[i] = ai[i-1] + ailen[i-1];
1524   }
1525   if (mbs) {
1526     fshift += imax[mbs-1] - ailen[mbs-1];
1527     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1528   }
1529   /* reset ilen and imax for each row */
1530   for (i=0; i<mbs; i++) {
1531     ailen[i] = imax[i] = ai[i+1] - ai[i];
1532   }
1533   a->nz = ai[mbs];
1534 
1535   /* diagonals may have moved, so kill the diagonal pointers */
1536   a->idiagvalid = PETSC_FALSE;
1537   if (fshift && a->diag) {
1538     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1539     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1540     a->diag = 0;
1541   }
1542   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1543   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1544   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1545   a->reallocs          = 0;
1546   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1547 
1548   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1549   if (a->compressedrow.use){
1550     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1551   }
1552 
1553   A->same_nonzero = PETSC_TRUE;
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /*
1558    This function returns an array of flags which indicate the locations of contiguous
1559    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1560    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1561    Assume: sizes should be long enough to hold all the values.
1562 */
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1565 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1566 {
1567   PetscInt   i,j,k,row;
1568   PetscTruth flg;
1569 
1570   PetscFunctionBegin;
1571   for (i=0,j=0; i<n; j++) {
1572     row = idx[i];
1573     if (row%bs!=0) { /* Not the begining of a block */
1574       sizes[j] = 1;
1575       i++;
1576     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1577       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1578       i++;
1579     } else { /* Begining of the block, so check if the complete block exists */
1580       flg = PETSC_TRUE;
1581       for (k=1; k<bs; k++) {
1582         if (row+k != idx[i+k]) { /* break in the block */
1583           flg = PETSC_FALSE;
1584           break;
1585         }
1586       }
1587       if (flg) { /* No break in the bs */
1588         sizes[j] = bs;
1589         i+= bs;
1590       } else {
1591         sizes[j] = 1;
1592         i++;
1593       }
1594     }
1595   }
1596   *bs_max = j;
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1602 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1603 {
1604   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1605   PetscErrorCode ierr;
1606   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1607   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1608   PetscScalar    zero = 0.0;
1609   MatScalar      *aa;
1610 
1611   PetscFunctionBegin;
1612   /* Make a copy of the IS and  sort it */
1613   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1614   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1615 
1616   /* allocate memory for rows,sizes */
1617   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1618   sizes = rows + is_n;
1619 
1620   /* copy IS values to rows, and sort them */
1621   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1622   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1623   if (baij->keepzeroedrows) {
1624     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1625     bs_max = is_n;
1626     A->same_nonzero = PETSC_TRUE;
1627   } else {
1628     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1629     A->same_nonzero = PETSC_FALSE;
1630   }
1631   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1632 
1633   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1634     row   = rows[j];
1635     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1636     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1637     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1638     if (sizes[i] == bs && !baij->keepzeroedrows) {
1639       if (diag) {
1640         if (baij->ilen[row/bs] > 0) {
1641           baij->ilen[row/bs]       = 1;
1642           baij->j[baij->i[row/bs]] = row/bs;
1643           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1644         }
1645         /* Now insert all the diagonal values for this bs */
1646         for (k=0; k<bs; k++) {
1647           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1648         }
1649       } else { /* (!diag) */
1650         baij->ilen[row/bs] = 0;
1651       } /* end (!diag) */
1652     } else { /* (sizes[i] != bs) */
1653 #if defined (PETSC_USE_DEBUG)
1654       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1655 #endif
1656       for (k=0; k<count; k++) {
1657         aa[0] =  zero;
1658         aa    += bs;
1659       }
1660       if (diag) {
1661         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1662       }
1663     }
1664   }
1665 
1666   ierr = PetscFree(rows);CHKERRQ(ierr);
1667   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1673 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1674 {
1675   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1676   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1677   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1678   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1679   PetscErrorCode ierr;
1680   PetscInt       ridx,cidx,bs2=a->bs2;
1681   PetscTruth     roworiented=a->roworiented;
1682   MatScalar      *ap,value,*aa=a->a,*bap;
1683 
1684   PetscFunctionBegin;
1685   for (k=0; k<m; k++) { /* loop over added rows */
1686     row  = im[k];
1687     brow = row/bs;
1688     if (row < 0) continue;
1689 #if defined(PETSC_USE_DEBUG)
1690     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1691 #endif
1692     rp   = aj + ai[brow];
1693     ap   = aa + bs2*ai[brow];
1694     rmax = imax[brow];
1695     nrow = ailen[brow];
1696     low  = 0;
1697     high = nrow;
1698     for (l=0; l<n; l++) { /* loop over added columns */
1699       if (in[l] < 0) continue;
1700 #if defined(PETSC_USE_DEBUG)
1701       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1702 #endif
1703       col = in[l]; bcol = col/bs;
1704       ridx = row % bs; cidx = col % bs;
1705       if (roworiented) {
1706         value = v[l + k*n];
1707       } else {
1708         value = v[k + l*m];
1709       }
1710       if (col < lastcol) low = 0; else high = nrow;
1711       lastcol = col;
1712       while (high-low > 7) {
1713         t = (low+high)/2;
1714         if (rp[t] > bcol) high = t;
1715         else              low  = t;
1716       }
1717       for (i=low; i<high; i++) {
1718         if (rp[i] > bcol) break;
1719         if (rp[i] == bcol) {
1720           bap  = ap +  bs2*i + bs*cidx + ridx;
1721           if (is == ADD_VALUES) *bap += value;
1722           else                  *bap  = value;
1723           goto noinsert1;
1724         }
1725       }
1726       if (nonew == 1) goto noinsert1;
1727       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1728       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1729       N = nrow++ - 1;
1730       /* shift up all the later entries in this row */
1731       for (ii=N; ii>=i; ii--) {
1732         rp[ii+1] = rp[ii];
1733         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1734       }
1735       if (N>=i) {
1736         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1737       }
1738       rp[i]                      = bcol;
1739       ap[bs2*i + bs*cidx + ridx] = value;
1740       a->nz++;
1741       noinsert1:;
1742       low = i;
1743     }
1744     ailen[brow] = nrow;
1745   }
1746   A->same_nonzero = PETSC_FALSE;
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1753 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1754 {
1755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1756   Mat            outA;
1757   PetscErrorCode ierr;
1758   PetscTruth     row_identity,col_identity;
1759 
1760   PetscFunctionBegin;
1761   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1762   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1763   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1764   if (!row_identity || !col_identity) {
1765     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1766   }
1767 
1768   outA          = inA;
1769   inA->factor   = FACTOR_LU;
1770 
1771   if (!a->diag) {
1772     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1773   }
1774 
1775   a->row        = row;
1776   a->col        = col;
1777   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1778   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1779 
1780   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1781   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1782   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1783 
1784   /*
1785       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1786       for ILU(0) factorization with natural ordering
1787   */
1788   if (inA->bs < 8) {
1789     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1790   } else {
1791     if (!a->solve_work) {
1792       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1793       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1794     }
1795   }
1796 
1797   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1798 
1799   PetscFunctionReturn(0);
1800 }
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1803 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1804 {
1805   static PetscTruth called = PETSC_FALSE;
1806   MPI_Comm          comm = A->comm;
1807   PetscErrorCode    ierr;
1808 
1809   PetscFunctionBegin;
1810   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1811   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1812   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 EXTERN_C_BEGIN
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1819 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1820 {
1821   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1822   PetscInt    i,nz,nbs;
1823 
1824   PetscFunctionBegin;
1825   nz  = baij->maxnz/baij->bs2;
1826   nbs = baij->nbs;
1827   for (i=0; i<nz; i++) {
1828     baij->j[i] = indices[i];
1829   }
1830   baij->nz = nz;
1831   for (i=0; i<nbs; i++) {
1832     baij->ilen[i] = baij->imax[i];
1833   }
1834 
1835   PetscFunctionReturn(0);
1836 }
1837 EXTERN_C_END
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1841 /*@
1842     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1843        in the matrix.
1844 
1845   Input Parameters:
1846 +  mat - the SeqBAIJ matrix
1847 -  indices - the column indices
1848 
1849   Level: advanced
1850 
1851   Notes:
1852     This can be called if you have precomputed the nonzero structure of the
1853   matrix and want to provide it to the matrix object to improve the performance
1854   of the MatSetValues() operation.
1855 
1856     You MUST have set the correct numbers of nonzeros per row in the call to
1857   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1858 
1859     MUST be called before any calls to MatSetValues();
1860 
1861 @*/
1862 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1863 {
1864   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1865 
1866   PetscFunctionBegin;
1867   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1868   PetscValidPointer(indices,2);
1869   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1870   if (f) {
1871     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1872   } else {
1873     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1874   }
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1880 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1881 {
1882   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1883   PetscErrorCode ierr;
1884   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1885   PetscReal      atmp;
1886   PetscScalar    *x,zero = 0.0;
1887   MatScalar      *aa;
1888   PetscInt       ncols,brow,krow,kcol;
1889 
1890   PetscFunctionBegin;
1891   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1892   bs   = A->bs;
1893   aa   = a->a;
1894   ai   = a->i;
1895   aj   = a->j;
1896   mbs = a->mbs;
1897 
1898   ierr = VecSet(v,zero);CHKERRQ(ierr);
1899   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1900   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1901   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1902   for (i=0; i<mbs; i++) {
1903     ncols = ai[1] - ai[0]; ai++;
1904     brow  = bs*i;
1905     for (j=0; j<ncols; j++){
1906       /* bcol = bs*(*aj); */
1907       for (kcol=0; kcol<bs; kcol++){
1908         for (krow=0; krow<bs; krow++){
1909           atmp = PetscAbsScalar(*aa); aa++;
1910           row = brow + krow;    /* row index */
1911           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1912           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1913         }
1914       }
1915       aj++;
1916     }
1917   }
1918   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1924 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1925 {
1926   PetscErrorCode ierr;
1927 
1928   PetscFunctionBegin;
1929   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1935 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1936 {
1937   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1938   PetscFunctionBegin;
1939   *array = a->a;
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1945 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1946 {
1947   PetscFunctionBegin;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #include "petscblaslapack.h"
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1954 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1955 {
1956   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1957   PetscErrorCode ierr;
1958   PetscInt       i,bs=Y->bs,j,bs2;
1959   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1960 
1961   PetscFunctionBegin;
1962   if (str == SAME_NONZERO_PATTERN) {
1963     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1964   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1965     if (y->xtoy && y->XtoY != X) {
1966       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1967       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1968     }
1969     if (!y->xtoy) { /* get xtoy */
1970       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1971       y->XtoY = X;
1972     }
1973     bs2 = bs*bs;
1974     for (i=0; i<x->nz; i++) {
1975       j = 0;
1976       while (j < bs2){
1977         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1978         j++;
1979       }
1980     }
1981     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
1982   } else {
1983     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1984   }
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 /* -------------------------------------------------------------------*/
1989 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1990        MatGetRow_SeqBAIJ,
1991        MatRestoreRow_SeqBAIJ,
1992        MatMult_SeqBAIJ_N,
1993 /* 4*/ MatMultAdd_SeqBAIJ_N,
1994        MatMultTranspose_SeqBAIJ,
1995        MatMultTransposeAdd_SeqBAIJ,
1996        MatSolve_SeqBAIJ_N,
1997        0,
1998        0,
1999 /*10*/ 0,
2000        MatLUFactor_SeqBAIJ,
2001        0,
2002        0,
2003        MatTranspose_SeqBAIJ,
2004 /*15*/ MatGetInfo_SeqBAIJ,
2005        MatEqual_SeqBAIJ,
2006        MatGetDiagonal_SeqBAIJ,
2007        MatDiagonalScale_SeqBAIJ,
2008        MatNorm_SeqBAIJ,
2009 /*20*/ 0,
2010        MatAssemblyEnd_SeqBAIJ,
2011        0,
2012        MatSetOption_SeqBAIJ,
2013        MatZeroEntries_SeqBAIJ,
2014 /*25*/ MatZeroRows_SeqBAIJ,
2015        MatLUFactorSymbolic_SeqBAIJ,
2016        MatLUFactorNumeric_SeqBAIJ_N,
2017        MatCholeskyFactorSymbolic_SeqBAIJ,
2018        MatCholeskyFactorNumeric_SeqBAIJ_N,
2019 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2020        MatILUFactorSymbolic_SeqBAIJ,
2021        MatICCFactorSymbolic_SeqBAIJ,
2022        MatGetArray_SeqBAIJ,
2023        MatRestoreArray_SeqBAIJ,
2024 /*35*/ MatDuplicate_SeqBAIJ,
2025        0,
2026        0,
2027        MatILUFactor_SeqBAIJ,
2028        0,
2029 /*40*/ MatAXPY_SeqBAIJ,
2030        MatGetSubMatrices_SeqBAIJ,
2031        MatIncreaseOverlap_SeqBAIJ,
2032        MatGetValues_SeqBAIJ,
2033        0,
2034 /*45*/ MatPrintHelp_SeqBAIJ,
2035        MatScale_SeqBAIJ,
2036        0,
2037        0,
2038        0,
2039 /*50*/ 0,
2040        MatGetRowIJ_SeqBAIJ,
2041        MatRestoreRowIJ_SeqBAIJ,
2042        0,
2043        0,
2044 /*55*/ 0,
2045        0,
2046        0,
2047        0,
2048        MatSetValuesBlocked_SeqBAIJ,
2049 /*60*/ MatGetSubMatrix_SeqBAIJ,
2050        MatDestroy_SeqBAIJ,
2051        MatView_SeqBAIJ,
2052        MatGetPetscMaps_Petsc,
2053        0,
2054 /*65*/ 0,
2055        0,
2056        0,
2057        0,
2058        0,
2059 /*70*/ MatGetRowMax_SeqBAIJ,
2060        MatConvert_Basic,
2061        0,
2062        0,
2063        0,
2064 /*75*/ 0,
2065        0,
2066        0,
2067        0,
2068        0,
2069 /*80*/ 0,
2070        0,
2071        0,
2072        0,
2073        MatLoad_SeqBAIJ,
2074 /*85*/ 0,
2075        0,
2076        0,
2077        0,
2078        0,
2079 /*90*/ 0,
2080        0,
2081        0,
2082        0,
2083        0,
2084 /*95*/ 0,
2085        0,
2086        0,
2087        0};
2088 
2089 EXTERN_C_BEGIN
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2092 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2093 {
2094   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2095   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2096   PetscErrorCode ierr;
2097 
2098   PetscFunctionBegin;
2099   if (aij->nonew != 1) {
2100     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2101   }
2102 
2103   /* allocate space for values if not already there */
2104   if (!aij->saved_values) {
2105     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2106   }
2107 
2108   /* copy values over */
2109   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2110   PetscFunctionReturn(0);
2111 }
2112 EXTERN_C_END
2113 
2114 EXTERN_C_BEGIN
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2117 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2118 {
2119   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2120   PetscErrorCode ierr;
2121   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2122 
2123   PetscFunctionBegin;
2124   if (aij->nonew != 1) {
2125     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2126   }
2127   if (!aij->saved_values) {
2128     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2129   }
2130 
2131   /* copy values over */
2132   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 EXTERN_C_END
2136 
2137 EXTERN_C_BEGIN
2138 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2139 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2140 EXTERN_C_END
2141 
2142 EXTERN_C_BEGIN
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2145 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2146 {
2147   Mat_SeqBAIJ    *b;
2148   PetscErrorCode ierr;
2149   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2150   PetscTruth     flg,skipallocation = PETSC_FALSE;
2151 
2152   PetscFunctionBegin;
2153 
2154   if (nz == MAT_SKIP_ALLOCATION) {
2155     skipallocation = PETSC_TRUE;
2156     nz             = 0;
2157   }
2158 
2159   B->preallocated = PETSC_TRUE;
2160   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2161   if (nnz && newbs != bs) {
2162     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2163   }
2164   bs   = newbs;
2165 
2166   mbs  = B->m/bs;
2167   nbs  = B->n/bs;
2168   bs2  = bs*bs;
2169 
2170   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2171     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2172   }
2173 
2174   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2175   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2176   if (nnz) {
2177     for (i=0; i<mbs; i++) {
2178       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2179       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2180     }
2181   }
2182 
2183   b       = (Mat_SeqBAIJ*)B->data;
2184   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2185   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2186   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2187   if (!flg) {
2188     switch (bs) {
2189     case 1:
2190       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2191       B->ops->mult            = MatMult_SeqBAIJ_1;
2192       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2193       break;
2194     case 2:
2195       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2196       B->ops->mult            = MatMult_SeqBAIJ_2;
2197       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2198       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2199       break;
2200     case 3:
2201       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2202       B->ops->mult            = MatMult_SeqBAIJ_3;
2203       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2204       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2205       break;
2206     case 4:
2207       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2208       B->ops->mult            = MatMult_SeqBAIJ_4;
2209       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2210       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2211       break;
2212     case 5:
2213       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2214       B->ops->mult            = MatMult_SeqBAIJ_5;
2215       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2216       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2217       break;
2218     case 6:
2219       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2220       B->ops->mult            = MatMult_SeqBAIJ_6;
2221       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2222       break;
2223     case 7:
2224       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2225       B->ops->mult            = MatMult_SeqBAIJ_7;
2226       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2227       break;
2228     default:
2229       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2230       B->ops->mult            = MatMult_SeqBAIJ_N;
2231       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2232       break;
2233     }
2234   }
2235   B->bs      = bs;
2236   b->mbs     = mbs;
2237   b->nbs     = nbs;
2238   if (!skipallocation) {
2239     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2240     /* b->ilen will count nonzeros in each block row so far. */
2241     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2242     if (!nnz) {
2243       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2244       else if (nz <= 0)        nz = 1;
2245       for (i=0; i<mbs; i++) b->imax[i] = nz;
2246       nz = nz*mbs;
2247     } else {
2248       nz = 0;
2249       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2250     }
2251 
2252     /* allocate the matrix space */
2253     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2254     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2255     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2256     b->singlemalloc = PETSC_TRUE;
2257 
2258     b->i[0] = 0;
2259     for (i=1; i<mbs+1; i++) {
2260       b->i[i] = b->i[i-1] + b->imax[i-1];
2261     }
2262   }
2263 
2264   B->bs               = bs;
2265   b->bs2              = bs2;
2266   b->mbs              = mbs;
2267   b->nz               = 0;
2268   b->maxnz            = nz*bs2;
2269   B->info.nz_unneeded = (PetscReal)b->maxnz;
2270   PetscFunctionReturn(0);
2271 }
2272 EXTERN_C_END
2273 
2274 /*MC
2275    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2276    block sparse compressed row format.
2277 
2278    Options Database Keys:
2279 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2280 
2281   Level: beginner
2282 
2283 .seealso: MatCreateSeqBAIJ
2284 M*/
2285 
2286 EXTERN_C_BEGIN
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatCreate_SeqBAIJ"
2289 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2290 {
2291   PetscErrorCode ierr;
2292   PetscMPIInt    size;
2293   Mat_SeqBAIJ    *b;
2294 
2295   PetscFunctionBegin;
2296   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2297   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2298 
2299   B->m = B->M = PetscMax(B->m,B->M);
2300   B->n = B->N = PetscMax(B->n,B->N);
2301   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2302   B->data = (void*)b;
2303   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2304   B->factor           = 0;
2305   B->mapping          = 0;
2306   b->row              = 0;
2307   b->col              = 0;
2308   b->icol             = 0;
2309   b->reallocs         = 0;
2310   b->saved_values     = 0;
2311 #if defined(PETSC_USE_MAT_SINGLE)
2312   b->setvalueslen     = 0;
2313   b->setvaluescopy    = PETSC_NULL;
2314 #endif
2315 
2316   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2317   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2318 
2319   b->sorted           = PETSC_FALSE;
2320   b->roworiented      = PETSC_TRUE;
2321   b->nonew            = 0;
2322   b->diag             = 0;
2323   b->solve_work       = 0;
2324   b->mult_work        = 0;
2325   B->spptr            = 0;
2326   B->info.nz_unneeded = (PetscReal)b->maxnz;
2327   b->keepzeroedrows   = PETSC_FALSE;
2328   b->xtoy              = 0;
2329   b->XtoY              = 0;
2330   b->compressedrow.use     = PETSC_FALSE;
2331   b->compressedrow.nrows   = 0;
2332   b->compressedrow.i       = PETSC_NULL;
2333   b->compressedrow.rindex  = PETSC_NULL;
2334   b->compressedrow.checked = PETSC_FALSE;
2335   B->same_nonzero          = PETSC_FALSE;
2336 
2337   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2338                                      "MatInvertBlockDiagonal_SeqBAIJ",
2339                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2340   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2341                                      "MatStoreValues_SeqBAIJ",
2342                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2343   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2344                                      "MatRetrieveValues_SeqBAIJ",
2345                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2346   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2347                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2348                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2350                                      "MatConvert_SeqBAIJ_SeqAIJ",
2351                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2353                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2354                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2356                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2357                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 EXTERN_C_END
2361 
2362 #undef __FUNCT__
2363 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2364 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2365 {
2366   Mat            C;
2367   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2368   PetscErrorCode ierr;
2369   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2370 
2371   PetscFunctionBegin;
2372   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2373 
2374   *B = 0;
2375   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2376   ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr);
2377   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2378   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2379   c    = (Mat_SeqBAIJ*)C->data;
2380 
2381   C->M   = A->M;
2382   C->N   = A->N;
2383   C->bs  = A->bs;
2384   c->bs2 = a->bs2;
2385   c->mbs = a->mbs;
2386   c->nbs = a->nbs;
2387 
2388   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2389   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2390   for (i=0; i<mbs; i++) {
2391     c->imax[i] = a->imax[i];
2392     c->ilen[i] = a->ilen[i];
2393   }
2394 
2395   /* allocate the matrix space */
2396   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2397   c->singlemalloc = PETSC_TRUE;
2398   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2399   if (mbs > 0) {
2400     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2401     if (cpvalues == MAT_COPY_VALUES) {
2402       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2403     } else {
2404       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2405     }
2406   }
2407   c->sorted      = a->sorted;
2408   c->roworiented = a->roworiented;
2409   c->nonew       = a->nonew;
2410 
2411   if (a->diag) {
2412     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2413     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2414     for (i=0; i<mbs; i++) {
2415       c->diag[i] = a->diag[i];
2416     }
2417   } else c->diag        = 0;
2418   c->nz                 = a->nz;
2419   c->maxnz              = a->maxnz;
2420   c->solve_work         = 0;
2421   c->mult_work          = 0;
2422   C->preallocated       = PETSC_TRUE;
2423   C->assembled          = PETSC_TRUE;
2424 
2425   c->compressedrow.use     = a->compressedrow.use;
2426   c->compressedrow.nrows   = a->compressedrow.nrows;
2427   c->compressedrow.checked = a->compressedrow.checked;
2428   if ( a->compressedrow.checked && a->compressedrow.use){
2429     i = a->compressedrow.nrows;
2430     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2431     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2432     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2433     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2434   } else {
2435     c->compressedrow.use    = PETSC_FALSE;
2436     c->compressedrow.i      = PETSC_NULL;
2437     c->compressedrow.rindex = PETSC_NULL;
2438   }
2439   C->same_nonzero = A->same_nonzero;
2440   *B = C;
2441   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatLoad_SeqBAIJ"
2447 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2448 {
2449   Mat_SeqBAIJ    *a;
2450   Mat            B;
2451   PetscErrorCode ierr;
2452   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2453   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2454   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2455   PetscInt       *masked,nmask,tmp,bs2,ishift;
2456   PetscMPIInt    size;
2457   int            fd;
2458   PetscScalar    *aa;
2459   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2460 
2461   PetscFunctionBegin;
2462   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2463   bs2  = bs*bs;
2464 
2465   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2466   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2467   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2468   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2469   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2470   M = header[1]; N = header[2]; nz = header[3];
2471 
2472   if (header[3] < 0) {
2473     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2474   }
2475 
2476   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2477 
2478   /*
2479      This code adds extra rows to make sure the number of rows is
2480     divisible by the blocksize
2481   */
2482   mbs        = M/bs;
2483   extra_rows = bs - M + bs*(mbs);
2484   if (extra_rows == bs) extra_rows = 0;
2485   else                  mbs++;
2486   if (extra_rows) {
2487     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2488   }
2489 
2490   /* read in row lengths */
2491   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2492   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2493   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2494 
2495   /* read in column indices */
2496   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2497   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2498   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2499 
2500   /* loop over row lengths determining block row lengths */
2501   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2502   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2503   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2504   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2505   masked   = mask + mbs;
2506   rowcount = 0; nzcount = 0;
2507   for (i=0; i<mbs; i++) {
2508     nmask = 0;
2509     for (j=0; j<bs; j++) {
2510       kmax = rowlengths[rowcount];
2511       for (k=0; k<kmax; k++) {
2512         tmp = jj[nzcount++]/bs;
2513         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2514       }
2515       rowcount++;
2516     }
2517     browlengths[i] += nmask;
2518     /* zero out the mask elements we set */
2519     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2520   }
2521 
2522   /* create our matrix */
2523   ierr = MatCreate(comm,&B);
2524   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2525   ierr = MatSetType(B,type);CHKERRQ(ierr);
2526   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2527   a = (Mat_SeqBAIJ*)B->data;
2528 
2529   /* set matrix "i" values */
2530   a->i[0] = 0;
2531   for (i=1; i<= mbs; i++) {
2532     a->i[i]      = a->i[i-1] + browlengths[i-1];
2533     a->ilen[i-1] = browlengths[i-1];
2534   }
2535   a->nz         = 0;
2536   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2537 
2538   /* read in nonzero values */
2539   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2540   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2541   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2542 
2543   /* set "a" and "j" values into matrix */
2544   nzcount = 0; jcount = 0;
2545   for (i=0; i<mbs; i++) {
2546     nzcountb = nzcount;
2547     nmask    = 0;
2548     for (j=0; j<bs; j++) {
2549       kmax = rowlengths[i*bs+j];
2550       for (k=0; k<kmax; k++) {
2551         tmp = jj[nzcount++]/bs;
2552 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2553       }
2554     }
2555     /* sort the masked values */
2556     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2557 
2558     /* set "j" values into matrix */
2559     maskcount = 1;
2560     for (j=0; j<nmask; j++) {
2561       a->j[jcount++]  = masked[j];
2562       mask[masked[j]] = maskcount++;
2563     }
2564     /* set "a" values into matrix */
2565     ishift = bs2*a->i[i];
2566     for (j=0; j<bs; j++) {
2567       kmax = rowlengths[i*bs+j];
2568       for (k=0; k<kmax; k++) {
2569         tmp       = jj[nzcountb]/bs ;
2570         block     = mask[tmp] - 1;
2571         point     = jj[nzcountb] - bs*tmp;
2572         idx       = ishift + bs2*block + j + bs*point;
2573         a->a[idx] = (MatScalar)aa[nzcountb++];
2574       }
2575     }
2576     /* zero out the mask elements we set */
2577     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2578   }
2579   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2580 
2581   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2582   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2583   ierr = PetscFree(aa);CHKERRQ(ierr);
2584   ierr = PetscFree(jj);CHKERRQ(ierr);
2585   ierr = PetscFree(mask);CHKERRQ(ierr);
2586 
2587   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2588   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2589   ierr = MatView_Private(B);CHKERRQ(ierr);
2590 
2591   *A = B;
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatCreateSeqBAIJ"
2597 /*@C
2598    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2599    compressed row) format.  For good matrix assembly performance the
2600    user should preallocate the matrix storage by setting the parameter nz
2601    (or the array nnz).  By setting these parameters accurately, performance
2602    during matrix assembly can be increased by more than a factor of 50.
2603 
2604    Collective on MPI_Comm
2605 
2606    Input Parameters:
2607 +  comm - MPI communicator, set to PETSC_COMM_SELF
2608 .  bs - size of block
2609 .  m - number of rows
2610 .  n - number of columns
2611 .  nz - number of nonzero blocks  per block row (same for all rows)
2612 -  nnz - array containing the number of nonzero blocks in the various block rows
2613          (possibly different for each block row) or PETSC_NULL
2614 
2615    Output Parameter:
2616 .  A - the matrix
2617 
2618    Options Database Keys:
2619 .   -mat_no_unroll - uses code that does not unroll the loops in the
2620                      block calculations (much slower)
2621 .    -mat_block_size - size of the blocks to use
2622 
2623    Level: intermediate
2624 
2625    Notes:
2626    The number of rows and columns must be divisible by blocksize.
2627 
2628    If the nnz parameter is given then the nz parameter is ignored
2629 
2630    A nonzero block is any block that as 1 or more nonzeros in it
2631 
2632    The block AIJ format is fully compatible with standard Fortran 77
2633    storage.  That is, the stored row and column indices can begin at
2634    either one (as in Fortran) or zero.  See the users' manual for details.
2635 
2636    Specify the preallocated storage with either nz or nnz (not both).
2637    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2638    allocation.  For additional details, see the users manual chapter on
2639    matrices.
2640 
2641 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2642 @*/
2643 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2644 {
2645   PetscErrorCode ierr;
2646 
2647   PetscFunctionBegin;
2648   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2649   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2650   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2651   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 #undef __FUNCT__
2656 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2657 /*@C
2658    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2659    per row in the matrix. For good matrix assembly performance the
2660    user should preallocate the matrix storage by setting the parameter nz
2661    (or the array nnz).  By setting these parameters accurately, performance
2662    during matrix assembly can be increased by more than a factor of 50.
2663 
2664    Collective on MPI_Comm
2665 
2666    Input Parameters:
2667 +  A - the matrix
2668 .  bs - size of block
2669 .  nz - number of block nonzeros per block row (same for all rows)
2670 -  nnz - array containing the number of block nonzeros in the various block rows
2671          (possibly different for each block row) or PETSC_NULL
2672 
2673    Options Database Keys:
2674 .   -mat_no_unroll - uses code that does not unroll the loops in the
2675                      block calculations (much slower)
2676 .    -mat_block_size - size of the blocks to use
2677 
2678    Level: intermediate
2679 
2680    Notes:
2681    If the nnz parameter is given then the nz parameter is ignored
2682 
2683    The block AIJ format is fully compatible with standard Fortran 77
2684    storage.  That is, the stored row and column indices can begin at
2685    either one (as in Fortran) or zero.  See the users' manual for details.
2686 
2687    Specify the preallocated storage with either nz or nnz (not both).
2688    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2689    allocation.  For additional details, see the users manual chapter on
2690    matrices.
2691 
2692 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2693 @*/
2694 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2695 {
2696   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2697 
2698   PetscFunctionBegin;
2699   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2700   if (f) {
2701     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2702   }
2703   PetscFunctionReturn(0);
2704 }
2705 
2706