xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3c8e0aa7b286f773d91ee9d90491a3ecc803b931)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval,lastcol = -1;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     high = nrow;
597     for (l=0; l<n; l++) { /* loop over added columns */
598       col = in[l];
599       if (col <= lastcol) low = 0; else high = nrow;
600       lastcol = col;
601       value = v + k*(stepval+4)*4 + l*4;
602       while (high-low > 7) {
603         t = (low+high)/2;
604         if (rp[t] > col) high = t;
605         else             low  = t;
606       }
607       for (i=low; i<high; i++) {
608         if (rp[i] > col) break;
609         if (rp[i] == col) {
610           bap  = ap +  16*i;
611           for (ii=0; ii<4; ii++,value+=stepval) {
612             for (jj=ii; jj<16; jj+=4) {
613               bap[jj] += *value++;
614             }
615           }
616           goto noinsert2;
617         }
618       }
619       N = nrow++ - 1;
620       high++; /* added new column index thus must search to one higher than before */
621       /* shift up all the later entries in this row */
622       for (ii=N; ii>=i; ii--) {
623         rp[ii+1] = rp[ii];
624         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
625       }
626       if (N >= i) {
627         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
628       }
629       rp[i] = col;
630       bap   = ap +  16*i;
631       for (ii=0; ii<4; ii++,value+=stepval) {
632         for (jj=ii; jj<16; jj+=4) {
633           bap[jj] = *value++;
634         }
635       }
636       noinsert2:;
637       low = i;
638     }
639     ailen[row] = nrow;
640   }
641   PetscFunctionReturnVoid();
642 }
643 EXTERN_C_END
644 
645 #if defined(PETSC_HAVE_FORTRAN_CAPS)
646 #define matsetvalues4_ MATSETVALUES4
647 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
648 #define matsetvalues4_ matsetvalues4
649 #endif
650 
651 EXTERN_C_BEGIN
652 #undef __FUNCT__
653 #define __FUNCT__ "MatSetValues4_"
654 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
655 {
656   Mat         A = *AA;
657   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
658   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
659   PetscInt    *ai=a->i,*ailen=a->ilen;
660   PetscInt    *aj=a->j,brow,bcol;
661   PetscInt    ridx,cidx,lastcol = -1;
662   MatScalar   *ap,value,*aa=a->a,*bap;
663 
664   PetscFunctionBegin;
665   for (k=0; k<m; k++) { /* loop over added rows */
666     row  = im[k]; brow = row/4;
667     rp   = aj + ai[brow];
668     ap   = aa + 16*ai[brow];
669     nrow = ailen[brow];
670     low  = 0;
671     high = nrow;
672     for (l=0; l<n; l++) { /* loop over added columns */
673       col = in[l]; bcol = col/4;
674       ridx = row % 4; cidx = col % 4;
675       value = v[l + k*n];
676       if (col <= lastcol) low = 0; else high = nrow;
677       lastcol = col;
678       while (high-low > 7) {
679         t = (low+high)/2;
680         if (rp[t] > bcol) high = t;
681         else              low  = t;
682       }
683       for (i=low; i<high; i++) {
684         if (rp[i] > bcol) break;
685         if (rp[i] == bcol) {
686           bap  = ap +  16*i + 4*cidx + ridx;
687           *bap += value;
688           goto noinsert1;
689         }
690       }
691       N = nrow++ - 1;
692       high++; /* added new column thus must search to one higher than before */
693       /* shift up all the later entries in this row */
694       for (ii=N; ii>=i; ii--) {
695         rp[ii+1] = rp[ii];
696         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
697       }
698       if (N>=i) {
699         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
700       }
701       rp[i]                    = bcol;
702       ap[16*i + 4*cidx + ridx] = value;
703       noinsert1:;
704       low = i;
705     }
706     ailen[brow] = nrow;
707   }
708   PetscFunctionReturnVoid();
709 }
710 EXTERN_C_END
711 
712 /*  UGLY, ugly, ugly
713    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
714    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
715    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
716    converts the entries into single precision and then calls ..._MatScalar() to put them
717    into the single precision data structures.
718 */
719 #if defined(PETSC_USE_MAT_SINGLE)
720 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
721 #else
722 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
723 #endif
724 
725 #define CHUNKSIZE  10
726 
727 /*
728      Checks for missing diagonals
729 */
730 #undef __FUNCT__
731 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
732 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
733 {
734   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
735   PetscErrorCode ierr;
736   PetscInt       *diag,*jj = a->j,i;
737 
738   PetscFunctionBegin;
739   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
740   diag = a->diag;
741   for (i=0; i<a->mbs; i++) {
742     if (jj[diag[i]] != i) {
743       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
744     }
745   }
746   PetscFunctionReturn(0);
747 }
748 
749 #undef __FUNCT__
750 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
751 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
752 {
753   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
754   PetscErrorCode ierr;
755   PetscInt       i,j,*diag,m = a->mbs;
756 
757   PetscFunctionBegin;
758   if (a->diag) PetscFunctionReturn(0);
759 
760   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
761   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
762   for (i=0; i<m; i++) {
763     diag[i] = a->i[i+1];
764     for (j=a->i[i]; j<a->i[i+1]; j++) {
765       if (a->j[j] == i) {
766         diag[i] = j;
767         break;
768       }
769     }
770   }
771   a->diag = diag;
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt       n = a->mbs,i;
785 
786   PetscFunctionBegin;
787   *nn = n;
788   if (!ia) PetscFunctionReturn(0);
789   if (symmetric) {
790     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
791   } else if (oshift == 1) {
792     /* temporarily add 1 to i and j indices */
793     PetscInt nz = a->i[n];
794     for (i=0; i<nz; i++) a->j[i]++;
795     for (i=0; i<n+1; i++) a->i[i]++;
796     *ia = a->i; *ja = a->j;
797   } else {
798     *ia = a->i; *ja = a->j;
799   }
800 
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
807 {
808   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
809   PetscErrorCode ierr;
810   PetscInt       i,n = a->mbs;
811 
812   PetscFunctionBegin;
813   if (!ia) PetscFunctionReturn(0);
814   if (symmetric) {
815     ierr = PetscFree(*ia);CHKERRQ(ierr);
816     ierr = PetscFree(*ja);CHKERRQ(ierr);
817   } else if (oshift == 1) {
818     PetscInt nz = a->i[n]-1;
819     for (i=0; i<nz; i++) a->j[i]--;
820     for (i=0; i<n+1; i++) a->i[i]--;
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatDestroy_SeqBAIJ"
827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
828 {
829   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833 #if defined(PETSC_USE_LOG)
834   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
835 #endif
836   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
837   if (a->row) {
838     ierr = ISDestroy(a->row);CHKERRQ(ierr);
839   }
840   if (a->col) {
841     ierr = ISDestroy(a->col);CHKERRQ(ierr);
842   }
843   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
844   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
845   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
846   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
847   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
848   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
849   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
850 #if defined(PETSC_USE_MAT_SINGLE)
851   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
852 #endif
853   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
854   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
855 
856   ierr = PetscFree(a);CHKERRQ(ierr);
857 
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatSetOption_SeqBAIJ"
870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
871 {
872   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   switch (op) {
877   case MAT_ROW_ORIENTED:
878     a->roworiented    = PETSC_TRUE;
879     break;
880   case MAT_COLUMN_ORIENTED:
881     a->roworiented    = PETSC_FALSE;
882     break;
883   case MAT_COLUMNS_SORTED:
884     a->sorted         = PETSC_TRUE;
885     break;
886   case MAT_COLUMNS_UNSORTED:
887     a->sorted         = PETSC_FALSE;
888     break;
889   case MAT_KEEP_ZEROED_ROWS:
890     a->keepzeroedrows = PETSC_TRUE;
891     break;
892   case MAT_NO_NEW_NONZERO_LOCATIONS:
893     a->nonew          = 1;
894     break;
895   case MAT_NEW_NONZERO_LOCATION_ERR:
896     a->nonew          = -1;
897     break;
898   case MAT_NEW_NONZERO_ALLOCATION_ERR:
899     a->nonew          = -2;
900     break;
901   case MAT_YES_NEW_NONZERO_LOCATIONS:
902     a->nonew          = 0;
903     break;
904   case MAT_ROWS_SORTED:
905   case MAT_ROWS_UNSORTED:
906   case MAT_YES_NEW_DIAGONALS:
907   case MAT_IGNORE_OFF_PROC_ENTRIES:
908   case MAT_USE_HASH_TABLE:
909     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
910     break;
911   case MAT_NO_NEW_DIAGONALS:
912     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
913   case MAT_SYMMETRIC:
914   case MAT_STRUCTURALLY_SYMMETRIC:
915   case MAT_NOT_SYMMETRIC:
916   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
917   case MAT_HERMITIAN:
918   case MAT_NOT_HERMITIAN:
919   case MAT_SYMMETRY_ETERNAL:
920   case MAT_NOT_SYMMETRY_ETERNAL:
921     break;
922   default:
923     SETERRQ(PETSC_ERR_SUP,"unknown option");
924   }
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatGetRow_SeqBAIJ"
930 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
931 {
932   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
933   PetscErrorCode ierr;
934   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
935   MatScalar      *aa,*aa_i;
936   PetscScalar    *v_i;
937 
938   PetscFunctionBegin;
939   bs  = A->bs;
940   ai  = a->i;
941   aj  = a->j;
942   aa  = a->a;
943   bs2 = a->bs2;
944 
945   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
946 
947   bn  = row/bs;   /* Block number */
948   bp  = row % bs; /* Block Position */
949   M   = ai[bn+1] - ai[bn];
950   *nz = bs*M;
951 
952   if (v) {
953     *v = 0;
954     if (*nz) {
955       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
956       for (i=0; i<M; i++) { /* for each block in the block row */
957         v_i  = *v + i*bs;
958         aa_i = aa + bs2*(ai[bn] + i);
959         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
960       }
961     }
962   }
963 
964   if (idx) {
965     *idx = 0;
966     if (*nz) {
967       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
968       for (i=0; i<M; i++) { /* for each block in the block row */
969         idx_i = *idx + i*bs;
970         itmp  = bs*aj[ai[bn] + i];
971         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
972       }
973     }
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
980 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
981 {
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
986   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatTranspose_SeqBAIJ"
992 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
993 {
994   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
995   Mat            C;
996   PetscErrorCode ierr;
997   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
998   PetscInt       *rows,*cols,bs2=a->bs2;
999   PetscScalar    *array;
1000 
1001   PetscFunctionBegin;
1002   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1003   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1004   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1005 
1006 #if defined(PETSC_USE_MAT_SINGLE)
1007   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1008   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1009 #else
1010   array = a->a;
1011 #endif
1012 
1013   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1014   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1015   ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr);
1016   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1017   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1018   ierr = PetscFree(col);CHKERRQ(ierr);
1019   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1020   cols = rows + bs;
1021   for (i=0; i<mbs; i++) {
1022     cols[0] = i*bs;
1023     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1024     len = ai[i+1] - ai[i];
1025     for (j=0; j<len; j++) {
1026       rows[0] = (*aj++)*bs;
1027       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1028       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1029       array += bs2;
1030     }
1031   }
1032   ierr = PetscFree(rows);CHKERRQ(ierr);
1033 #if defined(PETSC_USE_MAT_SINGLE)
1034   ierr = PetscFree(array);
1035 #endif
1036 
1037   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039 
1040   if (B) {
1041     *B = C;
1042   } else {
1043     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1044   }
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1050 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1051 {
1052   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1053   PetscErrorCode ierr;
1054   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1055   int            fd;
1056   PetscScalar    *aa;
1057   FILE           *file;
1058 
1059   PetscFunctionBegin;
1060   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1061   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1062   col_lens[0] = MAT_FILE_COOKIE;
1063 
1064   col_lens[1] = A->m;
1065   col_lens[2] = A->n;
1066   col_lens[3] = a->nz*bs2;
1067 
1068   /* store lengths of each row and write (including header) to file */
1069   count = 0;
1070   for (i=0; i<a->mbs; i++) {
1071     for (j=0; j<bs; j++) {
1072       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1073     }
1074   }
1075   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1076   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1077 
1078   /* store column indices (zero start index) */
1079   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1080   count = 0;
1081   for (i=0; i<a->mbs; i++) {
1082     for (j=0; j<bs; j++) {
1083       for (k=a->i[i]; k<a->i[i+1]; k++) {
1084         for (l=0; l<bs; l++) {
1085           jj[count++] = bs*a->j[k] + l;
1086         }
1087       }
1088     }
1089   }
1090   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1091   ierr = PetscFree(jj);CHKERRQ(ierr);
1092 
1093   /* store nonzero values */
1094   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1095   count = 0;
1096   for (i=0; i<a->mbs; i++) {
1097     for (j=0; j<bs; j++) {
1098       for (k=a->i[i]; k<a->i[i+1]; k++) {
1099         for (l=0; l<bs; l++) {
1100           aa[count++] = a->a[bs2*k + l*bs + j];
1101         }
1102       }
1103     }
1104   }
1105   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1106   ierr = PetscFree(aa);CHKERRQ(ierr);
1107 
1108   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1109   if (file) {
1110     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1111   }
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1117 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1118 {
1119   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1120   PetscErrorCode    ierr;
1121   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1122   PetscViewerFormat format;
1123 
1124   PetscFunctionBegin;
1125   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1126   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1127     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1128   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1129     Mat aij;
1130     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1131     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1132     ierr = MatDestroy(aij);CHKERRQ(ierr);
1133   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1134      PetscFunctionReturn(0);
1135   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1136     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1137     for (i=0; i<a->mbs; i++) {
1138       for (j=0; j<bs; j++) {
1139         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1140         for (k=a->i[i]; k<a->i[i+1]; k++) {
1141           for (l=0; l<bs; l++) {
1142 #if defined(PETSC_USE_COMPLEX)
1143             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1145                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1146             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1147               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1148                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1149             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1150               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1151             }
1152 #else
1153             if (a->a[bs2*k + l*bs + j] != 0.0) {
1154               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1155             }
1156 #endif
1157           }
1158         }
1159         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1160       }
1161     }
1162     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1163   } else {
1164     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1165     for (i=0; i<a->mbs; i++) {
1166       for (j=0; j<bs; j++) {
1167         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1168         for (k=a->i[i]; k<a->i[i+1]; k++) {
1169           for (l=0; l<bs; l++) {
1170 #if defined(PETSC_USE_COMPLEX)
1171             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1173                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1174             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1175               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1176                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1177             } else {
1178               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1179             }
1180 #else
1181             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1182 #endif
1183           }
1184         }
1185         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1186       }
1187     }
1188     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1189   }
1190   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1196 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1197 {
1198   Mat            A = (Mat) Aa;
1199   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1200   PetscErrorCode ierr;
1201   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1202   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1203   MatScalar      *aa;
1204   PetscViewer    viewer;
1205 
1206   PetscFunctionBegin;
1207 
1208   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1209   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1210 
1211   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1212 
1213   /* loop over matrix elements drawing boxes */
1214   color = PETSC_DRAW_BLUE;
1215   for (i=0,row=0; i<mbs; i++,row+=bs) {
1216     for (j=a->i[i]; j<a->i[i+1]; j++) {
1217       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1218       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1219       aa = a->a + j*bs2;
1220       for (k=0; k<bs; k++) {
1221         for (l=0; l<bs; l++) {
1222           if (PetscRealPart(*aa++) >=  0.) continue;
1223           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1224         }
1225       }
1226     }
1227   }
1228   color = PETSC_DRAW_CYAN;
1229   for (i=0,row=0; i<mbs; i++,row+=bs) {
1230     for (j=a->i[i]; j<a->i[i+1]; j++) {
1231       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1232       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1233       aa = a->a + j*bs2;
1234       for (k=0; k<bs; k++) {
1235         for (l=0; l<bs; l++) {
1236           if (PetscRealPart(*aa++) != 0.) continue;
1237           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1238         }
1239       }
1240     }
1241   }
1242 
1243   color = PETSC_DRAW_RED;
1244   for (i=0,row=0; i<mbs; i++,row+=bs) {
1245     for (j=a->i[i]; j<a->i[i+1]; j++) {
1246       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1247       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1248       aa = a->a + j*bs2;
1249       for (k=0; k<bs; k++) {
1250         for (l=0; l<bs; l++) {
1251           if (PetscRealPart(*aa++) <= 0.) continue;
1252           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1253         }
1254       }
1255     }
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1262 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1263 {
1264   PetscErrorCode ierr;
1265   PetscReal      xl,yl,xr,yr,w,h;
1266   PetscDraw      draw;
1267   PetscTruth     isnull;
1268 
1269   PetscFunctionBegin;
1270 
1271   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1272   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1273 
1274   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1275   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1276   xr += w;    yr += h;  xl = -w;     yl = -h;
1277   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1278   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1279   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_SeqBAIJ"
1285 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1286 {
1287   PetscErrorCode ierr;
1288   PetscTruth     iascii,isbinary,isdraw;
1289 
1290   PetscFunctionBegin;
1291   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1294   if (iascii){
1295     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1296   } else if (isbinary) {
1297     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1298   } else if (isdraw) {
1299     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1300   } else {
1301     Mat B;
1302     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1303     ierr = MatView(B,viewer);CHKERRQ(ierr);
1304     ierr = MatDestroy(B);CHKERRQ(ierr);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1312 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1313 {
1314   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1315   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1316   PetscInt    *ai = a->i,*ailen = a->ilen;
1317   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1318   MatScalar   *ap,*aa = a->a,zero = 0.0;
1319 
1320   PetscFunctionBegin;
1321   for (k=0; k<m; k++) { /* loop over rows */
1322     row  = im[k]; brow = row/bs;
1323     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1324     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1325     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1326     nrow = ailen[brow];
1327     for (l=0; l<n; l++) { /* loop over columns */
1328       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1329       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1330       col  = in[l] ;
1331       bcol = col/bs;
1332       cidx = col%bs;
1333       ridx = row%bs;
1334       high = nrow;
1335       low  = 0; /* assume unsorted */
1336       while (high-low > 5) {
1337         t = (low+high)/2;
1338         if (rp[t] > bcol) high = t;
1339         else             low  = t;
1340       }
1341       for (i=low; i<high; i++) {
1342         if (rp[i] > bcol) break;
1343         if (rp[i] == bcol) {
1344           *v++ = ap[bs2*i+bs*cidx+ridx];
1345           goto finished;
1346         }
1347       }
1348       *v++ = zero;
1349       finished:;
1350     }
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #if defined(PETSC_USE_MAT_SINGLE)
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1358 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1359 {
1360   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1361   PetscErrorCode ierr;
1362   PetscInt       i,N = m*n*b->bs2;
1363   MatScalar      *vsingle;
1364 
1365   PetscFunctionBegin;
1366   if (N > b->setvalueslen) {
1367     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1368     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1369     b->setvalueslen  = N;
1370   }
1371   vsingle = b->setvaluescopy;
1372   for (i=0; i<N; i++) {
1373     vsingle[i] = v[i];
1374   }
1375   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 #endif
1379 
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1383 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1384 {
1385   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1386   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1387   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1388   PetscErrorCode  ierr;
1389   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1390   PetscTruth      roworiented=a->roworiented;
1391   const MatScalar *value = v;
1392   MatScalar       *ap,*aa = a->a,*bap;
1393 
1394   PetscFunctionBegin;
1395   if (roworiented) {
1396     stepval = (n-1)*bs;
1397   } else {
1398     stepval = (m-1)*bs;
1399   }
1400   for (k=0; k<m; k++) { /* loop over added rows */
1401     row  = im[k];
1402     if (row < 0) continue;
1403 #if defined(PETSC_USE_DEBUG)
1404     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1405 #endif
1406     rp   = aj + ai[row];
1407     ap   = aa + bs2*ai[row];
1408     rmax = imax[row];
1409     nrow = ailen[row];
1410     low  = 0;
1411     high = nrow;
1412     for (l=0; l<n; l++) { /* loop over added columns */
1413       if (in[l] < 0) continue;
1414 #if defined(PETSC_USE_DEBUG)
1415       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1416 #endif
1417       col = in[l];
1418       if (roworiented) {
1419         value = v + k*(stepval+bs)*bs + l*bs;
1420       } else {
1421         value = v + l*(stepval+bs)*bs + k*bs;
1422       }
1423       if (col <= lastcol) low = 0; else high = nrow;
1424       lastcol = col;
1425       while (high-low > 7) {
1426         t = (low+high)/2;
1427         if (rp[t] > col) high = t;
1428         else             low  = t;
1429       }
1430       for (i=low; i<high; i++) {
1431         if (rp[i] > col) break;
1432         if (rp[i] == col) {
1433           bap  = ap +  bs2*i;
1434           if (roworiented) {
1435             if (is == ADD_VALUES) {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] += *value++;
1439                 }
1440               }
1441             } else {
1442               for (ii=0; ii<bs; ii++,value+=stepval) {
1443                 for (jj=ii; jj<bs2; jj+=bs) {
1444                   bap[jj] = *value++;
1445                 }
1446               }
1447             }
1448           } else {
1449             if (is == ADD_VALUES) {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++ += *value++;
1453                 }
1454               }
1455             } else {
1456               for (ii=0; ii<bs; ii++,value+=stepval) {
1457                 for (jj=0; jj<bs; jj++) {
1458                   *bap++  = *value++;
1459                 }
1460               }
1461             }
1462           }
1463           goto noinsert2;
1464         }
1465       }
1466       if (nonew == 1) goto noinsert2;
1467       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1468       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1469       N = nrow++ - 1; high++;
1470       /* shift up all the later entries in this row */
1471       for (ii=N; ii>=i; ii--) {
1472         rp[ii+1] = rp[ii];
1473         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1474       }
1475       if (N >= i) {
1476         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1477       }
1478       rp[i] = col;
1479       bap   = ap +  bs2*i;
1480       if (roworiented) {
1481         for (ii=0; ii<bs; ii++,value+=stepval) {
1482           for (jj=ii; jj<bs2; jj+=bs) {
1483             bap[jj] = *value++;
1484           }
1485         }
1486       } else {
1487         for (ii=0; ii<bs; ii++,value+=stepval) {
1488           for (jj=0; jj<bs; jj++) {
1489             *bap++  = *value++;
1490           }
1491         }
1492       }
1493       noinsert2:;
1494       low = i;
1495     }
1496     ailen[row] = nrow;
1497   }
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1503 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1504 {
1505   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1506   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1507   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1508   PetscErrorCode ierr;
1509   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1510   MatScalar      *aa = a->a,*ap;
1511   PetscReal      ratio=0.6;
1512 
1513   PetscFunctionBegin;
1514   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1515 
1516   if (m) rmax = ailen[0];
1517   for (i=1; i<mbs; i++) {
1518     /* move each row back by the amount of empty slots (fshift) before it*/
1519     fshift += imax[i-1] - ailen[i-1];
1520     rmax   = PetscMax(rmax,ailen[i]);
1521     if (fshift) {
1522       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1523       N = ailen[i];
1524       for (j=0; j<N; j++) {
1525         ip[j-fshift] = ip[j];
1526         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1527       }
1528     }
1529     ai[i] = ai[i-1] + ailen[i-1];
1530   }
1531   if (mbs) {
1532     fshift += imax[mbs-1] - ailen[mbs-1];
1533     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1534   }
1535   /* reset ilen and imax for each row */
1536   for (i=0; i<mbs; i++) {
1537     ailen[i] = imax[i] = ai[i+1] - ai[i];
1538   }
1539   a->nz = ai[mbs];
1540 
1541   /* diagonals may have moved, so kill the diagonal pointers */
1542   a->idiagvalid = PETSC_FALSE;
1543   if (fshift && a->diag) {
1544     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1545     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1546     a->diag = 0;
1547   }
1548   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1549   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1550   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1551   a->reallocs          = 0;
1552   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1553 
1554   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1555   if (a->compressedrow.use){
1556     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1557   }
1558 
1559   A->same_nonzero = PETSC_TRUE;
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 /*
1564    This function returns an array of flags which indicate the locations of contiguous
1565    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1566    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1567    Assume: sizes should be long enough to hold all the values.
1568 */
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1571 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1572 {
1573   PetscInt   i,j,k,row;
1574   PetscTruth flg;
1575 
1576   PetscFunctionBegin;
1577   for (i=0,j=0; i<n; j++) {
1578     row = idx[i];
1579     if (row%bs!=0) { /* Not the begining of a block */
1580       sizes[j] = 1;
1581       i++;
1582     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1583       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1584       i++;
1585     } else { /* Begining of the block, so check if the complete block exists */
1586       flg = PETSC_TRUE;
1587       for (k=1; k<bs; k++) {
1588         if (row+k != idx[i+k]) { /* break in the block */
1589           flg = PETSC_FALSE;
1590           break;
1591         }
1592       }
1593       if (flg) { /* No break in the bs */
1594         sizes[j] = bs;
1595         i+= bs;
1596       } else {
1597         sizes[j] = 1;
1598         i++;
1599       }
1600     }
1601   }
1602   *bs_max = j;
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1608 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1609 {
1610   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1611   PetscErrorCode ierr;
1612   PetscInt       i,j,k,count,*rows;
1613   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1614   PetscScalar    zero = 0.0;
1615   MatScalar      *aa;
1616 
1617   PetscFunctionBegin;
1618   /* Make a copy of the IS and  sort it */
1619   /* allocate memory for rows,sizes */
1620   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1621   sizes = rows + is_n;
1622 
1623   /* copy IS values to rows, and sort them */
1624   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1625   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1626   if (baij->keepzeroedrows) {
1627     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1628     bs_max = is_n;
1629     A->same_nonzero = PETSC_TRUE;
1630   } else {
1631     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1632     A->same_nonzero = PETSC_FALSE;
1633   }
1634 
1635   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1636     row   = rows[j];
1637     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1638     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1639     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1640     if (sizes[i] == bs && !baij->keepzeroedrows) {
1641       if (diag != 0.0) {
1642         if (baij->ilen[row/bs] > 0) {
1643           baij->ilen[row/bs]       = 1;
1644           baij->j[baij->i[row/bs]] = row/bs;
1645           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1646         }
1647         /* Now insert all the diagonal values for this bs */
1648         for (k=0; k<bs; k++) {
1649           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1650         }
1651       } else { /* (diag == 0.0) */
1652         baij->ilen[row/bs] = 0;
1653       } /* end (diag == 0.0) */
1654     } else { /* (sizes[i] != bs) */
1655 #if defined (PETSC_USE_DEBUG)
1656       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1657 #endif
1658       for (k=0; k<count; k++) {
1659         aa[0] =  zero;
1660         aa    += bs;
1661       }
1662       if (diag != 0.0) {
1663         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1664       }
1665     }
1666   }
1667 
1668   ierr = PetscFree(rows);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1675 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1676 {
1677   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1678   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1679   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1680   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1681   PetscErrorCode ierr;
1682   PetscInt       ridx,cidx,bs2=a->bs2;
1683   PetscTruth     roworiented=a->roworiented;
1684   MatScalar      *ap,value,*aa=a->a,*bap;
1685 
1686   PetscFunctionBegin;
1687   for (k=0; k<m; k++) { /* loop over added rows */
1688     row  = im[k];
1689     brow = row/bs;
1690     if (row < 0) continue;
1691 #if defined(PETSC_USE_DEBUG)
1692     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1693 #endif
1694     rp   = aj + ai[brow];
1695     ap   = aa + bs2*ai[brow];
1696     rmax = imax[brow];
1697     nrow = ailen[brow];
1698     low  = 0;
1699     high = nrow;
1700     for (l=0; l<n; l++) { /* loop over added columns */
1701       if (in[l] < 0) continue;
1702 #if defined(PETSC_USE_DEBUG)
1703       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1704 #endif
1705       col = in[l]; bcol = col/bs;
1706       ridx = row % bs; cidx = col % bs;
1707       if (roworiented) {
1708         value = v[l + k*n];
1709       } else {
1710         value = v[k + l*m];
1711       }
1712       if (col <= lastcol) low = 0; else high = nrow;
1713       lastcol = col;
1714       while (high-low > 7) {
1715         t = (low+high)/2;
1716         if (rp[t] > bcol) high = t;
1717         else              low  = t;
1718       }
1719       for (i=low; i<high; i++) {
1720         if (rp[i] > bcol) break;
1721         if (rp[i] == bcol) {
1722           bap  = ap +  bs2*i + bs*cidx + ridx;
1723           if (is == ADD_VALUES) *bap += value;
1724           else                  *bap  = value;
1725           goto noinsert1;
1726         }
1727       }
1728       if (nonew == 1) goto noinsert1;
1729       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1730       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1731       N = nrow++ - 1; high++;
1732       /* shift up all the later entries in this row */
1733       for (ii=N; ii>=i; ii--) {
1734         rp[ii+1] = rp[ii];
1735         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1736       }
1737       if (N>=i) {
1738         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1739       }
1740       rp[i]                      = bcol;
1741       ap[bs2*i + bs*cidx + ridx] = value;
1742       a->nz++;
1743       noinsert1:;
1744       low = i;
1745     }
1746     ailen[brow] = nrow;
1747   }
1748   A->same_nonzero = PETSC_FALSE;
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1755 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1756 {
1757   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1758   Mat            outA;
1759   PetscErrorCode ierr;
1760   PetscTruth     row_identity,col_identity;
1761 
1762   PetscFunctionBegin;
1763   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1764   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1765   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1766   if (!row_identity || !col_identity) {
1767     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1768   }
1769 
1770   outA          = inA;
1771   inA->factor   = FACTOR_LU;
1772 
1773   if (!a->diag) {
1774     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1775   }
1776 
1777   a->row        = row;
1778   a->col        = col;
1779   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1780   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1781 
1782   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1783   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1784   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1785 
1786   /*
1787       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1788       for ILU(0) factorization with natural ordering
1789   */
1790   if (inA->bs < 8) {
1791     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1792   } else {
1793     if (!a->solve_work) {
1794       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1795       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1796     }
1797   }
1798 
1799   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1800 
1801   PetscFunctionReturn(0);
1802 }
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1805 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1806 {
1807   static PetscTruth called = PETSC_FALSE;
1808   MPI_Comm          comm = A->comm;
1809   PetscErrorCode    ierr;
1810 
1811   PetscFunctionBegin;
1812   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1813   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1814   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 EXTERN_C_BEGIN
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1821 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1822 {
1823   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1824   PetscInt    i,nz,nbs;
1825 
1826   PetscFunctionBegin;
1827   nz  = baij->maxnz/baij->bs2;
1828   nbs = baij->nbs;
1829   for (i=0; i<nz; i++) {
1830     baij->j[i] = indices[i];
1831   }
1832   baij->nz = nz;
1833   for (i=0; i<nbs; i++) {
1834     baij->ilen[i] = baij->imax[i];
1835   }
1836 
1837   PetscFunctionReturn(0);
1838 }
1839 EXTERN_C_END
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1843 /*@
1844     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1845        in the matrix.
1846 
1847   Input Parameters:
1848 +  mat - the SeqBAIJ matrix
1849 -  indices - the column indices
1850 
1851   Level: advanced
1852 
1853   Notes:
1854     This can be called if you have precomputed the nonzero structure of the
1855   matrix and want to provide it to the matrix object to improve the performance
1856   of the MatSetValues() operation.
1857 
1858     You MUST have set the correct numbers of nonzeros per row in the call to
1859   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1860 
1861     MUST be called before any calls to MatSetValues();
1862 
1863 @*/
1864 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1865 {
1866   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1867 
1868   PetscFunctionBegin;
1869   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1870   PetscValidPointer(indices,2);
1871   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1872   if (f) {
1873     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1874   } else {
1875     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1876   }
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1882 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1883 {
1884   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1885   PetscErrorCode ierr;
1886   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1887   PetscReal      atmp;
1888   PetscScalar    *x,zero = 0.0;
1889   MatScalar      *aa;
1890   PetscInt       ncols,brow,krow,kcol;
1891 
1892   PetscFunctionBegin;
1893   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1894   bs   = A->bs;
1895   aa   = a->a;
1896   ai   = a->i;
1897   aj   = a->j;
1898   mbs = a->mbs;
1899 
1900   ierr = VecSet(v,zero);CHKERRQ(ierr);
1901   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1902   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1903   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1904   for (i=0; i<mbs; i++) {
1905     ncols = ai[1] - ai[0]; ai++;
1906     brow  = bs*i;
1907     for (j=0; j<ncols; j++){
1908       /* bcol = bs*(*aj); */
1909       for (kcol=0; kcol<bs; kcol++){
1910         for (krow=0; krow<bs; krow++){
1911           atmp = PetscAbsScalar(*aa); aa++;
1912           row = brow + krow;    /* row index */
1913           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1914           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1915         }
1916       }
1917       aj++;
1918     }
1919   }
1920   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatCopy_SeqBAIJ"
1926 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1927 {
1928   PetscErrorCode ierr;
1929 
1930   PetscFunctionBegin;
1931   /* If the two matrices have the same copy implementation, use fast copy. */
1932   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1933     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1934     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1935 
1936     if (a->i[A->m] != b->i[B->m]) {
1937       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1938     }
1939     ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr);
1940   } else {
1941     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1942   }
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1948 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1949 {
1950   PetscErrorCode ierr;
1951 
1952   PetscFunctionBegin;
1953   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1959 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1960 {
1961   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1962   PetscFunctionBegin;
1963   *array = a->a;
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1969 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1970 {
1971   PetscFunctionBegin;
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 #include "petscblaslapack.h"
1976 #undef __FUNCT__
1977 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1978 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1979 {
1980   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1981   PetscErrorCode ierr;
1982   PetscInt       i,bs=Y->bs,j,bs2;
1983   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1984 
1985   PetscFunctionBegin;
1986   if (str == SAME_NONZERO_PATTERN) {
1987     PetscScalar alpha = a;
1988     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1989   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1990     if (y->xtoy && y->XtoY != X) {
1991       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1992       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1993     }
1994     if (!y->xtoy) { /* get xtoy */
1995       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1996       y->XtoY = X;
1997     }
1998     bs2 = bs*bs;
1999     for (i=0; i<x->nz; i++) {
2000       j = 0;
2001       while (j < bs2){
2002         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2003         j++;
2004       }
2005     }
2006     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2007   } else {
2008     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2015 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2016 {
2017   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2018   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2019   PetscScalar    *aa = a->a;
2020 
2021   PetscFunctionBegin;
2022   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2028 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2029 {
2030   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2031   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2032   PetscScalar    *aa = a->a;
2033 
2034   PetscFunctionBegin;
2035   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 
2040 /* -------------------------------------------------------------------*/
2041 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2042        MatGetRow_SeqBAIJ,
2043        MatRestoreRow_SeqBAIJ,
2044        MatMult_SeqBAIJ_N,
2045 /* 4*/ MatMultAdd_SeqBAIJ_N,
2046        MatMultTranspose_SeqBAIJ,
2047        MatMultTransposeAdd_SeqBAIJ,
2048        MatSolve_SeqBAIJ_N,
2049        0,
2050        0,
2051 /*10*/ 0,
2052        MatLUFactor_SeqBAIJ,
2053        0,
2054        0,
2055        MatTranspose_SeqBAIJ,
2056 /*15*/ MatGetInfo_SeqBAIJ,
2057        MatEqual_SeqBAIJ,
2058        MatGetDiagonal_SeqBAIJ,
2059        MatDiagonalScale_SeqBAIJ,
2060        MatNorm_SeqBAIJ,
2061 /*20*/ 0,
2062        MatAssemblyEnd_SeqBAIJ,
2063        0,
2064        MatSetOption_SeqBAIJ,
2065        MatZeroEntries_SeqBAIJ,
2066 /*25*/ MatZeroRows_SeqBAIJ,
2067        MatLUFactorSymbolic_SeqBAIJ,
2068        MatLUFactorNumeric_SeqBAIJ_N,
2069        MatCholeskyFactorSymbolic_SeqBAIJ,
2070        MatCholeskyFactorNumeric_SeqBAIJ_N,
2071 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2072        MatILUFactorSymbolic_SeqBAIJ,
2073        MatICCFactorSymbolic_SeqBAIJ,
2074        MatGetArray_SeqBAIJ,
2075        MatRestoreArray_SeqBAIJ,
2076 /*35*/ MatDuplicate_SeqBAIJ,
2077        0,
2078        0,
2079        MatILUFactor_SeqBAIJ,
2080        0,
2081 /*40*/ MatAXPY_SeqBAIJ,
2082        MatGetSubMatrices_SeqBAIJ,
2083        MatIncreaseOverlap_SeqBAIJ,
2084        MatGetValues_SeqBAIJ,
2085        MatCopy_SeqBAIJ,
2086 /*45*/ MatPrintHelp_SeqBAIJ,
2087        MatScale_SeqBAIJ,
2088        0,
2089        0,
2090        0,
2091 /*50*/ 0,
2092        MatGetRowIJ_SeqBAIJ,
2093        MatRestoreRowIJ_SeqBAIJ,
2094        0,
2095        0,
2096 /*55*/ 0,
2097        0,
2098        0,
2099        0,
2100        MatSetValuesBlocked_SeqBAIJ,
2101 /*60*/ MatGetSubMatrix_SeqBAIJ,
2102        MatDestroy_SeqBAIJ,
2103        MatView_SeqBAIJ,
2104        MatGetPetscMaps_Petsc,
2105        0,
2106 /*65*/ 0,
2107        0,
2108        0,
2109        0,
2110        0,
2111 /*70*/ MatGetRowMax_SeqBAIJ,
2112        MatConvert_Basic,
2113        0,
2114        0,
2115        0,
2116 /*75*/ 0,
2117        0,
2118        0,
2119        0,
2120        0,
2121 /*80*/ 0,
2122        0,
2123        0,
2124        0,
2125        MatLoad_SeqBAIJ,
2126 /*85*/ 0,
2127        0,
2128        0,
2129        0,
2130        0,
2131 /*90*/ 0,
2132        0,
2133        0,
2134        0,
2135        0,
2136 /*95*/ 0,
2137        0,
2138        0,
2139        0,
2140        0,
2141 /*100*/0,
2142        0,
2143        0,
2144        0,
2145        0,
2146 /*105*/0,
2147        MatRealPart_SeqBAIJ,
2148        MatImaginaryPart_SeqBAIJ
2149 };
2150 
2151 EXTERN_C_BEGIN
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2154 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2155 {
2156   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2157   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   if (aij->nonew != 1) {
2162     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2163   }
2164 
2165   /* allocate space for values if not already there */
2166   if (!aij->saved_values) {
2167     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2168   }
2169 
2170   /* copy values over */
2171   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 EXTERN_C_END
2175 
2176 EXTERN_C_BEGIN
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2179 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2180 {
2181   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2182   PetscErrorCode ierr;
2183   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2184 
2185   PetscFunctionBegin;
2186   if (aij->nonew != 1) {
2187     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2188   }
2189   if (!aij->saved_values) {
2190     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2191   }
2192 
2193   /* copy values over */
2194   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2195   PetscFunctionReturn(0);
2196 }
2197 EXTERN_C_END
2198 
2199 EXTERN_C_BEGIN
2200 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2201 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2202 EXTERN_C_END
2203 
2204 EXTERN_C_BEGIN
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2207 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2208 {
2209   Mat_SeqBAIJ    *b;
2210   PetscErrorCode ierr;
2211   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2212   PetscTruth     flg,skipallocation = PETSC_FALSE;
2213 
2214   PetscFunctionBegin;
2215 
2216   if (nz == MAT_SKIP_ALLOCATION) {
2217     skipallocation = PETSC_TRUE;
2218     nz             = 0;
2219   }
2220 
2221   B->preallocated = PETSC_TRUE;
2222   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2223   if (nnz && newbs != bs) {
2224     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2225   }
2226   bs   = newbs;
2227 
2228   mbs  = B->m/bs;
2229   nbs  = B->n/bs;
2230   bs2  = bs*bs;
2231 
2232   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2233     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2234   }
2235 
2236   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2237   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2238   if (nnz) {
2239     for (i=0; i<mbs; i++) {
2240       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2241       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2242     }
2243   }
2244 
2245   b       = (Mat_SeqBAIJ*)B->data;
2246   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2247   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2248   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2249   if (!flg) {
2250     switch (bs) {
2251     case 1:
2252       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2253       B->ops->mult            = MatMult_SeqBAIJ_1;
2254       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2255       break;
2256     case 2:
2257       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2258       B->ops->mult            = MatMult_SeqBAIJ_2;
2259       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2260       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2261       break;
2262     case 3:
2263       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2264       B->ops->mult            = MatMult_SeqBAIJ_3;
2265       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2266       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2267       break;
2268     case 4:
2269       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2270       B->ops->mult            = MatMult_SeqBAIJ_4;
2271       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2272       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2273       break;
2274     case 5:
2275       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2276       B->ops->mult            = MatMult_SeqBAIJ_5;
2277       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2278       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2279       break;
2280     case 6:
2281       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2282       B->ops->mult            = MatMult_SeqBAIJ_6;
2283       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2284       break;
2285     case 7:
2286       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2287       B->ops->mult            = MatMult_SeqBAIJ_7;
2288       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2289       break;
2290     default:
2291       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2292       B->ops->mult            = MatMult_SeqBAIJ_N;
2293       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2294       break;
2295     }
2296   }
2297   B->bs      = bs;
2298   b->mbs     = mbs;
2299   b->nbs     = nbs;
2300   if (!skipallocation) {
2301     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2302     /* b->ilen will count nonzeros in each block row so far. */
2303     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2304     if (!nnz) {
2305       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2306       else if (nz <= 0)        nz = 1;
2307       for (i=0; i<mbs; i++) b->imax[i] = nz;
2308       nz = nz*mbs;
2309     } else {
2310       nz = 0;
2311       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2312     }
2313 
2314     /* allocate the matrix space */
2315     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2316     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2317     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2318     b->singlemalloc = PETSC_TRUE;
2319 
2320     b->i[0] = 0;
2321     for (i=1; i<mbs+1; i++) {
2322       b->i[i] = b->i[i-1] + b->imax[i-1];
2323     }
2324   }
2325 
2326   B->bs               = bs;
2327   b->bs2              = bs2;
2328   b->mbs              = mbs;
2329   b->nz               = 0;
2330   b->maxnz            = nz*bs2;
2331   B->info.nz_unneeded = (PetscReal)b->maxnz;
2332   PetscFunctionReturn(0);
2333 }
2334 EXTERN_C_END
2335 
2336 /*MC
2337    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2338    block sparse compressed row format.
2339 
2340    Options Database Keys:
2341 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2342 
2343   Level: beginner
2344 
2345 .Seealso: MatCreateSeqBAIJ
2346 M*/
2347 
2348 EXTERN_C_BEGIN
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatCreate_SeqBAIJ"
2351 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2352 {
2353   PetscErrorCode ierr;
2354   PetscMPIInt    size;
2355   Mat_SeqBAIJ    *b;
2356 
2357   PetscFunctionBegin;
2358   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2359   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2360 
2361   B->m = B->M = PetscMax(B->m,B->M);
2362   B->n = B->N = PetscMax(B->n,B->N);
2363   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2364   B->data = (void*)b;
2365   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2366   B->factor           = 0;
2367   B->mapping          = 0;
2368   b->row              = 0;
2369   b->col              = 0;
2370   b->icol             = 0;
2371   b->reallocs         = 0;
2372   b->saved_values     = 0;
2373 #if defined(PETSC_USE_MAT_SINGLE)
2374   b->setvalueslen     = 0;
2375   b->setvaluescopy    = PETSC_NULL;
2376 #endif
2377 
2378   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2379   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2380 
2381   b->sorted           = PETSC_FALSE;
2382   b->roworiented      = PETSC_TRUE;
2383   b->nonew            = 0;
2384   b->diag             = 0;
2385   b->solve_work       = 0;
2386   b->mult_work        = 0;
2387   B->spptr            = 0;
2388   B->info.nz_unneeded = (PetscReal)b->maxnz;
2389   b->keepzeroedrows   = PETSC_FALSE;
2390   b->xtoy              = 0;
2391   b->XtoY              = 0;
2392   b->compressedrow.use     = PETSC_FALSE;
2393   b->compressedrow.nrows   = 0;
2394   b->compressedrow.i       = PETSC_NULL;
2395   b->compressedrow.rindex  = PETSC_NULL;
2396   b->compressedrow.checked = PETSC_FALSE;
2397   B->same_nonzero          = PETSC_FALSE;
2398 
2399   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2400                                      "MatInvertBlockDiagonal_SeqBAIJ",
2401                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2402   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2403                                      "MatStoreValues_SeqBAIJ",
2404                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2405   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2406                                      "MatRetrieveValues_SeqBAIJ",
2407                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2409                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2410                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2412                                      "MatConvert_SeqBAIJ_SeqAIJ",
2413                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2414   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2415                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2416                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2417   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2418                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2419                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 EXTERN_C_END
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2426 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2427 {
2428   Mat            C;
2429   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2430   PetscErrorCode ierr;
2431   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2432 
2433   PetscFunctionBegin;
2434   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2435 
2436   *B = 0;
2437   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2438   ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr);
2439   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2440   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2441   c    = (Mat_SeqBAIJ*)C->data;
2442 
2443   C->M   = A->M;
2444   C->N   = A->N;
2445   C->bs  = A->bs;
2446   c->bs2 = a->bs2;
2447   c->mbs = a->mbs;
2448   c->nbs = a->nbs;
2449 
2450   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2451   for (i=0; i<mbs; i++) {
2452     c->imax[i] = a->imax[i];
2453     c->ilen[i] = a->ilen[i];
2454   }
2455 
2456   /* allocate the matrix space */
2457   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2458   c->singlemalloc = PETSC_TRUE;
2459   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2460   if (mbs > 0) {
2461     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2462     if (cpvalues == MAT_COPY_VALUES) {
2463       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2464     } else {
2465       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2466     }
2467   }
2468   c->sorted      = a->sorted;
2469   c->roworiented = a->roworiented;
2470   c->nonew       = a->nonew;
2471 
2472   if (a->diag) {
2473     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2474     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2475     for (i=0; i<mbs; i++) {
2476       c->diag[i] = a->diag[i];
2477     }
2478   } else c->diag        = 0;
2479   c->nz                 = a->nz;
2480   c->maxnz              = a->maxnz;
2481   c->solve_work         = 0;
2482   c->mult_work          = 0;
2483   C->preallocated       = PETSC_TRUE;
2484   C->assembled          = PETSC_TRUE;
2485 
2486   c->compressedrow.use     = a->compressedrow.use;
2487   c->compressedrow.nrows   = a->compressedrow.nrows;
2488   c->compressedrow.checked = a->compressedrow.checked;
2489   if ( a->compressedrow.checked && a->compressedrow.use){
2490     i = a->compressedrow.nrows;
2491     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2492     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2493     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2494     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2495   } else {
2496     c->compressedrow.use    = PETSC_FALSE;
2497     c->compressedrow.i      = PETSC_NULL;
2498     c->compressedrow.rindex = PETSC_NULL;
2499   }
2500   C->same_nonzero = A->same_nonzero;
2501   *B = C;
2502   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatLoad_SeqBAIJ"
2508 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2509 {
2510   Mat_SeqBAIJ    *a;
2511   Mat            B;
2512   PetscErrorCode ierr;
2513   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2514   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2515   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2516   PetscInt       *masked,nmask,tmp,bs2,ishift;
2517   PetscMPIInt    size;
2518   int            fd;
2519   PetscScalar    *aa;
2520   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2524   bs2  = bs*bs;
2525 
2526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2527   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2528   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2529   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2530   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2531   M = header[1]; N = header[2]; nz = header[3];
2532 
2533   if (header[3] < 0) {
2534     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2535   }
2536 
2537   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2538 
2539   /*
2540      This code adds extra rows to make sure the number of rows is
2541     divisible by the blocksize
2542   */
2543   mbs        = M/bs;
2544   extra_rows = bs - M + bs*(mbs);
2545   if (extra_rows == bs) extra_rows = 0;
2546   else                  mbs++;
2547   if (extra_rows) {
2548     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2549   }
2550 
2551   /* read in row lengths */
2552   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2553   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2554   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2555 
2556   /* read in column indices */
2557   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2558   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2559   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2560 
2561   /* loop over row lengths determining block row lengths */
2562   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2563   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2564   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2565   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2566   masked   = mask + mbs;
2567   rowcount = 0; nzcount = 0;
2568   for (i=0; i<mbs; i++) {
2569     nmask = 0;
2570     for (j=0; j<bs; j++) {
2571       kmax = rowlengths[rowcount];
2572       for (k=0; k<kmax; k++) {
2573         tmp = jj[nzcount++]/bs;
2574         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2575       }
2576       rowcount++;
2577     }
2578     browlengths[i] += nmask;
2579     /* zero out the mask elements we set */
2580     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2581   }
2582 
2583   /* create our matrix */
2584   ierr = MatCreate(comm,&B);
2585   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2586   ierr = MatSetType(B,type);CHKERRQ(ierr);
2587   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2588   a = (Mat_SeqBAIJ*)B->data;
2589 
2590   /* set matrix "i" values */
2591   a->i[0] = 0;
2592   for (i=1; i<= mbs; i++) {
2593     a->i[i]      = a->i[i-1] + browlengths[i-1];
2594     a->ilen[i-1] = browlengths[i-1];
2595   }
2596   a->nz         = 0;
2597   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2598 
2599   /* read in nonzero values */
2600   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2601   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2602   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2603 
2604   /* set "a" and "j" values into matrix */
2605   nzcount = 0; jcount = 0;
2606   for (i=0; i<mbs; i++) {
2607     nzcountb = nzcount;
2608     nmask    = 0;
2609     for (j=0; j<bs; j++) {
2610       kmax = rowlengths[i*bs+j];
2611       for (k=0; k<kmax; k++) {
2612         tmp = jj[nzcount++]/bs;
2613 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2614       }
2615     }
2616     /* sort the masked values */
2617     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2618 
2619     /* set "j" values into matrix */
2620     maskcount = 1;
2621     for (j=0; j<nmask; j++) {
2622       a->j[jcount++]  = masked[j];
2623       mask[masked[j]] = maskcount++;
2624     }
2625     /* set "a" values into matrix */
2626     ishift = bs2*a->i[i];
2627     for (j=0; j<bs; j++) {
2628       kmax = rowlengths[i*bs+j];
2629       for (k=0; k<kmax; k++) {
2630         tmp       = jj[nzcountb]/bs ;
2631         block     = mask[tmp] - 1;
2632         point     = jj[nzcountb] - bs*tmp;
2633         idx       = ishift + bs2*block + j + bs*point;
2634         a->a[idx] = (MatScalar)aa[nzcountb++];
2635       }
2636     }
2637     /* zero out the mask elements we set */
2638     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2639   }
2640   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2641 
2642   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2643   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2644   ierr = PetscFree(aa);CHKERRQ(ierr);
2645   ierr = PetscFree(jj);CHKERRQ(ierr);
2646   ierr = PetscFree(mask);CHKERRQ(ierr);
2647 
2648   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = MatView_Private(B);CHKERRQ(ierr);
2651 
2652   *A = B;
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatCreateSeqBAIJ"
2658 /*@C
2659    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2660    compressed row) format.  For good matrix assembly performance the
2661    user should preallocate the matrix storage by setting the parameter nz
2662    (or the array nnz).  By setting these parameters accurately, performance
2663    during matrix assembly can be increased by more than a factor of 50.
2664 
2665    Collective on MPI_Comm
2666 
2667    Input Parameters:
2668 +  comm - MPI communicator, set to PETSC_COMM_SELF
2669 .  bs - size of block
2670 .  m - number of rows
2671 .  n - number of columns
2672 .  nz - number of nonzero blocks  per block row (same for all rows)
2673 -  nnz - array containing the number of nonzero blocks in the various block rows
2674          (possibly different for each block row) or PETSC_NULL
2675 
2676    Output Parameter:
2677 .  A - the matrix
2678 
2679    Options Database Keys:
2680 .   -mat_no_unroll - uses code that does not unroll the loops in the
2681                      block calculations (much slower)
2682 .    -mat_block_size - size of the blocks to use
2683 
2684    Level: intermediate
2685 
2686    Notes:
2687    The number of rows and columns must be divisible by blocksize.
2688 
2689    If the nnz parameter is given then the nz parameter is ignored
2690 
2691    A nonzero block is any block that as 1 or more nonzeros in it
2692 
2693    The block AIJ format is fully compatible with standard Fortran 77
2694    storage.  That is, the stored row and column indices can begin at
2695    either one (as in Fortran) or zero.  See the users' manual for details.
2696 
2697    Specify the preallocated storage with either nz or nnz (not both).
2698    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2699    allocation.  For additional details, see the users manual chapter on
2700    matrices.
2701 
2702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2703 @*/
2704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2705 {
2706   PetscErrorCode ierr;
2707 
2708   PetscFunctionBegin;
2709   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2710   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2711   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2712   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2718 /*@C
2719    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2720    per row in the matrix. For good matrix assembly performance the
2721    user should preallocate the matrix storage by setting the parameter nz
2722    (or the array nnz).  By setting these parameters accurately, performance
2723    during matrix assembly can be increased by more than a factor of 50.
2724 
2725    Collective on MPI_Comm
2726 
2727    Input Parameters:
2728 +  A - the matrix
2729 .  bs - size of block
2730 .  nz - number of block nonzeros per block row (same for all rows)
2731 -  nnz - array containing the number of block nonzeros in the various block rows
2732          (possibly different for each block row) or PETSC_NULL
2733 
2734    Options Database Keys:
2735 .   -mat_no_unroll - uses code that does not unroll the loops in the
2736                      block calculations (much slower)
2737 .    -mat_block_size - size of the blocks to use
2738 
2739    Level: intermediate
2740 
2741    Notes:
2742    If the nnz parameter is given then the nz parameter is ignored
2743 
2744    The block AIJ format is fully compatible with standard Fortran 77
2745    storage.  That is, the stored row and column indices can begin at
2746    either one (as in Fortran) or zero.  See the users' manual for details.
2747 
2748    Specify the preallocated storage with either nz or nnz (not both).
2749    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2750    allocation.  For additional details, see the users manual chapter on
2751    matrices.
2752 
2753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2754 @*/
2755 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2756 {
2757   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2758 
2759   PetscFunctionBegin;
2760   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2761   if (f) {
2762     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2763   }
2764   PetscFunctionReturn(0);
2765 }
2766 
2767