xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4e78640e53e28f1a17004e3585001bcd87aef9bf)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
831   if (a->row) {
832     ierr = ISDestroy(a->row);CHKERRQ(ierr);
833   }
834   if (a->col) {
835     ierr = ISDestroy(a->col);CHKERRQ(ierr);
836   }
837   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
838   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
839   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
840   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
841   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
842   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
843   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
844 #if defined(PETSC_USE_MAT_SINGLE)
845   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
846 #endif
847   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
848   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
849 
850   ierr = PetscFree(a);CHKERRQ(ierr);
851 
852   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatSetOption_SeqBAIJ"
864 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
865 {
866   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   switch (op) {
871   case MAT_ROW_ORIENTED:
872     a->roworiented    = PETSC_TRUE;
873     break;
874   case MAT_COLUMN_ORIENTED:
875     a->roworiented    = PETSC_FALSE;
876     break;
877   case MAT_COLUMNS_SORTED:
878     a->sorted         = PETSC_TRUE;
879     break;
880   case MAT_COLUMNS_UNSORTED:
881     a->sorted         = PETSC_FALSE;
882     break;
883   case MAT_KEEP_ZEROED_ROWS:
884     a->keepzeroedrows = PETSC_TRUE;
885     break;
886   case MAT_NO_NEW_NONZERO_LOCATIONS:
887     a->nonew          = 1;
888     break;
889   case MAT_NEW_NONZERO_LOCATION_ERR:
890     a->nonew          = -1;
891     break;
892   case MAT_NEW_NONZERO_ALLOCATION_ERR:
893     a->nonew          = -2;
894     break;
895   case MAT_YES_NEW_NONZERO_LOCATIONS:
896     a->nonew          = 0;
897     break;
898   case MAT_ROWS_SORTED:
899   case MAT_ROWS_UNSORTED:
900   case MAT_YES_NEW_DIAGONALS:
901   case MAT_IGNORE_OFF_PROC_ENTRIES:
902   case MAT_USE_HASH_TABLE:
903     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
904     break;
905   case MAT_NO_NEW_DIAGONALS:
906     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
907   case MAT_SYMMETRIC:
908   case MAT_STRUCTURALLY_SYMMETRIC:
909   case MAT_NOT_SYMMETRIC:
910   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
911   case MAT_HERMITIAN:
912   case MAT_NOT_HERMITIAN:
913   case MAT_SYMMETRY_ETERNAL:
914   case MAT_NOT_SYMMETRY_ETERNAL:
915     break;
916   default:
917     SETERRQ(PETSC_ERR_SUP,"unknown option");
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatGetRow_SeqBAIJ"
924 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
925 {
926   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
927   PetscErrorCode ierr;
928   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
929   MatScalar      *aa,*aa_i;
930   PetscScalar    *v_i;
931 
932   PetscFunctionBegin;
933   bs  = A->bs;
934   ai  = a->i;
935   aj  = a->j;
936   aa  = a->a;
937   bs2 = a->bs2;
938 
939   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
940 
941   bn  = row/bs;   /* Block number */
942   bp  = row % bs; /* Block Position */
943   M   = ai[bn+1] - ai[bn];
944   *nz = bs*M;
945 
946   if (v) {
947     *v = 0;
948     if (*nz) {
949       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
950       for (i=0; i<M; i++) { /* for each block in the block row */
951         v_i  = *v + i*bs;
952         aa_i = aa + bs2*(ai[bn] + i);
953         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
954       }
955     }
956   }
957 
958   if (idx) {
959     *idx = 0;
960     if (*nz) {
961       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
962       for (i=0; i<M; i++) { /* for each block in the block row */
963         idx_i = *idx + i*bs;
964         itmp  = bs*aj[ai[bn] + i];
965         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
966       }
967     }
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
974 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
975 {
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
980   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatTranspose_SeqBAIJ"
986 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
987 {
988   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
989   Mat            C;
990   PetscErrorCode ierr;
991   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
992   PetscInt       *rows,*cols,bs2=a->bs2;
993   PetscScalar    *array;
994 
995   PetscFunctionBegin;
996   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
997   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
998   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
999 
1000 #if defined(PETSC_USE_MAT_SINGLE)
1001   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1002   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1003 #else
1004   array = a->a;
1005 #endif
1006 
1007   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1008   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1009   ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr);
1010   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1011   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1012   ierr = PetscFree(col);CHKERRQ(ierr);
1013   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1014   cols = rows + bs;
1015   for (i=0; i<mbs; i++) {
1016     cols[0] = i*bs;
1017     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1018     len = ai[i+1] - ai[i];
1019     for (j=0; j<len; j++) {
1020       rows[0] = (*aj++)*bs;
1021       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1022       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1023       array += bs2;
1024     }
1025   }
1026   ierr = PetscFree(rows);CHKERRQ(ierr);
1027 #if defined(PETSC_USE_MAT_SINGLE)
1028   ierr = PetscFree(array);
1029 #endif
1030 
1031   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1033 
1034   if (B) {
1035     *B = C;
1036   } else {
1037     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1044 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1045 {
1046   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1047   PetscErrorCode ierr;
1048   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1049   int            fd;
1050   PetscScalar    *aa;
1051   FILE           *file;
1052 
1053   PetscFunctionBegin;
1054   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1055   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1056   col_lens[0] = MAT_FILE_COOKIE;
1057 
1058   col_lens[1] = A->m;
1059   col_lens[2] = A->n;
1060   col_lens[3] = a->nz*bs2;
1061 
1062   /* store lengths of each row and write (including header) to file */
1063   count = 0;
1064   for (i=0; i<a->mbs; i++) {
1065     for (j=0; j<bs; j++) {
1066       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1067     }
1068   }
1069   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1070   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1071 
1072   /* store column indices (zero start index) */
1073   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1074   count = 0;
1075   for (i=0; i<a->mbs; i++) {
1076     for (j=0; j<bs; j++) {
1077       for (k=a->i[i]; k<a->i[i+1]; k++) {
1078         for (l=0; l<bs; l++) {
1079           jj[count++] = bs*a->j[k] + l;
1080         }
1081       }
1082     }
1083   }
1084   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1085   ierr = PetscFree(jj);CHKERRQ(ierr);
1086 
1087   /* store nonzero values */
1088   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1089   count = 0;
1090   for (i=0; i<a->mbs; i++) {
1091     for (j=0; j<bs; j++) {
1092       for (k=a->i[i]; k<a->i[i+1]; k++) {
1093         for (l=0; l<bs; l++) {
1094           aa[count++] = a->a[bs2*k + l*bs + j];
1095         }
1096       }
1097     }
1098   }
1099   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1100   ierr = PetscFree(aa);CHKERRQ(ierr);
1101 
1102   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1103   if (file) {
1104     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1111 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1112 {
1113   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1114   PetscErrorCode    ierr;
1115   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1116   PetscViewerFormat format;
1117 
1118   PetscFunctionBegin;
1119   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1120   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1121     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1122   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1123     Mat aij;
1124     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1125     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1126     ierr = MatDestroy(aij);CHKERRQ(ierr);
1127   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1128      PetscFunctionReturn(0);
1129   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1130     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1131     for (i=0; i<a->mbs; i++) {
1132       for (j=0; j<bs; j++) {
1133         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1134         for (k=a->i[i]; k<a->i[i+1]; k++) {
1135           for (l=0; l<bs; l++) {
1136 #if defined(PETSC_USE_COMPLEX)
1137             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1138               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1139                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1140             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1141               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1142                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1143             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             }
1146 #else
1147             if (a->a[bs2*k + l*bs + j] != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1149             }
1150 #endif
1151           }
1152         }
1153         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1154       }
1155     }
1156     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1157   } else {
1158     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1159     for (i=0; i<a->mbs; i++) {
1160       for (j=0; j<bs; j++) {
1161         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1162         for (k=a->i[i]; k<a->i[i+1]; k++) {
1163           for (l=0; l<bs; l++) {
1164 #if defined(PETSC_USE_COMPLEX)
1165             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1166               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1167                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1168             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1169               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1170                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1171             } else {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1173             }
1174 #else
1175             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1176 #endif
1177           }
1178         }
1179         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1180       }
1181     }
1182     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1183   }
1184   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1190 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1191 {
1192   Mat            A = (Mat) Aa;
1193   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1194   PetscErrorCode ierr;
1195   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1196   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1197   MatScalar      *aa;
1198   PetscViewer    viewer;
1199 
1200   PetscFunctionBegin;
1201 
1202   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1203   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1204 
1205   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1206 
1207   /* loop over matrix elements drawing boxes */
1208   color = PETSC_DRAW_BLUE;
1209   for (i=0,row=0; i<mbs; i++,row+=bs) {
1210     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1212       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1213       aa = a->a + j*bs2;
1214       for (k=0; k<bs; k++) {
1215         for (l=0; l<bs; l++) {
1216           if (PetscRealPart(*aa++) >=  0.) continue;
1217           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1218         }
1219       }
1220     }
1221   }
1222   color = PETSC_DRAW_CYAN;
1223   for (i=0,row=0; i<mbs; i++,row+=bs) {
1224     for (j=a->i[i]; j<a->i[i+1]; j++) {
1225       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1226       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1227       aa = a->a + j*bs2;
1228       for (k=0; k<bs; k++) {
1229         for (l=0; l<bs; l++) {
1230           if (PetscRealPart(*aa++) != 0.) continue;
1231           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1232         }
1233       }
1234     }
1235   }
1236 
1237   color = PETSC_DRAW_RED;
1238   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1241       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242       aa = a->a + j*bs2;
1243       for (k=0; k<bs; k++) {
1244         for (l=0; l<bs; l++) {
1245           if (PetscRealPart(*aa++) <= 0.) continue;
1246           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1247         }
1248       }
1249     }
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1256 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1257 {
1258   PetscErrorCode ierr;
1259   PetscReal      xl,yl,xr,yr,w,h;
1260   PetscDraw      draw;
1261   PetscTruth     isnull;
1262 
1263   PetscFunctionBegin;
1264 
1265   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1266   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1267 
1268   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1269   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1270   xr += w;    yr += h;  xl = -w;     yl = -h;
1271   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1272   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1273   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatView_SeqBAIJ"
1279 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1280 {
1281   PetscErrorCode ierr;
1282   PetscTruth     iascii,isbinary,isdraw;
1283 
1284   PetscFunctionBegin;
1285   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1286   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1287   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1288   if (iascii){
1289     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1290   } else if (isbinary) {
1291     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1292   } else if (isdraw) {
1293     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1294   } else {
1295     Mat B;
1296     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1297     ierr = MatView(B,viewer);CHKERRQ(ierr);
1298     ierr = MatDestroy(B);CHKERRQ(ierr);
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1306 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1307 {
1308   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1309   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1310   PetscInt    *ai = a->i,*ailen = a->ilen;
1311   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1312   MatScalar   *ap,*aa = a->a,zero = 0.0;
1313 
1314   PetscFunctionBegin;
1315   for (k=0; k<m; k++) { /* loop over rows */
1316     row  = im[k]; brow = row/bs;
1317     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1318     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1319     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1320     nrow = ailen[brow];
1321     for (l=0; l<n; l++) { /* loop over columns */
1322       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1323       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1324       col  = in[l] ;
1325       bcol = col/bs;
1326       cidx = col%bs;
1327       ridx = row%bs;
1328       high = nrow;
1329       low  = 0; /* assume unsorted */
1330       while (high-low > 5) {
1331         t = (low+high)/2;
1332         if (rp[t] > bcol) high = t;
1333         else             low  = t;
1334       }
1335       for (i=low; i<high; i++) {
1336         if (rp[i] > bcol) break;
1337         if (rp[i] == bcol) {
1338           *v++ = ap[bs2*i+bs*cidx+ridx];
1339           goto finished;
1340         }
1341       }
1342       *v++ = zero;
1343       finished:;
1344     }
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #if defined(PETSC_USE_MAT_SINGLE)
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1352 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1353 {
1354   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1355   PetscErrorCode ierr;
1356   PetscInt       i,N = m*n*b->bs2;
1357   MatScalar      *vsingle;
1358 
1359   PetscFunctionBegin;
1360   if (N > b->setvalueslen) {
1361     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1362     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1363     b->setvalueslen  = N;
1364   }
1365   vsingle = b->setvaluescopy;
1366   for (i=0; i<N; i++) {
1367     vsingle[i] = v[i];
1368   }
1369   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 #endif
1373 
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1377 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1378 {
1379   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1380   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1381   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1382   PetscErrorCode  ierr;
1383   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1384   PetscTruth      roworiented=a->roworiented;
1385   const MatScalar *value = v;
1386   MatScalar       *ap,*aa = a->a,*bap;
1387 
1388   PetscFunctionBegin;
1389   if (roworiented) {
1390     stepval = (n-1)*bs;
1391   } else {
1392     stepval = (m-1)*bs;
1393   }
1394   for (k=0; k<m; k++) { /* loop over added rows */
1395     row  = im[k];
1396     if (row < 0) continue;
1397 #if defined(PETSC_USE_DEBUG)
1398     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1399 #endif
1400     rp   = aj + ai[row];
1401     ap   = aa + bs2*ai[row];
1402     rmax = imax[row];
1403     nrow = ailen[row];
1404     low  = 0;
1405     high = nrow;
1406     for (l=0; l<n; l++) { /* loop over added columns */
1407       if (in[l] < 0) continue;
1408 #if defined(PETSC_USE_DEBUG)
1409       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1410 #endif
1411       col = in[l];
1412       if (roworiented) {
1413         value = v + k*(stepval+bs)*bs + l*bs;
1414       } else {
1415         value = v + l*(stepval+bs)*bs + k*bs;
1416       }
1417       if (col <= lastcol) low = 0; else high = nrow;
1418       lastcol = col;
1419       while (high-low > 7) {
1420         t = (low+high)/2;
1421         if (rp[t] > col) high = t;
1422         else             low  = t;
1423       }
1424       for (i=low; i<high; i++) {
1425         if (rp[i] > col) break;
1426         if (rp[i] == col) {
1427           bap  = ap +  bs2*i;
1428           if (roworiented) {
1429             if (is == ADD_VALUES) {
1430               for (ii=0; ii<bs; ii++,value+=stepval) {
1431                 for (jj=ii; jj<bs2; jj+=bs) {
1432                   bap[jj] += *value++;
1433                 }
1434               }
1435             } else {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] = *value++;
1439                 }
1440               }
1441             }
1442           } else {
1443             if (is == ADD_VALUES) {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=0; jj<bs; jj++) {
1446                   *bap++ += *value++;
1447                 }
1448               }
1449             } else {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++  = *value++;
1453                 }
1454               }
1455             }
1456           }
1457           goto noinsert2;
1458         }
1459       }
1460       if (nonew == 1) goto noinsert2;
1461       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1462       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1463       N = nrow++ - 1;
1464       /* shift up all the later entries in this row */
1465       for (ii=N; ii>=i; ii--) {
1466         rp[ii+1] = rp[ii];
1467         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1468       }
1469       if (N >= i) {
1470         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1471       }
1472       rp[i] = col;
1473       bap   = ap +  bs2*i;
1474       if (roworiented) {
1475         for (ii=0; ii<bs; ii++,value+=stepval) {
1476           for (jj=ii; jj<bs2; jj+=bs) {
1477             bap[jj] = *value++;
1478           }
1479         }
1480       } else {
1481         for (ii=0; ii<bs; ii++,value+=stepval) {
1482           for (jj=0; jj<bs; jj++) {
1483             *bap++  = *value++;
1484           }
1485         }
1486       }
1487       noinsert2:;
1488       low = i;
1489     }
1490     ailen[row] = nrow;
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1497 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1498 {
1499   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1500   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1501   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1502   PetscErrorCode ierr;
1503   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1504   MatScalar      *aa = a->a,*ap;
1505   PetscReal      ratio=0.6;
1506 
1507   PetscFunctionBegin;
1508   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1509 
1510   if (m) rmax = ailen[0];
1511   for (i=1; i<mbs; i++) {
1512     /* move each row back by the amount of empty slots (fshift) before it*/
1513     fshift += imax[i-1] - ailen[i-1];
1514     rmax   = PetscMax(rmax,ailen[i]);
1515     if (fshift) {
1516       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1517       N = ailen[i];
1518       for (j=0; j<N; j++) {
1519         ip[j-fshift] = ip[j];
1520         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1521       }
1522     }
1523     ai[i] = ai[i-1] + ailen[i-1];
1524   }
1525   if (mbs) {
1526     fshift += imax[mbs-1] - ailen[mbs-1];
1527     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1528   }
1529   /* reset ilen and imax for each row */
1530   for (i=0; i<mbs; i++) {
1531     ailen[i] = imax[i] = ai[i+1] - ai[i];
1532   }
1533   a->nz = ai[mbs];
1534 
1535   /* diagonals may have moved, so kill the diagonal pointers */
1536   a->idiagvalid = PETSC_FALSE;
1537   if (fshift && a->diag) {
1538     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1539     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1540     a->diag = 0;
1541   }
1542   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1543   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1544   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1545   a->reallocs          = 0;
1546   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1547 
1548   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1549   if (a->compressedrow.use){
1550     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1551   }
1552 
1553   A->same_nonzero = PETSC_TRUE;
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /*
1558    This function returns an array of flags which indicate the locations of contiguous
1559    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1560    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1561    Assume: sizes should be long enough to hold all the values.
1562 */
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1565 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1566 {
1567   PetscInt   i,j,k,row;
1568   PetscTruth flg;
1569 
1570   PetscFunctionBegin;
1571   for (i=0,j=0; i<n; j++) {
1572     row = idx[i];
1573     if (row%bs!=0) { /* Not the begining of a block */
1574       sizes[j] = 1;
1575       i++;
1576     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1577       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1578       i++;
1579     } else { /* Begining of the block, so check if the complete block exists */
1580       flg = PETSC_TRUE;
1581       for (k=1; k<bs; k++) {
1582         if (row+k != idx[i+k]) { /* break in the block */
1583           flg = PETSC_FALSE;
1584           break;
1585         }
1586       }
1587       if (flg) { /* No break in the bs */
1588         sizes[j] = bs;
1589         i+= bs;
1590       } else {
1591         sizes[j] = 1;
1592         i++;
1593       }
1594     }
1595   }
1596   *bs_max = j;
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1602 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1603 {
1604   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1605   PetscErrorCode ierr;
1606   PetscInt       i,j,k,count,*rows;
1607   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1608   PetscScalar    zero = 0.0;
1609   MatScalar      *aa;
1610 
1611   PetscFunctionBegin;
1612   /* Make a copy of the IS and  sort it */
1613   /* allocate memory for rows,sizes */
1614   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1615   sizes = rows + is_n;
1616 
1617   /* copy IS values to rows, and sort them */
1618   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1619   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1620   if (baij->keepzeroedrows) {
1621     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1622     bs_max = is_n;
1623     A->same_nonzero = PETSC_TRUE;
1624   } else {
1625     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1626     A->same_nonzero = PETSC_FALSE;
1627   }
1628 
1629   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1630     row   = rows[j];
1631     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1632     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1633     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1634     if (sizes[i] == bs && !baij->keepzeroedrows) {
1635       if (diag != 0.0) {
1636         if (baij->ilen[row/bs] > 0) {
1637           baij->ilen[row/bs]       = 1;
1638           baij->j[baij->i[row/bs]] = row/bs;
1639           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1640         }
1641         /* Now insert all the diagonal values for this bs */
1642         for (k=0; k<bs; k++) {
1643           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1644         }
1645       } else { /* (diag == 0.0) */
1646         baij->ilen[row/bs] = 0;
1647       } /* end (diag == 0.0) */
1648     } else { /* (sizes[i] != bs) */
1649 #if defined (PETSC_USE_DEBUG)
1650       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1651 #endif
1652       for (k=0; k<count; k++) {
1653         aa[0] =  zero;
1654         aa    += bs;
1655       }
1656       if (diag != 0.0) {
1657         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1658       }
1659     }
1660   }
1661 
1662   ierr = PetscFree(rows);CHKERRQ(ierr);
1663   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1669 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1670 {
1671   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1672   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1673   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1674   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1675   PetscErrorCode ierr;
1676   PetscInt       ridx,cidx,bs2=a->bs2;
1677   PetscTruth     roworiented=a->roworiented;
1678   MatScalar      *ap,value,*aa=a->a,*bap;
1679 
1680   PetscFunctionBegin;
1681   for (k=0; k<m; k++) { /* loop over added rows */
1682     row  = im[k];
1683     brow = row/bs;
1684     if (row < 0) continue;
1685 #if defined(PETSC_USE_DEBUG)
1686     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1687 #endif
1688     rp   = aj + ai[brow];
1689     ap   = aa + bs2*ai[brow];
1690     rmax = imax[brow];
1691     nrow = ailen[brow];
1692     low  = 0;
1693     high = nrow;
1694     for (l=0; l<n; l++) { /* loop over added columns */
1695       if (in[l] < 0) continue;
1696 #if defined(PETSC_USE_DEBUG)
1697       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1698 #endif
1699       col = in[l]; bcol = col/bs;
1700       ridx = row % bs; cidx = col % bs;
1701       if (roworiented) {
1702         value = v[l + k*n];
1703       } else {
1704         value = v[k + l*m];
1705       }
1706       if (col <= lastcol) low = 0; else high = nrow;
1707       lastcol = col;
1708       while (high-low > 7) {
1709         t = (low+high)/2;
1710         if (rp[t] > bcol) high = t;
1711         else              low  = t;
1712       }
1713       for (i=low; i<high; i++) {
1714         if (rp[i] > bcol) break;
1715         if (rp[i] == bcol) {
1716           bap  = ap +  bs2*i + bs*cidx + ridx;
1717           if (is == ADD_VALUES) *bap += value;
1718           else                  *bap  = value;
1719           goto noinsert1;
1720         }
1721       }
1722       if (nonew == 1) goto noinsert1;
1723       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1724       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1725       N = nrow++ - 1;
1726       /* shift up all the later entries in this row */
1727       for (ii=N; ii>=i; ii--) {
1728         rp[ii+1] = rp[ii];
1729         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1730       }
1731       if (N>=i) {
1732         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1733       }
1734       rp[i]                      = bcol;
1735       ap[bs2*i + bs*cidx + ridx] = value;
1736       a->nz++;
1737       noinsert1:;
1738       low = i;
1739     }
1740     ailen[brow] = nrow;
1741   }
1742   A->same_nonzero = PETSC_FALSE;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1749 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1750 {
1751   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1752   Mat            outA;
1753   PetscErrorCode ierr;
1754   PetscTruth     row_identity,col_identity;
1755 
1756   PetscFunctionBegin;
1757   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1758   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1759   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1760   if (!row_identity || !col_identity) {
1761     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1762   }
1763 
1764   outA          = inA;
1765   inA->factor   = FACTOR_LU;
1766 
1767   if (!a->diag) {
1768     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1769   }
1770 
1771   a->row        = row;
1772   a->col        = col;
1773   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1774   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1775 
1776   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1777   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1778   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1779 
1780   /*
1781       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1782       for ILU(0) factorization with natural ordering
1783   */
1784   if (inA->bs < 8) {
1785     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1786   } else {
1787     if (!a->solve_work) {
1788       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1789       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1790     }
1791   }
1792 
1793   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1794 
1795   PetscFunctionReturn(0);
1796 }
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1799 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1800 {
1801   static PetscTruth called = PETSC_FALSE;
1802   MPI_Comm          comm = A->comm;
1803   PetscErrorCode    ierr;
1804 
1805   PetscFunctionBegin;
1806   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1807   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1808   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 EXTERN_C_BEGIN
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1815 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1816 {
1817   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1818   PetscInt    i,nz,nbs;
1819 
1820   PetscFunctionBegin;
1821   nz  = baij->maxnz/baij->bs2;
1822   nbs = baij->nbs;
1823   for (i=0; i<nz; i++) {
1824     baij->j[i] = indices[i];
1825   }
1826   baij->nz = nz;
1827   for (i=0; i<nbs; i++) {
1828     baij->ilen[i] = baij->imax[i];
1829   }
1830 
1831   PetscFunctionReturn(0);
1832 }
1833 EXTERN_C_END
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1837 /*@
1838     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1839        in the matrix.
1840 
1841   Input Parameters:
1842 +  mat - the SeqBAIJ matrix
1843 -  indices - the column indices
1844 
1845   Level: advanced
1846 
1847   Notes:
1848     This can be called if you have precomputed the nonzero structure of the
1849   matrix and want to provide it to the matrix object to improve the performance
1850   of the MatSetValues() operation.
1851 
1852     You MUST have set the correct numbers of nonzeros per row in the call to
1853   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1854 
1855     MUST be called before any calls to MatSetValues();
1856 
1857 @*/
1858 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1859 {
1860   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1861 
1862   PetscFunctionBegin;
1863   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1864   PetscValidPointer(indices,2);
1865   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1866   if (f) {
1867     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1868   } else {
1869     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1876 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1877 {
1878   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1879   PetscErrorCode ierr;
1880   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1881   PetscReal      atmp;
1882   PetscScalar    *x,zero = 0.0;
1883   MatScalar      *aa;
1884   PetscInt       ncols,brow,krow,kcol;
1885 
1886   PetscFunctionBegin;
1887   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1888   bs   = A->bs;
1889   aa   = a->a;
1890   ai   = a->i;
1891   aj   = a->j;
1892   mbs = a->mbs;
1893 
1894   ierr = VecSet(v,zero);CHKERRQ(ierr);
1895   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1896   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1897   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1898   for (i=0; i<mbs; i++) {
1899     ncols = ai[1] - ai[0]; ai++;
1900     brow  = bs*i;
1901     for (j=0; j<ncols; j++){
1902       /* bcol = bs*(*aj); */
1903       for (kcol=0; kcol<bs; kcol++){
1904         for (krow=0; krow<bs; krow++){
1905           atmp = PetscAbsScalar(*aa); aa++;
1906           row = brow + krow;    /* row index */
1907           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1908           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1909         }
1910       }
1911       aj++;
1912     }
1913   }
1914   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1920 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1921 {
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1931 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1932 {
1933   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1934   PetscFunctionBegin;
1935   *array = a->a;
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1941 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1942 {
1943   PetscFunctionBegin;
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #include "petscblaslapack.h"
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1950 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1951 {
1952   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1953   PetscErrorCode ierr;
1954   PetscInt       i,bs=Y->bs,j,bs2;
1955   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1956 
1957   PetscFunctionBegin;
1958   if (str == SAME_NONZERO_PATTERN) {
1959     PetscScalar alpha = a;
1960     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1961   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1962     if (y->xtoy && y->XtoY != X) {
1963       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1964       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1965     }
1966     if (!y->xtoy) { /* get xtoy */
1967       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1968       y->XtoY = X;
1969     }
1970     bs2 = bs*bs;
1971     for (i=0; i<x->nz; i++) {
1972       j = 0;
1973       while (j < bs2){
1974         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1975         j++;
1976       }
1977     }
1978     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
1979   } else {
1980     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1981   }
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 #undef __FUNCT__
1986 #define __FUNCT__ "MatRealPart_SeqBAIJ"
1987 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
1988 {
1989   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1990   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1991   PetscScalar    *aa = a->a;
1992 
1993   PetscFunctionBegin;
1994   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2000 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2001 {
2002   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2003   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2004   PetscScalar    *aa = a->a;
2005 
2006   PetscFunctionBegin;
2007   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 
2012 /* -------------------------------------------------------------------*/
2013 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2014        MatGetRow_SeqBAIJ,
2015        MatRestoreRow_SeqBAIJ,
2016        MatMult_SeqBAIJ_N,
2017 /* 4*/ MatMultAdd_SeqBAIJ_N,
2018        MatMultTranspose_SeqBAIJ,
2019        MatMultTransposeAdd_SeqBAIJ,
2020        MatSolve_SeqBAIJ_N,
2021        0,
2022        0,
2023 /*10*/ 0,
2024        MatLUFactor_SeqBAIJ,
2025        0,
2026        0,
2027        MatTranspose_SeqBAIJ,
2028 /*15*/ MatGetInfo_SeqBAIJ,
2029        MatEqual_SeqBAIJ,
2030        MatGetDiagonal_SeqBAIJ,
2031        MatDiagonalScale_SeqBAIJ,
2032        MatNorm_SeqBAIJ,
2033 /*20*/ 0,
2034        MatAssemblyEnd_SeqBAIJ,
2035        0,
2036        MatSetOption_SeqBAIJ,
2037        MatZeroEntries_SeqBAIJ,
2038 /*25*/ MatZeroRows_SeqBAIJ,
2039        MatLUFactorSymbolic_SeqBAIJ,
2040        MatLUFactorNumeric_SeqBAIJ_N,
2041        MatCholeskyFactorSymbolic_SeqBAIJ,
2042        MatCholeskyFactorNumeric_SeqBAIJ_N,
2043 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2044        MatILUFactorSymbolic_SeqBAIJ,
2045        MatICCFactorSymbolic_SeqBAIJ,
2046        MatGetArray_SeqBAIJ,
2047        MatRestoreArray_SeqBAIJ,
2048 /*35*/ MatDuplicate_SeqBAIJ,
2049        0,
2050        0,
2051        MatILUFactor_SeqBAIJ,
2052        0,
2053 /*40*/ MatAXPY_SeqBAIJ,
2054        MatGetSubMatrices_SeqBAIJ,
2055        MatIncreaseOverlap_SeqBAIJ,
2056        MatGetValues_SeqBAIJ,
2057        0,
2058 /*45*/ MatPrintHelp_SeqBAIJ,
2059        MatScale_SeqBAIJ,
2060        0,
2061        0,
2062        0,
2063 /*50*/ 0,
2064        MatGetRowIJ_SeqBAIJ,
2065        MatRestoreRowIJ_SeqBAIJ,
2066        0,
2067        0,
2068 /*55*/ 0,
2069        0,
2070        0,
2071        0,
2072        MatSetValuesBlocked_SeqBAIJ,
2073 /*60*/ MatGetSubMatrix_SeqBAIJ,
2074        MatDestroy_SeqBAIJ,
2075        MatView_SeqBAIJ,
2076        MatGetPetscMaps_Petsc,
2077        0,
2078 /*65*/ 0,
2079        0,
2080        0,
2081        0,
2082        0,
2083 /*70*/ MatGetRowMax_SeqBAIJ,
2084        MatConvert_Basic,
2085        0,
2086        0,
2087        0,
2088 /*75*/ 0,
2089        0,
2090        0,
2091        0,
2092        0,
2093 /*80*/ 0,
2094        0,
2095        0,
2096        0,
2097        MatLoad_SeqBAIJ,
2098 /*85*/ 0,
2099        0,
2100        0,
2101        0,
2102        0,
2103 /*90*/ 0,
2104        0,
2105        0,
2106        0,
2107        0,
2108 /*95*/ 0,
2109        0,
2110        0,
2111        0,
2112        0,
2113 /*100*/0,
2114        0,
2115        0,
2116        0,
2117        0,
2118 /*105*/0,
2119        MatRealPart_SeqBAIJ,
2120        MatImaginaryPart_SeqBAIJ
2121 };
2122 
2123 EXTERN_C_BEGIN
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2126 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2127 {
2128   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2129   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   if (aij->nonew != 1) {
2134     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2135   }
2136 
2137   /* allocate space for values if not already there */
2138   if (!aij->saved_values) {
2139     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2140   }
2141 
2142   /* copy values over */
2143   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 EXTERN_C_END
2147 
2148 EXTERN_C_BEGIN
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2151 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2152 {
2153   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2154   PetscErrorCode ierr;
2155   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2156 
2157   PetscFunctionBegin;
2158   if (aij->nonew != 1) {
2159     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2160   }
2161   if (!aij->saved_values) {
2162     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2163   }
2164 
2165   /* copy values over */
2166   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 EXTERN_C_END
2170 
2171 EXTERN_C_BEGIN
2172 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2173 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2174 EXTERN_C_END
2175 
2176 EXTERN_C_BEGIN
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2179 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2180 {
2181   Mat_SeqBAIJ    *b;
2182   PetscErrorCode ierr;
2183   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2184   PetscTruth     flg,skipallocation = PETSC_FALSE;
2185 
2186   PetscFunctionBegin;
2187 
2188   if (nz == MAT_SKIP_ALLOCATION) {
2189     skipallocation = PETSC_TRUE;
2190     nz             = 0;
2191   }
2192 
2193   B->preallocated = PETSC_TRUE;
2194   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2195   if (nnz && newbs != bs) {
2196     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2197   }
2198   bs   = newbs;
2199 
2200   mbs  = B->m/bs;
2201   nbs  = B->n/bs;
2202   bs2  = bs*bs;
2203 
2204   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2205     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2206   }
2207 
2208   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2209   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2210   if (nnz) {
2211     for (i=0; i<mbs; i++) {
2212       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2213       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2214     }
2215   }
2216 
2217   b       = (Mat_SeqBAIJ*)B->data;
2218   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2219   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2220   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2221   if (!flg) {
2222     switch (bs) {
2223     case 1:
2224       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2225       B->ops->mult            = MatMult_SeqBAIJ_1;
2226       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2227       break;
2228     case 2:
2229       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2230       B->ops->mult            = MatMult_SeqBAIJ_2;
2231       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2232       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2233       break;
2234     case 3:
2235       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2236       B->ops->mult            = MatMult_SeqBAIJ_3;
2237       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2238       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2239       break;
2240     case 4:
2241       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2242       B->ops->mult            = MatMult_SeqBAIJ_4;
2243       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2244       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2245       break;
2246     case 5:
2247       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2248       B->ops->mult            = MatMult_SeqBAIJ_5;
2249       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2250       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2251       break;
2252     case 6:
2253       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2254       B->ops->mult            = MatMult_SeqBAIJ_6;
2255       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2256       break;
2257     case 7:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2259       B->ops->mult            = MatMult_SeqBAIJ_7;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2261       break;
2262     default:
2263       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2264       B->ops->mult            = MatMult_SeqBAIJ_N;
2265       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2266       break;
2267     }
2268   }
2269   B->bs      = bs;
2270   b->mbs     = mbs;
2271   b->nbs     = nbs;
2272   if (!skipallocation) {
2273     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2274     /* b->ilen will count nonzeros in each block row so far. */
2275     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2276     if (!nnz) {
2277       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2278       else if (nz <= 0)        nz = 1;
2279       for (i=0; i<mbs; i++) b->imax[i] = nz;
2280       nz = nz*mbs;
2281     } else {
2282       nz = 0;
2283       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2284     }
2285 
2286     /* allocate the matrix space */
2287     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2288     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2289     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2290     b->singlemalloc = PETSC_TRUE;
2291 
2292     b->i[0] = 0;
2293     for (i=1; i<mbs+1; i++) {
2294       b->i[i] = b->i[i-1] + b->imax[i-1];
2295     }
2296   }
2297 
2298   B->bs               = bs;
2299   b->bs2              = bs2;
2300   b->mbs              = mbs;
2301   b->nz               = 0;
2302   b->maxnz            = nz*bs2;
2303   B->info.nz_unneeded = (PetscReal)b->maxnz;
2304   PetscFunctionReturn(0);
2305 }
2306 EXTERN_C_END
2307 
2308 /*MC
2309    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2310    block sparse compressed row format.
2311 
2312    Options Database Keys:
2313 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2314 
2315   Level: beginner
2316 
2317 .Seealso: MatCreateSeqBAIJ
2318 M*/
2319 
2320 EXTERN_C_BEGIN
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatCreate_SeqBAIJ"
2323 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2324 {
2325   PetscErrorCode ierr;
2326   PetscMPIInt    size;
2327   Mat_SeqBAIJ    *b;
2328 
2329   PetscFunctionBegin;
2330   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2331   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2332 
2333   B->m = B->M = PetscMax(B->m,B->M);
2334   B->n = B->N = PetscMax(B->n,B->N);
2335   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2336   B->data = (void*)b;
2337   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2338   B->factor           = 0;
2339   B->mapping          = 0;
2340   b->row              = 0;
2341   b->col              = 0;
2342   b->icol             = 0;
2343   b->reallocs         = 0;
2344   b->saved_values     = 0;
2345 #if defined(PETSC_USE_MAT_SINGLE)
2346   b->setvalueslen     = 0;
2347   b->setvaluescopy    = PETSC_NULL;
2348 #endif
2349 
2350   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2351   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2352 
2353   b->sorted           = PETSC_FALSE;
2354   b->roworiented      = PETSC_TRUE;
2355   b->nonew            = 0;
2356   b->diag             = 0;
2357   b->solve_work       = 0;
2358   b->mult_work        = 0;
2359   B->spptr            = 0;
2360   B->info.nz_unneeded = (PetscReal)b->maxnz;
2361   b->keepzeroedrows   = PETSC_FALSE;
2362   b->xtoy              = 0;
2363   b->XtoY              = 0;
2364   b->compressedrow.use     = PETSC_FALSE;
2365   b->compressedrow.nrows   = 0;
2366   b->compressedrow.i       = PETSC_NULL;
2367   b->compressedrow.rindex  = PETSC_NULL;
2368   b->compressedrow.checked = PETSC_FALSE;
2369   B->same_nonzero          = PETSC_FALSE;
2370 
2371   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2372                                      "MatInvertBlockDiagonal_SeqBAIJ",
2373                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2374   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2375                                      "MatStoreValues_SeqBAIJ",
2376                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2377   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2378                                      "MatRetrieveValues_SeqBAIJ",
2379                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2380   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2381                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2382                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2383   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2384                                      "MatConvert_SeqBAIJ_SeqAIJ",
2385                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2386   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2387                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2388                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2389   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2390                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2391                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394 EXTERN_C_END
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2398 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2399 {
2400   Mat            C;
2401   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2402   PetscErrorCode ierr;
2403   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2404 
2405   PetscFunctionBegin;
2406   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2407 
2408   *B = 0;
2409   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2410   ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr);
2411   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2412   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2413   c    = (Mat_SeqBAIJ*)C->data;
2414 
2415   C->M   = A->M;
2416   C->N   = A->N;
2417   C->bs  = A->bs;
2418   c->bs2 = a->bs2;
2419   c->mbs = a->mbs;
2420   c->nbs = a->nbs;
2421 
2422   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2423   for (i=0; i<mbs; i++) {
2424     c->imax[i] = a->imax[i];
2425     c->ilen[i] = a->ilen[i];
2426   }
2427 
2428   /* allocate the matrix space */
2429   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2430   c->singlemalloc = PETSC_TRUE;
2431   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2432   if (mbs > 0) {
2433     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2434     if (cpvalues == MAT_COPY_VALUES) {
2435       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2436     } else {
2437       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2438     }
2439   }
2440   c->sorted      = a->sorted;
2441   c->roworiented = a->roworiented;
2442   c->nonew       = a->nonew;
2443 
2444   if (a->diag) {
2445     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2446     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2447     for (i=0; i<mbs; i++) {
2448       c->diag[i] = a->diag[i];
2449     }
2450   } else c->diag        = 0;
2451   c->nz                 = a->nz;
2452   c->maxnz              = a->maxnz;
2453   c->solve_work         = 0;
2454   c->mult_work          = 0;
2455   C->preallocated       = PETSC_TRUE;
2456   C->assembled          = PETSC_TRUE;
2457 
2458   c->compressedrow.use     = a->compressedrow.use;
2459   c->compressedrow.nrows   = a->compressedrow.nrows;
2460   c->compressedrow.checked = a->compressedrow.checked;
2461   if ( a->compressedrow.checked && a->compressedrow.use){
2462     i = a->compressedrow.nrows;
2463     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2464     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2465     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2466     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2467   } else {
2468     c->compressedrow.use    = PETSC_FALSE;
2469     c->compressedrow.i      = PETSC_NULL;
2470     c->compressedrow.rindex = PETSC_NULL;
2471   }
2472   C->same_nonzero = A->same_nonzero;
2473   *B = C;
2474   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatLoad_SeqBAIJ"
2480 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2481 {
2482   Mat_SeqBAIJ    *a;
2483   Mat            B;
2484   PetscErrorCode ierr;
2485   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2486   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2487   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2488   PetscInt       *masked,nmask,tmp,bs2,ishift;
2489   PetscMPIInt    size;
2490   int            fd;
2491   PetscScalar    *aa;
2492   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2493 
2494   PetscFunctionBegin;
2495   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2496   bs2  = bs*bs;
2497 
2498   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2499   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2500   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2501   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2502   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2503   M = header[1]; N = header[2]; nz = header[3];
2504 
2505   if (header[3] < 0) {
2506     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2507   }
2508 
2509   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2510 
2511   /*
2512      This code adds extra rows to make sure the number of rows is
2513     divisible by the blocksize
2514   */
2515   mbs        = M/bs;
2516   extra_rows = bs - M + bs*(mbs);
2517   if (extra_rows == bs) extra_rows = 0;
2518   else                  mbs++;
2519   if (extra_rows) {
2520     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2521   }
2522 
2523   /* read in row lengths */
2524   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2525   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2526   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2527 
2528   /* read in column indices */
2529   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2530   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2531   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2532 
2533   /* loop over row lengths determining block row lengths */
2534   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2535   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2536   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2537   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2538   masked   = mask + mbs;
2539   rowcount = 0; nzcount = 0;
2540   for (i=0; i<mbs; i++) {
2541     nmask = 0;
2542     for (j=0; j<bs; j++) {
2543       kmax = rowlengths[rowcount];
2544       for (k=0; k<kmax; k++) {
2545         tmp = jj[nzcount++]/bs;
2546         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2547       }
2548       rowcount++;
2549     }
2550     browlengths[i] += nmask;
2551     /* zero out the mask elements we set */
2552     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2553   }
2554 
2555   /* create our matrix */
2556   ierr = MatCreate(comm,&B);
2557   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2558   ierr = MatSetType(B,type);CHKERRQ(ierr);
2559   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2560   a = (Mat_SeqBAIJ*)B->data;
2561 
2562   /* set matrix "i" values */
2563   a->i[0] = 0;
2564   for (i=1; i<= mbs; i++) {
2565     a->i[i]      = a->i[i-1] + browlengths[i-1];
2566     a->ilen[i-1] = browlengths[i-1];
2567   }
2568   a->nz         = 0;
2569   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2570 
2571   /* read in nonzero values */
2572   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2573   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2574   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2575 
2576   /* set "a" and "j" values into matrix */
2577   nzcount = 0; jcount = 0;
2578   for (i=0; i<mbs; i++) {
2579     nzcountb = nzcount;
2580     nmask    = 0;
2581     for (j=0; j<bs; j++) {
2582       kmax = rowlengths[i*bs+j];
2583       for (k=0; k<kmax; k++) {
2584         tmp = jj[nzcount++]/bs;
2585 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2586       }
2587     }
2588     /* sort the masked values */
2589     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2590 
2591     /* set "j" values into matrix */
2592     maskcount = 1;
2593     for (j=0; j<nmask; j++) {
2594       a->j[jcount++]  = masked[j];
2595       mask[masked[j]] = maskcount++;
2596     }
2597     /* set "a" values into matrix */
2598     ishift = bs2*a->i[i];
2599     for (j=0; j<bs; j++) {
2600       kmax = rowlengths[i*bs+j];
2601       for (k=0; k<kmax; k++) {
2602         tmp       = jj[nzcountb]/bs ;
2603         block     = mask[tmp] - 1;
2604         point     = jj[nzcountb] - bs*tmp;
2605         idx       = ishift + bs2*block + j + bs*point;
2606         a->a[idx] = (MatScalar)aa[nzcountb++];
2607       }
2608     }
2609     /* zero out the mask elements we set */
2610     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2611   }
2612   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2613 
2614   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2615   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2616   ierr = PetscFree(aa);CHKERRQ(ierr);
2617   ierr = PetscFree(jj);CHKERRQ(ierr);
2618   ierr = PetscFree(mask);CHKERRQ(ierr);
2619 
2620   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2621   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2622   ierr = MatView_Private(B);CHKERRQ(ierr);
2623 
2624   *A = B;
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 #undef __FUNCT__
2629 #define __FUNCT__ "MatCreateSeqBAIJ"
2630 /*@C
2631    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2632    compressed row) format.  For good matrix assembly performance the
2633    user should preallocate the matrix storage by setting the parameter nz
2634    (or the array nnz).  By setting these parameters accurately, performance
2635    during matrix assembly can be increased by more than a factor of 50.
2636 
2637    Collective on MPI_Comm
2638 
2639    Input Parameters:
2640 +  comm - MPI communicator, set to PETSC_COMM_SELF
2641 .  bs - size of block
2642 .  m - number of rows
2643 .  n - number of columns
2644 .  nz - number of nonzero blocks  per block row (same for all rows)
2645 -  nnz - array containing the number of nonzero blocks in the various block rows
2646          (possibly different for each block row) or PETSC_NULL
2647 
2648    Output Parameter:
2649 .  A - the matrix
2650 
2651    Options Database Keys:
2652 .   -mat_no_unroll - uses code that does not unroll the loops in the
2653                      block calculations (much slower)
2654 .    -mat_block_size - size of the blocks to use
2655 
2656    Level: intermediate
2657 
2658    Notes:
2659    The number of rows and columns must be divisible by blocksize.
2660 
2661    If the nnz parameter is given then the nz parameter is ignored
2662 
2663    A nonzero block is any block that as 1 or more nonzeros in it
2664 
2665    The block AIJ format is fully compatible with standard Fortran 77
2666    storage.  That is, the stored row and column indices can begin at
2667    either one (as in Fortran) or zero.  See the users' manual for details.
2668 
2669    Specify the preallocated storage with either nz or nnz (not both).
2670    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2671    allocation.  For additional details, see the users manual chapter on
2672    matrices.
2673 
2674 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2675 @*/
2676 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2677 {
2678   PetscErrorCode ierr;
2679 
2680   PetscFunctionBegin;
2681   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2682   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2683   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2684   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 #undef __FUNCT__
2689 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2690 /*@C
2691    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2692    per row in the matrix. For good matrix assembly performance the
2693    user should preallocate the matrix storage by setting the parameter nz
2694    (or the array nnz).  By setting these parameters accurately, performance
2695    during matrix assembly can be increased by more than a factor of 50.
2696 
2697    Collective on MPI_Comm
2698 
2699    Input Parameters:
2700 +  A - the matrix
2701 .  bs - size of block
2702 .  nz - number of block nonzeros per block row (same for all rows)
2703 -  nnz - array containing the number of block nonzeros in the various block rows
2704          (possibly different for each block row) or PETSC_NULL
2705 
2706    Options Database Keys:
2707 .   -mat_no_unroll - uses code that does not unroll the loops in the
2708                      block calculations (much slower)
2709 .    -mat_block_size - size of the blocks to use
2710 
2711    Level: intermediate
2712 
2713    Notes:
2714    If the nnz parameter is given then the nz parameter is ignored
2715 
2716    The block AIJ format is fully compatible with standard Fortran 77
2717    storage.  That is, the stored row and column indices can begin at
2718    either one (as in Fortran) or zero.  See the users' manual for details.
2719 
2720    Specify the preallocated storage with either nz or nnz (not both).
2721    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2722    allocation.  For additional details, see the users manual chapter on
2723    matrices.
2724 
2725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2726 @*/
2727 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2728 {
2729   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2730 
2731   PetscFunctionBegin;
2732   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2733   if (f) {
2734     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2735   }
2736   PetscFunctionReturn(0);
2737 }
2738 
2739