xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ed1caa07d30b4bf52fa72dab33adf574bdef2af0)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
831   if (a->row) {
832     ierr = ISDestroy(a->row);CHKERRQ(ierr);
833   }
834   if (a->col) {
835     ierr = ISDestroy(a->col);CHKERRQ(ierr);
836   }
837   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
838   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
839   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
840   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
841   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
842   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
843   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
844 #if defined(PETSC_USE_MAT_SINGLE)
845   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
846 #endif
847   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
848   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
849 
850   ierr = PetscFree(a);CHKERRQ(ierr);
851 
852   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatSetOption_SeqBAIJ"
864 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
865 {
866   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   switch (op) {
871   case MAT_ROW_ORIENTED:
872     a->roworiented    = PETSC_TRUE;
873     break;
874   case MAT_COLUMN_ORIENTED:
875     a->roworiented    = PETSC_FALSE;
876     break;
877   case MAT_COLUMNS_SORTED:
878     a->sorted         = PETSC_TRUE;
879     break;
880   case MAT_COLUMNS_UNSORTED:
881     a->sorted         = PETSC_FALSE;
882     break;
883   case MAT_KEEP_ZEROED_ROWS:
884     a->keepzeroedrows = PETSC_TRUE;
885     break;
886   case MAT_NO_NEW_NONZERO_LOCATIONS:
887     a->nonew          = 1;
888     break;
889   case MAT_NEW_NONZERO_LOCATION_ERR:
890     a->nonew          = -1;
891     break;
892   case MAT_NEW_NONZERO_ALLOCATION_ERR:
893     a->nonew          = -2;
894     break;
895   case MAT_YES_NEW_NONZERO_LOCATIONS:
896     a->nonew          = 0;
897     break;
898   case MAT_ROWS_SORTED:
899   case MAT_ROWS_UNSORTED:
900   case MAT_YES_NEW_DIAGONALS:
901   case MAT_IGNORE_OFF_PROC_ENTRIES:
902   case MAT_USE_HASH_TABLE:
903     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
904     break;
905   case MAT_NO_NEW_DIAGONALS:
906     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
907   case MAT_SYMMETRIC:
908   case MAT_STRUCTURALLY_SYMMETRIC:
909   case MAT_NOT_SYMMETRIC:
910   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
911   case MAT_HERMITIAN:
912   case MAT_NOT_HERMITIAN:
913   case MAT_SYMMETRY_ETERNAL:
914   case MAT_NOT_SYMMETRY_ETERNAL:
915     break;
916   default:
917     SETERRQ(PETSC_ERR_SUP,"unknown option");
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatGetRow_SeqBAIJ"
924 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
925 {
926   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
927   PetscErrorCode ierr;
928   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
929   MatScalar      *aa,*aa_i;
930   PetscScalar    *v_i;
931 
932   PetscFunctionBegin;
933   bs  = A->bs;
934   ai  = a->i;
935   aj  = a->j;
936   aa  = a->a;
937   bs2 = a->bs2;
938 
939   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
940 
941   bn  = row/bs;   /* Block number */
942   bp  = row % bs; /* Block Position */
943   M   = ai[bn+1] - ai[bn];
944   *nz = bs*M;
945 
946   if (v) {
947     *v = 0;
948     if (*nz) {
949       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
950       for (i=0; i<M; i++) { /* for each block in the block row */
951         v_i  = *v + i*bs;
952         aa_i = aa + bs2*(ai[bn] + i);
953         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
954       }
955     }
956   }
957 
958   if (idx) {
959     *idx = 0;
960     if (*nz) {
961       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
962       for (i=0; i<M; i++) { /* for each block in the block row */
963         idx_i = *idx + i*bs;
964         itmp  = bs*aj[ai[bn] + i];
965         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
966       }
967     }
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
974 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
975 {
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
980   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatTranspose_SeqBAIJ"
986 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
987 {
988   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
989   Mat            C;
990   PetscErrorCode ierr;
991   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
992   PetscInt       *rows,*cols,bs2=a->bs2;
993   PetscScalar    *array;
994 
995   PetscFunctionBegin;
996   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
997   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
998   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
999 
1000 #if defined(PETSC_USE_MAT_SINGLE)
1001   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1002   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1003 #else
1004   array = a->a;
1005 #endif
1006 
1007   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1008   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
1009   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1010   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1011   ierr = PetscFree(col);CHKERRQ(ierr);
1012   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1013   cols = rows + bs;
1014   for (i=0; i<mbs; i++) {
1015     cols[0] = i*bs;
1016     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1017     len = ai[i+1] - ai[i];
1018     for (j=0; j<len; j++) {
1019       rows[0] = (*aj++)*bs;
1020       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1021       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1022       array += bs2;
1023     }
1024   }
1025   ierr = PetscFree(rows);CHKERRQ(ierr);
1026 #if defined(PETSC_USE_MAT_SINGLE)
1027   ierr = PetscFree(array);
1028 #endif
1029 
1030   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1031   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032 
1033   if (B) {
1034     *B = C;
1035   } else {
1036     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1037   }
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1043 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1044 {
1045   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1046   PetscErrorCode ierr;
1047   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1048   int            fd;
1049   PetscScalar    *aa;
1050   FILE           *file;
1051 
1052   PetscFunctionBegin;
1053   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1054   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1055   col_lens[0] = MAT_FILE_COOKIE;
1056 
1057   col_lens[1] = A->m;
1058   col_lens[2] = A->n;
1059   col_lens[3] = a->nz*bs2;
1060 
1061   /* store lengths of each row and write (including header) to file */
1062   count = 0;
1063   for (i=0; i<a->mbs; i++) {
1064     for (j=0; j<bs; j++) {
1065       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1066     }
1067   }
1068   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1069   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1070 
1071   /* store column indices (zero start index) */
1072   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1073   count = 0;
1074   for (i=0; i<a->mbs; i++) {
1075     for (j=0; j<bs; j++) {
1076       for (k=a->i[i]; k<a->i[i+1]; k++) {
1077         for (l=0; l<bs; l++) {
1078           jj[count++] = bs*a->j[k] + l;
1079         }
1080       }
1081     }
1082   }
1083   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1084   ierr = PetscFree(jj);CHKERRQ(ierr);
1085 
1086   /* store nonzero values */
1087   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1088   count = 0;
1089   for (i=0; i<a->mbs; i++) {
1090     for (j=0; j<bs; j++) {
1091       for (k=a->i[i]; k<a->i[i+1]; k++) {
1092         for (l=0; l<bs; l++) {
1093           aa[count++] = a->a[bs2*k + l*bs + j];
1094         }
1095       }
1096     }
1097   }
1098   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1099   ierr = PetscFree(aa);CHKERRQ(ierr);
1100 
1101   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1102   if (file) {
1103     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1104   }
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1110 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1111 {
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscErrorCode    ierr;
1114   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1115   PetscViewerFormat format;
1116 
1117   PetscFunctionBegin;
1118   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1119   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1120     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1121   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1122     Mat aij;
1123     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1124     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1125     ierr = MatDestroy(aij);CHKERRQ(ierr);
1126   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1127      PetscFunctionReturn(0);
1128   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1129     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1130     for (i=0; i<a->mbs; i++) {
1131       for (j=0; j<bs; j++) {
1132         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1133         for (k=a->i[i]; k<a->i[i+1]; k++) {
1134           for (l=0; l<bs; l++) {
1135 #if defined(PETSC_USE_COMPLEX)
1136             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1137               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1138                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1139             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1140               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1141                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1142             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1143               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1144             }
1145 #else
1146             if (a->a[bs2*k + l*bs + j] != 0.0) {
1147               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1148             }
1149 #endif
1150           }
1151         }
1152         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1153       }
1154     }
1155     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1156   } else {
1157     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1158     for (i=0; i<a->mbs; i++) {
1159       for (j=0; j<bs; j++) {
1160         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1161         for (k=a->i[i]; k<a->i[i+1]; k++) {
1162           for (l=0; l<bs; l++) {
1163 #if defined(PETSC_USE_COMPLEX)
1164             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1165               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1166                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1167             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1168               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1169                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1170             } else {
1171               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1172             }
1173 #else
1174             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1175 #endif
1176           }
1177         }
1178         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1179       }
1180     }
1181     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1182   }
1183   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1189 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1190 {
1191   Mat            A = (Mat) Aa;
1192   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1195   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1196   MatScalar      *aa;
1197   PetscViewer    viewer;
1198 
1199   PetscFunctionBegin;
1200 
1201   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1202   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1203 
1204   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1205 
1206   /* loop over matrix elements drawing boxes */
1207   color = PETSC_DRAW_BLUE;
1208   for (i=0,row=0; i<mbs; i++,row+=bs) {
1209     for (j=a->i[i]; j<a->i[i+1]; j++) {
1210       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1211       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1212       aa = a->a + j*bs2;
1213       for (k=0; k<bs; k++) {
1214         for (l=0; l<bs; l++) {
1215           if (PetscRealPart(*aa++) >=  0.) continue;
1216           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1217         }
1218       }
1219     }
1220   }
1221   color = PETSC_DRAW_CYAN;
1222   for (i=0,row=0; i<mbs; i++,row+=bs) {
1223     for (j=a->i[i]; j<a->i[i+1]; j++) {
1224       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1225       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1226       aa = a->a + j*bs2;
1227       for (k=0; k<bs; k++) {
1228         for (l=0; l<bs; l++) {
1229           if (PetscRealPart(*aa++) != 0.) continue;
1230           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1231         }
1232       }
1233     }
1234   }
1235 
1236   color = PETSC_DRAW_RED;
1237   for (i=0,row=0; i<mbs; i++,row+=bs) {
1238     for (j=a->i[i]; j<a->i[i+1]; j++) {
1239       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1241       aa = a->a + j*bs2;
1242       for (k=0; k<bs; k++) {
1243         for (l=0; l<bs; l++) {
1244           if (PetscRealPart(*aa++) <= 0.) continue;
1245           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1246         }
1247       }
1248     }
1249   }
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1255 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1256 {
1257   PetscErrorCode ierr;
1258   PetscReal      xl,yl,xr,yr,w,h;
1259   PetscDraw      draw;
1260   PetscTruth     isnull;
1261 
1262   PetscFunctionBegin;
1263 
1264   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1265   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1266 
1267   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1268   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1269   xr += w;    yr += h;  xl = -w;     yl = -h;
1270   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1271   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1272   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatView_SeqBAIJ"
1278 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1279 {
1280   PetscErrorCode ierr;
1281   PetscTruth     iascii,isbinary,isdraw;
1282 
1283   PetscFunctionBegin;
1284   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1285   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1286   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1287   if (iascii){
1288     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1289   } else if (isbinary) {
1290     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1291   } else if (isdraw) {
1292     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1293   } else {
1294     Mat B;
1295     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1296     ierr = MatView(B,viewer);CHKERRQ(ierr);
1297     ierr = MatDestroy(B);CHKERRQ(ierr);
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1305 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1306 {
1307   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1308   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1309   PetscInt    *ai = a->i,*ailen = a->ilen;
1310   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1311   MatScalar   *ap,*aa = a->a,zero = 0.0;
1312 
1313   PetscFunctionBegin;
1314   for (k=0; k<m; k++) { /* loop over rows */
1315     row  = im[k]; brow = row/bs;
1316     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1317     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1318     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1319     nrow = ailen[brow];
1320     for (l=0; l<n; l++) { /* loop over columns */
1321       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1322       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1323       col  = in[l] ;
1324       bcol = col/bs;
1325       cidx = col%bs;
1326       ridx = row%bs;
1327       high = nrow;
1328       low  = 0; /* assume unsorted */
1329       while (high-low > 5) {
1330         t = (low+high)/2;
1331         if (rp[t] > bcol) high = t;
1332         else             low  = t;
1333       }
1334       for (i=low; i<high; i++) {
1335         if (rp[i] > bcol) break;
1336         if (rp[i] == bcol) {
1337           *v++ = ap[bs2*i+bs*cidx+ridx];
1338           goto finished;
1339         }
1340       }
1341       *v++ = zero;
1342       finished:;
1343     }
1344   }
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #if defined(PETSC_USE_MAT_SINGLE)
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1351 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1352 {
1353   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1354   PetscErrorCode ierr;
1355   PetscInt       i,N = m*n*b->bs2;
1356   MatScalar      *vsingle;
1357 
1358   PetscFunctionBegin;
1359   if (N > b->setvalueslen) {
1360     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1361     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1362     b->setvalueslen  = N;
1363   }
1364   vsingle = b->setvaluescopy;
1365   for (i=0; i<N; i++) {
1366     vsingle[i] = v[i];
1367   }
1368   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 #endif
1372 
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1376 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1377 {
1378   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1379   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1380   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1381   PetscErrorCode  ierr;
1382   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1383   PetscTruth      roworiented=a->roworiented;
1384   const MatScalar *value = v;
1385   MatScalar       *ap,*aa = a->a,*bap;
1386 
1387   PetscFunctionBegin;
1388   if (roworiented) {
1389     stepval = (n-1)*bs;
1390   } else {
1391     stepval = (m-1)*bs;
1392   }
1393   for (k=0; k<m; k++) { /* loop over added rows */
1394     row  = im[k];
1395     if (row < 0) continue;
1396 #if defined(PETSC_USE_DEBUG)
1397     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1398 #endif
1399     rp   = aj + ai[row];
1400     ap   = aa + bs2*ai[row];
1401     rmax = imax[row];
1402     nrow = ailen[row];
1403     low  = 0;
1404     high = nrow;
1405     for (l=0; l<n; l++) { /* loop over added columns */
1406       if (in[l] < 0) continue;
1407 #if defined(PETSC_USE_DEBUG)
1408       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1409 #endif
1410       col = in[l];
1411       if (roworiented) {
1412         value = v + k*(stepval+bs)*bs + l*bs;
1413       } else {
1414         value = v + l*(stepval+bs)*bs + k*bs;
1415       }
1416       if (col < lastcol) low = 0; else high = nrow;
1417       lastcol = col;
1418       while (high-low > 7) {
1419         t = (low+high)/2;
1420         if (rp[t] > col) high = t;
1421         else             low  = t;
1422       }
1423       for (i=low; i<high; i++) {
1424         if (rp[i] > col) break;
1425         if (rp[i] == col) {
1426           bap  = ap +  bs2*i;
1427           if (roworiented) {
1428             if (is == ADD_VALUES) {
1429               for (ii=0; ii<bs; ii++,value+=stepval) {
1430                 for (jj=ii; jj<bs2; jj+=bs) {
1431                   bap[jj] += *value++;
1432                 }
1433               }
1434             } else {
1435               for (ii=0; ii<bs; ii++,value+=stepval) {
1436                 for (jj=ii; jj<bs2; jj+=bs) {
1437                   bap[jj] = *value++;
1438                 }
1439               }
1440             }
1441           } else {
1442             if (is == ADD_VALUES) {
1443               for (ii=0; ii<bs; ii++,value+=stepval) {
1444                 for (jj=0; jj<bs; jj++) {
1445                   *bap++ += *value++;
1446                 }
1447               }
1448             } else {
1449               for (ii=0; ii<bs; ii++,value+=stepval) {
1450                 for (jj=0; jj<bs; jj++) {
1451                   *bap++  = *value++;
1452                 }
1453               }
1454             }
1455           }
1456           goto noinsert2;
1457         }
1458       }
1459       if (nonew == 1) goto noinsert2;
1460       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1461       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1462       N = nrow++ - 1;
1463       /* shift up all the later entries in this row */
1464       for (ii=N; ii>=i; ii--) {
1465         rp[ii+1] = rp[ii];
1466         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1467       }
1468       if (N >= i) {
1469         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1470       }
1471       rp[i] = col;
1472       bap   = ap +  bs2*i;
1473       if (roworiented) {
1474         for (ii=0; ii<bs; ii++,value+=stepval) {
1475           for (jj=ii; jj<bs2; jj+=bs) {
1476             bap[jj] = *value++;
1477           }
1478         }
1479       } else {
1480         for (ii=0; ii<bs; ii++,value+=stepval) {
1481           for (jj=0; jj<bs; jj++) {
1482             *bap++  = *value++;
1483           }
1484         }
1485       }
1486       noinsert2:;
1487       low = i;
1488     }
1489     ailen[row] = nrow;
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1496 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1497 {
1498   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1499   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1500   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1501   PetscErrorCode ierr;
1502   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1503   MatScalar      *aa = a->a,*ap;
1504   PetscReal      ratio=0.6;
1505 
1506   PetscFunctionBegin;
1507   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1508 
1509   if (m) rmax = ailen[0];
1510   for (i=1; i<mbs; i++) {
1511     /* move each row back by the amount of empty slots (fshift) before it*/
1512     fshift += imax[i-1] - ailen[i-1];
1513     rmax   = PetscMax(rmax,ailen[i]);
1514     if (fshift) {
1515       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1516       N = ailen[i];
1517       for (j=0; j<N; j++) {
1518         ip[j-fshift] = ip[j];
1519         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1520       }
1521     }
1522     ai[i] = ai[i-1] + ailen[i-1];
1523   }
1524   if (mbs) {
1525     fshift += imax[mbs-1] - ailen[mbs-1];
1526     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1527   }
1528   /* reset ilen and imax for each row */
1529   for (i=0; i<mbs; i++) {
1530     ailen[i] = imax[i] = ai[i+1] - ai[i];
1531   }
1532   a->nz = ai[mbs];
1533 
1534   /* diagonals may have moved, so kill the diagonal pointers */
1535   a->idiagvalid = PETSC_FALSE;
1536   if (fshift && a->diag) {
1537     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1538     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1539     a->diag = 0;
1540   }
1541   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1542   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1543   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1544   a->reallocs          = 0;
1545   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1546 
1547   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1548   if (a->compressedrow.use){
1549     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1550   }
1551 
1552   A->same_nonzero = PETSC_TRUE;
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 /*
1557    This function returns an array of flags which indicate the locations of contiguous
1558    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1559    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1560    Assume: sizes should be long enough to hold all the values.
1561 */
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1564 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1565 {
1566   PetscInt   i,j,k,row;
1567   PetscTruth flg;
1568 
1569   PetscFunctionBegin;
1570   for (i=0,j=0; i<n; j++) {
1571     row = idx[i];
1572     if (row%bs!=0) { /* Not the begining of a block */
1573       sizes[j] = 1;
1574       i++;
1575     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1576       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1577       i++;
1578     } else { /* Begining of the block, so check if the complete block exists */
1579       flg = PETSC_TRUE;
1580       for (k=1; k<bs; k++) {
1581         if (row+k != idx[i+k]) { /* break in the block */
1582           flg = PETSC_FALSE;
1583           break;
1584         }
1585       }
1586       if (flg) { /* No break in the bs */
1587         sizes[j] = bs;
1588         i+= bs;
1589       } else {
1590         sizes[j] = 1;
1591         i++;
1592       }
1593     }
1594   }
1595   *bs_max = j;
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1601 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1602 {
1603   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1604   PetscErrorCode ierr;
1605   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1606   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1607   PetscScalar    zero = 0.0;
1608   MatScalar      *aa;
1609 
1610   PetscFunctionBegin;
1611   /* Make a copy of the IS and  sort it */
1612   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1613   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1614 
1615   /* allocate memory for rows,sizes */
1616   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1617   sizes = rows + is_n;
1618 
1619   /* copy IS values to rows, and sort them */
1620   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1621   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1622   if (baij->keepzeroedrows) {
1623     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1624     bs_max = is_n;
1625     A->same_nonzero = PETSC_TRUE;
1626   } else {
1627     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1628     A->same_nonzero = PETSC_FALSE;
1629   }
1630   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1631 
1632   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1633     row   = rows[j];
1634     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1635     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1636     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1637     if (sizes[i] == bs && !baij->keepzeroedrows) {
1638       if (diag) {
1639         if (baij->ilen[row/bs] > 0) {
1640           baij->ilen[row/bs]       = 1;
1641           baij->j[baij->i[row/bs]] = row/bs;
1642           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1643         }
1644         /* Now insert all the diagonal values for this bs */
1645         for (k=0; k<bs; k++) {
1646           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1647         }
1648       } else { /* (!diag) */
1649         baij->ilen[row/bs] = 0;
1650       } /* end (!diag) */
1651     } else { /* (sizes[i] != bs) */
1652 #if defined (PETSC_USE_DEBUG)
1653       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1654 #endif
1655       for (k=0; k<count; k++) {
1656         aa[0] =  zero;
1657         aa    += bs;
1658       }
1659       if (diag) {
1660         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1661       }
1662     }
1663   }
1664 
1665   ierr = PetscFree(rows);CHKERRQ(ierr);
1666   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1672 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1673 {
1674   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1675   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1676   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1677   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1678   PetscErrorCode ierr;
1679   PetscInt       ridx,cidx,bs2=a->bs2;
1680   PetscTruth     roworiented=a->roworiented;
1681   MatScalar      *ap,value,*aa=a->a,*bap;
1682 
1683   PetscFunctionBegin;
1684   for (k=0; k<m; k++) { /* loop over added rows */
1685     row  = im[k];
1686     brow = row/bs;
1687     if (row < 0) continue;
1688 #if defined(PETSC_USE_DEBUG)
1689     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1690 #endif
1691     rp   = aj + ai[brow];
1692     ap   = aa + bs2*ai[brow];
1693     rmax = imax[brow];
1694     nrow = ailen[brow];
1695     low  = 0;
1696     high = nrow;
1697     for (l=0; l<n; l++) { /* loop over added columns */
1698       if (in[l] < 0) continue;
1699 #if defined(PETSC_USE_DEBUG)
1700       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1701 #endif
1702       col = in[l]; bcol = col/bs;
1703       ridx = row % bs; cidx = col % bs;
1704       if (roworiented) {
1705         value = v[l + k*n];
1706       } else {
1707         value = v[k + l*m];
1708       }
1709       if (col < lastcol) low = 0; else high = nrow;
1710       lastcol = col;
1711       while (high-low > 7) {
1712         t = (low+high)/2;
1713         if (rp[t] > bcol) high = t;
1714         else              low  = t;
1715       }
1716       for (i=low; i<high; i++) {
1717         if (rp[i] > bcol) break;
1718         if (rp[i] == bcol) {
1719           bap  = ap +  bs2*i + bs*cidx + ridx;
1720           if (is == ADD_VALUES) *bap += value;
1721           else                  *bap  = value;
1722           goto noinsert1;
1723         }
1724       }
1725       if (nonew == 1) goto noinsert1;
1726       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1727       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1728       N = nrow++ - 1;
1729       /* shift up all the later entries in this row */
1730       for (ii=N; ii>=i; ii--) {
1731         rp[ii+1] = rp[ii];
1732         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1733       }
1734       if (N>=i) {
1735         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1736       }
1737       rp[i]                      = bcol;
1738       ap[bs2*i + bs*cidx + ridx] = value;
1739       a->nz++;
1740       noinsert1:;
1741       low = i;
1742     }
1743     ailen[brow] = nrow;
1744   }
1745   A->same_nonzero = PETSC_FALSE;
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1752 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1753 {
1754   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1755   Mat            outA;
1756   PetscErrorCode ierr;
1757   PetscTruth     row_identity,col_identity;
1758 
1759   PetscFunctionBegin;
1760   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1761   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1762   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1763   if (!row_identity || !col_identity) {
1764     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1765   }
1766 
1767   outA          = inA;
1768   inA->factor   = FACTOR_LU;
1769 
1770   if (!a->diag) {
1771     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1772   }
1773 
1774   a->row        = row;
1775   a->col        = col;
1776   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1777   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1778 
1779   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1780   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1781   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1782 
1783   /*
1784       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1785       for ILU(0) factorization with natural ordering
1786   */
1787   if (inA->bs < 8) {
1788     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1789   } else {
1790     if (!a->solve_work) {
1791       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1792       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1793     }
1794   }
1795 
1796   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1797 
1798   PetscFunctionReturn(0);
1799 }
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1802 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1803 {
1804   static PetscTruth called = PETSC_FALSE;
1805   MPI_Comm          comm = A->comm;
1806   PetscErrorCode    ierr;
1807 
1808   PetscFunctionBegin;
1809   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1810   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1811   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 EXTERN_C_BEGIN
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1819 {
1820   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1821   PetscInt    i,nz,nbs;
1822 
1823   PetscFunctionBegin;
1824   nz  = baij->maxnz/baij->bs2;
1825   nbs = baij->nbs;
1826   for (i=0; i<nz; i++) {
1827     baij->j[i] = indices[i];
1828   }
1829   baij->nz = nz;
1830   for (i=0; i<nbs; i++) {
1831     baij->ilen[i] = baij->imax[i];
1832   }
1833 
1834   PetscFunctionReturn(0);
1835 }
1836 EXTERN_C_END
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1840 /*@
1841     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1842        in the matrix.
1843 
1844   Input Parameters:
1845 +  mat - the SeqBAIJ matrix
1846 -  indices - the column indices
1847 
1848   Level: advanced
1849 
1850   Notes:
1851     This can be called if you have precomputed the nonzero structure of the
1852   matrix and want to provide it to the matrix object to improve the performance
1853   of the MatSetValues() operation.
1854 
1855     You MUST have set the correct numbers of nonzeros per row in the call to
1856   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1857 
1858     MUST be called before any calls to MatSetValues();
1859 
1860 @*/
1861 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1862 {
1863   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1864 
1865   PetscFunctionBegin;
1866   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1867   PetscValidPointer(indices,2);
1868   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1869   if (f) {
1870     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1871   } else {
1872     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1873   }
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1879 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1880 {
1881   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1882   PetscErrorCode ierr;
1883   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1884   PetscReal      atmp;
1885   PetscScalar    *x,zero = 0.0;
1886   MatScalar      *aa;
1887   PetscInt       ncols,brow,krow,kcol;
1888 
1889   PetscFunctionBegin;
1890   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1891   bs   = A->bs;
1892   aa   = a->a;
1893   ai   = a->i;
1894   aj   = a->j;
1895   mbs = a->mbs;
1896 
1897   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1898   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1899   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1900   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1901   for (i=0; i<mbs; i++) {
1902     ncols = ai[1] - ai[0]; ai++;
1903     brow  = bs*i;
1904     for (j=0; j<ncols; j++){
1905       /* bcol = bs*(*aj); */
1906       for (kcol=0; kcol<bs; kcol++){
1907         for (krow=0; krow<bs; krow++){
1908           atmp = PetscAbsScalar(*aa); aa++;
1909           row = brow + krow;    /* row index */
1910           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1911           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1912         }
1913       }
1914       aj++;
1915     }
1916   }
1917   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1923 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1924 {
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1934 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1935 {
1936   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1937   PetscFunctionBegin;
1938   *array = a->a;
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1944 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1945 {
1946   PetscFunctionBegin;
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #include "petscblaslapack.h"
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1953 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1954 {
1955   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1956   PetscErrorCode ierr;
1957   PetscInt       i,bs=Y->bs,j,bs2;
1958   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1959 
1960   PetscFunctionBegin;
1961   if (str == SAME_NONZERO_PATTERN) {
1962     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1963   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1964     if (y->xtoy && y->XtoY != X) {
1965       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1966       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1967     }
1968     if (!y->xtoy) { /* get xtoy */
1969       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1970       y->XtoY = X;
1971     }
1972     bs2 = bs*bs;
1973     for (i=0; i<x->nz; i++) {
1974       j = 0;
1975       while (j < bs2){
1976         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1977         j++;
1978       }
1979     }
1980     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
1981   } else {
1982     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1983   }
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 /* -------------------------------------------------------------------*/
1988 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1989        MatGetRow_SeqBAIJ,
1990        MatRestoreRow_SeqBAIJ,
1991        MatMult_SeqBAIJ_N,
1992 /* 4*/ MatMultAdd_SeqBAIJ_N,
1993        MatMultTranspose_SeqBAIJ,
1994        MatMultTransposeAdd_SeqBAIJ,
1995        MatSolve_SeqBAIJ_N,
1996        0,
1997        0,
1998 /*10*/ 0,
1999        MatLUFactor_SeqBAIJ,
2000        0,
2001        0,
2002        MatTranspose_SeqBAIJ,
2003 /*15*/ MatGetInfo_SeqBAIJ,
2004        MatEqual_SeqBAIJ,
2005        MatGetDiagonal_SeqBAIJ,
2006        MatDiagonalScale_SeqBAIJ,
2007        MatNorm_SeqBAIJ,
2008 /*20*/ 0,
2009        MatAssemblyEnd_SeqBAIJ,
2010        0,
2011        MatSetOption_SeqBAIJ,
2012        MatZeroEntries_SeqBAIJ,
2013 /*25*/ MatZeroRows_SeqBAIJ,
2014        MatLUFactorSymbolic_SeqBAIJ,
2015        MatLUFactorNumeric_SeqBAIJ_N,
2016        MatCholeskyFactorSymbolic_SeqBAIJ,
2017        MatCholeskyFactorNumeric_SeqBAIJ_N,
2018 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2019        MatILUFactorSymbolic_SeqBAIJ,
2020        MatICCFactorSymbolic_SeqBAIJ,
2021        MatGetArray_SeqBAIJ,
2022        MatRestoreArray_SeqBAIJ,
2023 /*35*/ MatDuplicate_SeqBAIJ,
2024        0,
2025        0,
2026        MatILUFactor_SeqBAIJ,
2027        0,
2028 /*40*/ MatAXPY_SeqBAIJ,
2029        MatGetSubMatrices_SeqBAIJ,
2030        MatIncreaseOverlap_SeqBAIJ,
2031        MatGetValues_SeqBAIJ,
2032        0,
2033 /*45*/ MatPrintHelp_SeqBAIJ,
2034        MatScale_SeqBAIJ,
2035        0,
2036        0,
2037        0,
2038 /*50*/ 0,
2039        MatGetRowIJ_SeqBAIJ,
2040        MatRestoreRowIJ_SeqBAIJ,
2041        0,
2042        0,
2043 /*55*/ 0,
2044        0,
2045        0,
2046        0,
2047        MatSetValuesBlocked_SeqBAIJ,
2048 /*60*/ MatGetSubMatrix_SeqBAIJ,
2049        MatDestroy_SeqBAIJ,
2050        MatView_SeqBAIJ,
2051        MatGetPetscMaps_Petsc,
2052        0,
2053 /*65*/ 0,
2054        0,
2055        0,
2056        0,
2057        0,
2058 /*70*/ MatGetRowMax_SeqBAIJ,
2059        MatConvert_Basic,
2060        0,
2061        0,
2062        0,
2063 /*75*/ 0,
2064        0,
2065        0,
2066        0,
2067        0,
2068 /*80*/ 0,
2069        0,
2070        0,
2071        0,
2072        MatLoad_SeqBAIJ,
2073 /*85*/ 0,
2074        0,
2075        0,
2076        0,
2077        0,
2078 /*90*/ 0,
2079        0,
2080        0,
2081        0,
2082        0,
2083 /*95*/ 0,
2084        0,
2085        0,
2086        0};
2087 
2088 EXTERN_C_BEGIN
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2091 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2092 {
2093   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2094   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2095   PetscErrorCode ierr;
2096 
2097   PetscFunctionBegin;
2098   if (aij->nonew != 1) {
2099     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2100   }
2101 
2102   /* allocate space for values if not already there */
2103   if (!aij->saved_values) {
2104     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2105   }
2106 
2107   /* copy values over */
2108   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2109   PetscFunctionReturn(0);
2110 }
2111 EXTERN_C_END
2112 
2113 EXTERN_C_BEGIN
2114 #undef __FUNCT__
2115 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2116 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2117 {
2118   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2119   PetscErrorCode ierr;
2120   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2121 
2122   PetscFunctionBegin;
2123   if (aij->nonew != 1) {
2124     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2125   }
2126   if (!aij->saved_values) {
2127     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2128   }
2129 
2130   /* copy values over */
2131   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 EXTERN_C_END
2135 
2136 EXTERN_C_BEGIN
2137 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2138 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2139 EXTERN_C_END
2140 
2141 EXTERN_C_BEGIN
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2144 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2145 {
2146   Mat_SeqBAIJ    *b;
2147   PetscErrorCode ierr;
2148   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2149   PetscTruth     flg,skipallocation = PETSC_FALSE;
2150 
2151   PetscFunctionBegin;
2152 
2153   if (nz == MAT_SKIP_ALLOCATION) {
2154     skipallocation = PETSC_TRUE;
2155     nz             = 0;
2156   }
2157 
2158   B->preallocated = PETSC_TRUE;
2159   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2160   if (nnz && newbs != bs) {
2161     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2162   }
2163   bs   = newbs;
2164 
2165   mbs  = B->m/bs;
2166   nbs  = B->n/bs;
2167   bs2  = bs*bs;
2168 
2169   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2170     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2171   }
2172 
2173   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2174   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2175   if (nnz) {
2176     for (i=0; i<mbs; i++) {
2177       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2178       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2179     }
2180   }
2181 
2182   b       = (Mat_SeqBAIJ*)B->data;
2183   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2184   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2185   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2186   if (!flg) {
2187     switch (bs) {
2188     case 1:
2189       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2190       B->ops->mult            = MatMult_SeqBAIJ_1;
2191       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2192       break;
2193     case 2:
2194       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2195       B->ops->mult            = MatMult_SeqBAIJ_2;
2196       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2197       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2198       break;
2199     case 3:
2200       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2201       B->ops->mult            = MatMult_SeqBAIJ_3;
2202       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2203       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2204       break;
2205     case 4:
2206       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2207       B->ops->mult            = MatMult_SeqBAIJ_4;
2208       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2209       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2210       break;
2211     case 5:
2212       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2213       B->ops->mult            = MatMult_SeqBAIJ_5;
2214       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2215       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2216       break;
2217     case 6:
2218       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2219       B->ops->mult            = MatMult_SeqBAIJ_6;
2220       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2221       break;
2222     case 7:
2223       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2224       B->ops->mult            = MatMult_SeqBAIJ_7;
2225       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2226       break;
2227     default:
2228       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2229       B->ops->mult            = MatMult_SeqBAIJ_N;
2230       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2231       break;
2232     }
2233   }
2234   B->bs      = bs;
2235   b->mbs     = mbs;
2236   b->nbs     = nbs;
2237   if (!skipallocation) {
2238     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2239     /* b->ilen will count nonzeros in each block row so far. */
2240     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2241     if (!nnz) {
2242       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2243       else if (nz <= 0)        nz = 1;
2244       for (i=0; i<mbs; i++) b->imax[i] = nz;
2245       nz = nz*mbs;
2246     } else {
2247       nz = 0;
2248       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2249     }
2250 
2251     /* allocate the matrix space */
2252     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2253     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2254     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2255     b->singlemalloc = PETSC_TRUE;
2256 
2257     b->i[0] = 0;
2258     for (i=1; i<mbs+1; i++) {
2259       b->i[i] = b->i[i-1] + b->imax[i-1];
2260     }
2261   }
2262 
2263   B->bs               = bs;
2264   b->bs2              = bs2;
2265   b->mbs              = mbs;
2266   b->nz               = 0;
2267   b->maxnz            = nz*bs2;
2268   B->info.nz_unneeded = (PetscReal)b->maxnz;
2269   PetscFunctionReturn(0);
2270 }
2271 EXTERN_C_END
2272 
2273 /*MC
2274    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2275    block sparse compressed row format.
2276 
2277    Options Database Keys:
2278 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2279 
2280   Level: beginner
2281 
2282 .seealso: MatCreateSeqBAIJ
2283 M*/
2284 
2285 EXTERN_C_BEGIN
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatCreate_SeqBAIJ"
2288 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2289 {
2290   PetscErrorCode ierr;
2291   PetscMPIInt    size;
2292   Mat_SeqBAIJ    *b;
2293 
2294   PetscFunctionBegin;
2295   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2296   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2297 
2298   B->m = B->M = PetscMax(B->m,B->M);
2299   B->n = B->N = PetscMax(B->n,B->N);
2300   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2301   B->data = (void*)b;
2302   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2303   B->factor           = 0;
2304   B->mapping          = 0;
2305   b->row              = 0;
2306   b->col              = 0;
2307   b->icol             = 0;
2308   b->reallocs         = 0;
2309   b->saved_values     = 0;
2310 #if defined(PETSC_USE_MAT_SINGLE)
2311   b->setvalueslen     = 0;
2312   b->setvaluescopy    = PETSC_NULL;
2313 #endif
2314 
2315   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2316   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2317 
2318   b->sorted           = PETSC_FALSE;
2319   b->roworiented      = PETSC_TRUE;
2320   b->nonew            = 0;
2321   b->diag             = 0;
2322   b->solve_work       = 0;
2323   b->mult_work        = 0;
2324   B->spptr            = 0;
2325   B->info.nz_unneeded = (PetscReal)b->maxnz;
2326   b->keepzeroedrows   = PETSC_FALSE;
2327   b->xtoy              = 0;
2328   b->XtoY              = 0;
2329   b->compressedrow.use     = PETSC_FALSE;
2330   b->compressedrow.nrows   = 0;
2331   b->compressedrow.i       = PETSC_NULL;
2332   b->compressedrow.rindex  = PETSC_NULL;
2333   b->compressedrow.checked = PETSC_FALSE;
2334   B->same_nonzero          = PETSC_FALSE;
2335 
2336   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2337                                      "MatInvertBlockDiagonal_SeqBAIJ",
2338                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2339   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2340                                      "MatStoreValues_SeqBAIJ",
2341                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2343                                      "MatRetrieveValues_SeqBAIJ",
2344                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2346                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2347                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2348   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2349                                      "MatConvert_SeqBAIJ_SeqAIJ",
2350                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2352                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2353                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2355                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2356                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 EXTERN_C_END
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2363 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2364 {
2365   Mat            C;
2366   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2367   PetscErrorCode ierr;
2368   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2369 
2370   PetscFunctionBegin;
2371   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2372 
2373   *B = 0;
2374   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2375   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2376   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2377   c    = (Mat_SeqBAIJ*)C->data;
2378 
2379   C->M   = A->M;
2380   C->N   = A->N;
2381   C->bs  = A->bs;
2382   c->bs2 = a->bs2;
2383   c->mbs = a->mbs;
2384   c->nbs = a->nbs;
2385 
2386   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2387   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2388   for (i=0; i<mbs; i++) {
2389     c->imax[i] = a->imax[i];
2390     c->ilen[i] = a->ilen[i];
2391   }
2392 
2393   /* allocate the matrix space */
2394   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2395   c->singlemalloc = PETSC_TRUE;
2396   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2397   if (mbs > 0) {
2398     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2399     if (cpvalues == MAT_COPY_VALUES) {
2400       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2401     } else {
2402       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2403     }
2404   }
2405   c->sorted      = a->sorted;
2406   c->roworiented = a->roworiented;
2407   c->nonew       = a->nonew;
2408 
2409   if (a->diag) {
2410     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2411     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2412     for (i=0; i<mbs; i++) {
2413       c->diag[i] = a->diag[i];
2414     }
2415   } else c->diag        = 0;
2416   c->nz                 = a->nz;
2417   c->maxnz              = a->maxnz;
2418   c->solve_work         = 0;
2419   c->mult_work          = 0;
2420   C->preallocated       = PETSC_TRUE;
2421   C->assembled          = PETSC_TRUE;
2422 
2423   c->compressedrow.use     = a->compressedrow.use;
2424   c->compressedrow.nrows   = a->compressedrow.nrows;
2425   c->compressedrow.checked = a->compressedrow.checked;
2426   if ( a->compressedrow.checked && a->compressedrow.use){
2427     i = a->compressedrow.nrows;
2428     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2429     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2430     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2431     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2432   } else {
2433     c->compressedrow.use    = PETSC_FALSE;
2434     c->compressedrow.i      = PETSC_NULL;
2435     c->compressedrow.rindex = PETSC_NULL;
2436   }
2437   C->same_nonzero = A->same_nonzero;
2438   *B = C;
2439   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatLoad_SeqBAIJ"
2445 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2446 {
2447   Mat_SeqBAIJ    *a;
2448   Mat            B;
2449   PetscErrorCode ierr;
2450   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2451   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2452   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2453   PetscInt       *masked,nmask,tmp,bs2,ishift;
2454   PetscMPIInt    size;
2455   int            fd;
2456   PetscScalar    *aa;
2457   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2458 
2459   PetscFunctionBegin;
2460   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2461   bs2  = bs*bs;
2462 
2463   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2464   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2465   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2466   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2467   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2468   M = header[1]; N = header[2]; nz = header[3];
2469 
2470   if (header[3] < 0) {
2471     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2472   }
2473 
2474   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2475 
2476   /*
2477      This code adds extra rows to make sure the number of rows is
2478     divisible by the blocksize
2479   */
2480   mbs        = M/bs;
2481   extra_rows = bs - M + bs*(mbs);
2482   if (extra_rows == bs) extra_rows = 0;
2483   else                  mbs++;
2484   if (extra_rows) {
2485     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2486   }
2487 
2488   /* read in row lengths */
2489   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2490   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2491   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2492 
2493   /* read in column indices */
2494   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2495   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2496   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2497 
2498   /* loop over row lengths determining block row lengths */
2499   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2500   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2501   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2502   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2503   masked   = mask + mbs;
2504   rowcount = 0; nzcount = 0;
2505   for (i=0; i<mbs; i++) {
2506     nmask = 0;
2507     for (j=0; j<bs; j++) {
2508       kmax = rowlengths[rowcount];
2509       for (k=0; k<kmax; k++) {
2510         tmp = jj[nzcount++]/bs;
2511         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2512       }
2513       rowcount++;
2514     }
2515     browlengths[i] += nmask;
2516     /* zero out the mask elements we set */
2517     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2518   }
2519 
2520   /* create our matrix */
2521   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2522   ierr = MatSetType(B,type);CHKERRQ(ierr);
2523   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2524   a = (Mat_SeqBAIJ*)B->data;
2525 
2526   /* set matrix "i" values */
2527   a->i[0] = 0;
2528   for (i=1; i<= mbs; i++) {
2529     a->i[i]      = a->i[i-1] + browlengths[i-1];
2530     a->ilen[i-1] = browlengths[i-1];
2531   }
2532   a->nz         = 0;
2533   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2534 
2535   /* read in nonzero values */
2536   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2537   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2538   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2539 
2540   /* set "a" and "j" values into matrix */
2541   nzcount = 0; jcount = 0;
2542   for (i=0; i<mbs; i++) {
2543     nzcountb = nzcount;
2544     nmask    = 0;
2545     for (j=0; j<bs; j++) {
2546       kmax = rowlengths[i*bs+j];
2547       for (k=0; k<kmax; k++) {
2548         tmp = jj[nzcount++]/bs;
2549 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2550       }
2551     }
2552     /* sort the masked values */
2553     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2554 
2555     /* set "j" values into matrix */
2556     maskcount = 1;
2557     for (j=0; j<nmask; j++) {
2558       a->j[jcount++]  = masked[j];
2559       mask[masked[j]] = maskcount++;
2560     }
2561     /* set "a" values into matrix */
2562     ishift = bs2*a->i[i];
2563     for (j=0; j<bs; j++) {
2564       kmax = rowlengths[i*bs+j];
2565       for (k=0; k<kmax; k++) {
2566         tmp       = jj[nzcountb]/bs ;
2567         block     = mask[tmp] - 1;
2568         point     = jj[nzcountb] - bs*tmp;
2569         idx       = ishift + bs2*block + j + bs*point;
2570         a->a[idx] = (MatScalar)aa[nzcountb++];
2571       }
2572     }
2573     /* zero out the mask elements we set */
2574     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2575   }
2576   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2577 
2578   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2579   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2580   ierr = PetscFree(aa);CHKERRQ(ierr);
2581   ierr = PetscFree(jj);CHKERRQ(ierr);
2582   ierr = PetscFree(mask);CHKERRQ(ierr);
2583 
2584   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2585   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2586   ierr = MatView_Private(B);CHKERRQ(ierr);
2587 
2588   *A = B;
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 #undef __FUNCT__
2593 #define __FUNCT__ "MatCreateSeqBAIJ"
2594 /*@C
2595    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2596    compressed row) format.  For good matrix assembly performance the
2597    user should preallocate the matrix storage by setting the parameter nz
2598    (or the array nnz).  By setting these parameters accurately, performance
2599    during matrix assembly can be increased by more than a factor of 50.
2600 
2601    Collective on MPI_Comm
2602 
2603    Input Parameters:
2604 +  comm - MPI communicator, set to PETSC_COMM_SELF
2605 .  bs - size of block
2606 .  m - number of rows
2607 .  n - number of columns
2608 .  nz - number of nonzero blocks  per block row (same for all rows)
2609 -  nnz - array containing the number of nonzero blocks in the various block rows
2610          (possibly different for each block row) or PETSC_NULL
2611 
2612    Output Parameter:
2613 .  A - the matrix
2614 
2615    Options Database Keys:
2616 .   -mat_no_unroll - uses code that does not unroll the loops in the
2617                      block calculations (much slower)
2618 .    -mat_block_size - size of the blocks to use
2619 
2620    Level: intermediate
2621 
2622    Notes:
2623    The number of rows and columns must be divisible by blocksize.
2624 
2625    If the nnz parameter is given then the nz parameter is ignored
2626 
2627    A nonzero block is any block that as 1 or more nonzeros in it
2628 
2629    The block AIJ format is fully compatible with standard Fortran 77
2630    storage.  That is, the stored row and column indices can begin at
2631    either one (as in Fortran) or zero.  See the users' manual for details.
2632 
2633    Specify the preallocated storage with either nz or nnz (not both).
2634    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2635    allocation.  For additional details, see the users manual chapter on
2636    matrices.
2637 
2638 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2639 @*/
2640 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2641 {
2642   PetscErrorCode ierr;
2643 
2644   PetscFunctionBegin;
2645   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2646   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2647   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2653 /*@C
2654    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2655    per row in the matrix. For good matrix assembly performance the
2656    user should preallocate the matrix storage by setting the parameter nz
2657    (or the array nnz).  By setting these parameters accurately, performance
2658    during matrix assembly can be increased by more than a factor of 50.
2659 
2660    Collective on MPI_Comm
2661 
2662    Input Parameters:
2663 +  A - the matrix
2664 .  bs - size of block
2665 .  nz - number of block nonzeros per block row (same for all rows)
2666 -  nnz - array containing the number of block nonzeros in the various block rows
2667          (possibly different for each block row) or PETSC_NULL
2668 
2669    Options Database Keys:
2670 .   -mat_no_unroll - uses code that does not unroll the loops in the
2671                      block calculations (much slower)
2672 .    -mat_block_size - size of the blocks to use
2673 
2674    Level: intermediate
2675 
2676    Notes:
2677    If the nnz parameter is given then the nz parameter is ignored
2678 
2679    The block AIJ format is fully compatible with standard Fortran 77
2680    storage.  That is, the stored row and column indices can begin at
2681    either one (as in Fortran) or zero.  See the users' manual for details.
2682 
2683    Specify the preallocated storage with either nz or nnz (not both).
2684    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2685    allocation.  For additional details, see the users manual chapter on
2686    matrices.
2687 
2688 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2689 @*/
2690 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2691 {
2692   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2693 
2694   PetscFunctionBegin;
2695   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2696   if (f) {
2697     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2698   }
2699   PetscFunctionReturn(0);
2700 }
2701 
2702