xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f533fdf2fac09d161b4cb34a79e3a3bdb8821e2d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
831   if (a->row) {
832     ierr = ISDestroy(a->row);CHKERRQ(ierr);
833   }
834   if (a->col) {
835     ierr = ISDestroy(a->col);CHKERRQ(ierr);
836   }
837   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
838   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
839   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
840   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
841   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
842   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
843   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
844 #if defined(PETSC_USE_MAT_SINGLE)
845   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
846 #endif
847   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
848   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
849 
850   ierr = PetscFree(a);CHKERRQ(ierr);
851 
852   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatSetOption_SeqBAIJ"
864 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
865 {
866   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   switch (op) {
871   case MAT_ROW_ORIENTED:
872     a->roworiented    = PETSC_TRUE;
873     break;
874   case MAT_COLUMN_ORIENTED:
875     a->roworiented    = PETSC_FALSE;
876     break;
877   case MAT_COLUMNS_SORTED:
878     a->sorted         = PETSC_TRUE;
879     break;
880   case MAT_COLUMNS_UNSORTED:
881     a->sorted         = PETSC_FALSE;
882     break;
883   case MAT_KEEP_ZEROED_ROWS:
884     a->keepzeroedrows = PETSC_TRUE;
885     break;
886   case MAT_NO_NEW_NONZERO_LOCATIONS:
887     a->nonew          = 1;
888     break;
889   case MAT_NEW_NONZERO_LOCATION_ERR:
890     a->nonew          = -1;
891     break;
892   case MAT_NEW_NONZERO_ALLOCATION_ERR:
893     a->nonew          = -2;
894     break;
895   case MAT_YES_NEW_NONZERO_LOCATIONS:
896     a->nonew          = 0;
897     break;
898   case MAT_ROWS_SORTED:
899   case MAT_ROWS_UNSORTED:
900   case MAT_YES_NEW_DIAGONALS:
901   case MAT_IGNORE_OFF_PROC_ENTRIES:
902   case MAT_USE_HASH_TABLE:
903     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
904     break;
905   case MAT_NO_NEW_DIAGONALS:
906     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
907   case MAT_SYMMETRIC:
908   case MAT_STRUCTURALLY_SYMMETRIC:
909   case MAT_NOT_SYMMETRIC:
910   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
911   case MAT_HERMITIAN:
912   case MAT_NOT_HERMITIAN:
913   case MAT_SYMMETRY_ETERNAL:
914   case MAT_NOT_SYMMETRY_ETERNAL:
915     break;
916   default:
917     SETERRQ(PETSC_ERR_SUP,"unknown option");
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatGetRow_SeqBAIJ"
924 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
925 {
926   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
927   PetscErrorCode ierr;
928   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
929   MatScalar      *aa,*aa_i;
930   PetscScalar    *v_i;
931 
932   PetscFunctionBegin;
933   bs  = A->bs;
934   ai  = a->i;
935   aj  = a->j;
936   aa  = a->a;
937   bs2 = a->bs2;
938 
939   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
940 
941   bn  = row/bs;   /* Block number */
942   bp  = row % bs; /* Block Position */
943   M   = ai[bn+1] - ai[bn];
944   *nz = bs*M;
945 
946   if (v) {
947     *v = 0;
948     if (*nz) {
949       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
950       for (i=0; i<M; i++) { /* for each block in the block row */
951         v_i  = *v + i*bs;
952         aa_i = aa + bs2*(ai[bn] + i);
953         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
954       }
955     }
956   }
957 
958   if (idx) {
959     *idx = 0;
960     if (*nz) {
961       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
962       for (i=0; i<M; i++) { /* for each block in the block row */
963         idx_i = *idx + i*bs;
964         itmp  = bs*aj[ai[bn] + i];
965         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
966       }
967     }
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
974 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
975 {
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
980   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatTranspose_SeqBAIJ"
986 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
987 {
988   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
989   Mat            C;
990   PetscErrorCode ierr;
991   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
992   PetscInt       *rows,*cols,bs2=a->bs2;
993   PetscScalar    *array;
994 
995   PetscFunctionBegin;
996   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
997   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
998   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
999 
1000 #if defined(PETSC_USE_MAT_SINGLE)
1001   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1002   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1003 #else
1004   array = a->a;
1005 #endif
1006 
1007   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1008   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1009   ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr);
1010   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1011   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1012   ierr = PetscFree(col);CHKERRQ(ierr);
1013   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1014   cols = rows + bs;
1015   for (i=0; i<mbs; i++) {
1016     cols[0] = i*bs;
1017     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1018     len = ai[i+1] - ai[i];
1019     for (j=0; j<len; j++) {
1020       rows[0] = (*aj++)*bs;
1021       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1022       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1023       array += bs2;
1024     }
1025   }
1026   ierr = PetscFree(rows);CHKERRQ(ierr);
1027 #if defined(PETSC_USE_MAT_SINGLE)
1028   ierr = PetscFree(array);
1029 #endif
1030 
1031   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1033 
1034   if (B) {
1035     *B = C;
1036   } else {
1037     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1044 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1045 {
1046   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1047   PetscErrorCode ierr;
1048   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1049   int            fd;
1050   PetscScalar    *aa;
1051   FILE           *file;
1052 
1053   PetscFunctionBegin;
1054   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1055   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1056   col_lens[0] = MAT_FILE_COOKIE;
1057 
1058   col_lens[1] = A->m;
1059   col_lens[2] = A->n;
1060   col_lens[3] = a->nz*bs2;
1061 
1062   /* store lengths of each row and write (including header) to file */
1063   count = 0;
1064   for (i=0; i<a->mbs; i++) {
1065     for (j=0; j<bs; j++) {
1066       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1067     }
1068   }
1069   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1070   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1071 
1072   /* store column indices (zero start index) */
1073   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1074   count = 0;
1075   for (i=0; i<a->mbs; i++) {
1076     for (j=0; j<bs; j++) {
1077       for (k=a->i[i]; k<a->i[i+1]; k++) {
1078         for (l=0; l<bs; l++) {
1079           jj[count++] = bs*a->j[k] + l;
1080         }
1081       }
1082     }
1083   }
1084   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1085   ierr = PetscFree(jj);CHKERRQ(ierr);
1086 
1087   /* store nonzero values */
1088   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1089   count = 0;
1090   for (i=0; i<a->mbs; i++) {
1091     for (j=0; j<bs; j++) {
1092       for (k=a->i[i]; k<a->i[i+1]; k++) {
1093         for (l=0; l<bs; l++) {
1094           aa[count++] = a->a[bs2*k + l*bs + j];
1095         }
1096       }
1097     }
1098   }
1099   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1100   ierr = PetscFree(aa);CHKERRQ(ierr);
1101 
1102   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1103   if (file) {
1104     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1111 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1112 {
1113   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1114   PetscErrorCode    ierr;
1115   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1116   PetscViewerFormat format;
1117 
1118   PetscFunctionBegin;
1119   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1120   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1121     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1122   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1123     Mat aij;
1124     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1125     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1126     ierr = MatDestroy(aij);CHKERRQ(ierr);
1127   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1128      PetscFunctionReturn(0);
1129   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1130     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1131     for (i=0; i<a->mbs; i++) {
1132       for (j=0; j<bs; j++) {
1133         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1134         for (k=a->i[i]; k<a->i[i+1]; k++) {
1135           for (l=0; l<bs; l++) {
1136 #if defined(PETSC_USE_COMPLEX)
1137             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1138               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1139                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1140             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1141               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1142                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1143             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             }
1146 #else
1147             if (a->a[bs2*k + l*bs + j] != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1149             }
1150 #endif
1151           }
1152         }
1153         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1154       }
1155     }
1156     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1157   } else {
1158     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1159     for (i=0; i<a->mbs; i++) {
1160       for (j=0; j<bs; j++) {
1161         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1162         for (k=a->i[i]; k<a->i[i+1]; k++) {
1163           for (l=0; l<bs; l++) {
1164 #if defined(PETSC_USE_COMPLEX)
1165             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1166               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1167                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1168             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1169               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1170                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1171             } else {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1173             }
1174 #else
1175             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1176 #endif
1177           }
1178         }
1179         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1180       }
1181     }
1182     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1183   }
1184   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1190 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1191 {
1192   Mat            A = (Mat) Aa;
1193   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1194   PetscErrorCode ierr;
1195   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1196   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1197   MatScalar      *aa;
1198   PetscViewer    viewer;
1199 
1200   PetscFunctionBegin;
1201 
1202   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1203   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1204 
1205   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1206 
1207   /* loop over matrix elements drawing boxes */
1208   color = PETSC_DRAW_BLUE;
1209   for (i=0,row=0; i<mbs; i++,row+=bs) {
1210     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1212       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1213       aa = a->a + j*bs2;
1214       for (k=0; k<bs; k++) {
1215         for (l=0; l<bs; l++) {
1216           if (PetscRealPart(*aa++) >=  0.) continue;
1217           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1218         }
1219       }
1220     }
1221   }
1222   color = PETSC_DRAW_CYAN;
1223   for (i=0,row=0; i<mbs; i++,row+=bs) {
1224     for (j=a->i[i]; j<a->i[i+1]; j++) {
1225       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1226       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1227       aa = a->a + j*bs2;
1228       for (k=0; k<bs; k++) {
1229         for (l=0; l<bs; l++) {
1230           if (PetscRealPart(*aa++) != 0.) continue;
1231           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1232         }
1233       }
1234     }
1235   }
1236 
1237   color = PETSC_DRAW_RED;
1238   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1241       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242       aa = a->a + j*bs2;
1243       for (k=0; k<bs; k++) {
1244         for (l=0; l<bs; l++) {
1245           if (PetscRealPart(*aa++) <= 0.) continue;
1246           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1247         }
1248       }
1249     }
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1256 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1257 {
1258   PetscErrorCode ierr;
1259   PetscReal      xl,yl,xr,yr,w,h;
1260   PetscDraw      draw;
1261   PetscTruth     isnull;
1262 
1263   PetscFunctionBegin;
1264 
1265   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1266   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1267 
1268   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1269   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1270   xr += w;    yr += h;  xl = -w;     yl = -h;
1271   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1272   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1273   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatView_SeqBAIJ"
1279 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1280 {
1281   PetscErrorCode ierr;
1282   PetscTruth     iascii,isbinary,isdraw;
1283 
1284   PetscFunctionBegin;
1285   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1286   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1287   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1288   if (iascii){
1289     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1290   } else if (isbinary) {
1291     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1292   } else if (isdraw) {
1293     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1294   } else {
1295     Mat B;
1296     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1297     ierr = MatView(B,viewer);CHKERRQ(ierr);
1298     ierr = MatDestroy(B);CHKERRQ(ierr);
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1306 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1307 {
1308   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1309   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1310   PetscInt    *ai = a->i,*ailen = a->ilen;
1311   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1312   MatScalar   *ap,*aa = a->a,zero = 0.0;
1313 
1314   PetscFunctionBegin;
1315   for (k=0; k<m; k++) { /* loop over rows */
1316     row  = im[k]; brow = row/bs;
1317     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1318     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1319     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1320     nrow = ailen[brow];
1321     for (l=0; l<n; l++) { /* loop over columns */
1322       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1323       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1324       col  = in[l] ;
1325       bcol = col/bs;
1326       cidx = col%bs;
1327       ridx = row%bs;
1328       high = nrow;
1329       low  = 0; /* assume unsorted */
1330       while (high-low > 5) {
1331         t = (low+high)/2;
1332         if (rp[t] > bcol) high = t;
1333         else             low  = t;
1334       }
1335       for (i=low; i<high; i++) {
1336         if (rp[i] > bcol) break;
1337         if (rp[i] == bcol) {
1338           *v++ = ap[bs2*i+bs*cidx+ridx];
1339           goto finished;
1340         }
1341       }
1342       *v++ = zero;
1343       finished:;
1344     }
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #if defined(PETSC_USE_MAT_SINGLE)
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1352 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1353 {
1354   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1355   PetscErrorCode ierr;
1356   PetscInt       i,N = m*n*b->bs2;
1357   MatScalar      *vsingle;
1358 
1359   PetscFunctionBegin;
1360   if (N > b->setvalueslen) {
1361     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1362     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1363     b->setvalueslen  = N;
1364   }
1365   vsingle = b->setvaluescopy;
1366   for (i=0; i<N; i++) {
1367     vsingle[i] = v[i];
1368   }
1369   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 #endif
1373 
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1377 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1378 {
1379   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1380   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1381   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1382   PetscErrorCode  ierr;
1383   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1384   PetscTruth      roworiented=a->roworiented;
1385   const MatScalar *value = v;
1386   MatScalar       *ap,*aa = a->a,*bap;
1387 
1388   PetscFunctionBegin;
1389   if (roworiented) {
1390     stepval = (n-1)*bs;
1391   } else {
1392     stepval = (m-1)*bs;
1393   }
1394   for (k=0; k<m; k++) { /* loop over added rows */
1395     row  = im[k];
1396     if (row < 0) continue;
1397 #if defined(PETSC_USE_DEBUG)
1398     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1399 #endif
1400     rp   = aj + ai[row];
1401     ap   = aa + bs2*ai[row];
1402     rmax = imax[row];
1403     nrow = ailen[row];
1404     low  = 0;
1405     high = nrow;
1406     for (l=0; l<n; l++) { /* loop over added columns */
1407       if (in[l] < 0) continue;
1408 #if defined(PETSC_USE_DEBUG)
1409       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1410 #endif
1411       col = in[l];
1412       if (roworiented) {
1413         value = v + k*(stepval+bs)*bs + l*bs;
1414       } else {
1415         value = v + l*(stepval+bs)*bs + k*bs;
1416       }
1417       if (col <= lastcol) low = 0; else high = nrow;
1418       lastcol = col;
1419       while (high-low > 7) {
1420         t = (low+high)/2;
1421         if (rp[t] > col) high = t;
1422         else             low  = t;
1423       }
1424       for (i=low; i<high; i++) {
1425         if (rp[i] > col) break;
1426         if (rp[i] == col) {
1427           bap  = ap +  bs2*i;
1428           if (roworiented) {
1429             if (is == ADD_VALUES) {
1430               for (ii=0; ii<bs; ii++,value+=stepval) {
1431                 for (jj=ii; jj<bs2; jj+=bs) {
1432                   bap[jj] += *value++;
1433                 }
1434               }
1435             } else {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] = *value++;
1439                 }
1440               }
1441             }
1442           } else {
1443             if (is == ADD_VALUES) {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=0; jj<bs; jj++) {
1446                   *bap++ += *value++;
1447                 }
1448               }
1449             } else {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++  = *value++;
1453                 }
1454               }
1455             }
1456           }
1457           goto noinsert2;
1458         }
1459       }
1460       if (nonew == 1) goto noinsert2;
1461       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1462       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1463       N = nrow++ - 1;
1464       /* shift up all the later entries in this row */
1465       for (ii=N; ii>=i; ii--) {
1466         rp[ii+1] = rp[ii];
1467         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1468       }
1469       if (N >= i) {
1470         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1471       }
1472       rp[i] = col;
1473       bap   = ap +  bs2*i;
1474       if (roworiented) {
1475         for (ii=0; ii<bs; ii++,value+=stepval) {
1476           for (jj=ii; jj<bs2; jj+=bs) {
1477             bap[jj] = *value++;
1478           }
1479         }
1480       } else {
1481         for (ii=0; ii<bs; ii++,value+=stepval) {
1482           for (jj=0; jj<bs; jj++) {
1483             *bap++  = *value++;
1484           }
1485         }
1486       }
1487       noinsert2:;
1488       low = i;
1489     }
1490     ailen[row] = nrow;
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1497 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1498 {
1499   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1500   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1501   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1502   PetscErrorCode ierr;
1503   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1504   MatScalar      *aa = a->a,*ap;
1505   PetscReal      ratio=0.6;
1506 
1507   PetscFunctionBegin;
1508   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1509 
1510   if (m) rmax = ailen[0];
1511   for (i=1; i<mbs; i++) {
1512     /* move each row back by the amount of empty slots (fshift) before it*/
1513     fshift += imax[i-1] - ailen[i-1];
1514     rmax   = PetscMax(rmax,ailen[i]);
1515     if (fshift) {
1516       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1517       N = ailen[i];
1518       for (j=0; j<N; j++) {
1519         ip[j-fshift] = ip[j];
1520         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1521       }
1522     }
1523     ai[i] = ai[i-1] + ailen[i-1];
1524   }
1525   if (mbs) {
1526     fshift += imax[mbs-1] - ailen[mbs-1];
1527     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1528   }
1529   /* reset ilen and imax for each row */
1530   for (i=0; i<mbs; i++) {
1531     ailen[i] = imax[i] = ai[i+1] - ai[i];
1532   }
1533   a->nz = ai[mbs];
1534 
1535   /* diagonals may have moved, so kill the diagonal pointers */
1536   a->idiagvalid = PETSC_FALSE;
1537   if (fshift && a->diag) {
1538     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1539     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1540     a->diag = 0;
1541   }
1542   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1543   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1544   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1545   a->reallocs          = 0;
1546   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1547 
1548   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1549   if (a->compressedrow.use){
1550     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1551   }
1552 
1553   A->same_nonzero = PETSC_TRUE;
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /*
1558    This function returns an array of flags which indicate the locations of contiguous
1559    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1560    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1561    Assume: sizes should be long enough to hold all the values.
1562 */
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1565 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1566 {
1567   PetscInt   i,j,k,row;
1568   PetscTruth flg;
1569 
1570   PetscFunctionBegin;
1571   for (i=0,j=0; i<n; j++) {
1572     row = idx[i];
1573     if (row%bs!=0) { /* Not the begining of a block */
1574       sizes[j] = 1;
1575       i++;
1576     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1577       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1578       i++;
1579     } else { /* Begining of the block, so check if the complete block exists */
1580       flg = PETSC_TRUE;
1581       for (k=1; k<bs; k++) {
1582         if (row+k != idx[i+k]) { /* break in the block */
1583           flg = PETSC_FALSE;
1584           break;
1585         }
1586       }
1587       if (flg) { /* No break in the bs */
1588         sizes[j] = bs;
1589         i+= bs;
1590       } else {
1591         sizes[j] = 1;
1592         i++;
1593       }
1594     }
1595   }
1596   *bs_max = j;
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1602 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1603 {
1604   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1605   PetscErrorCode ierr;
1606   PetscInt       i,j,k,count,*rows;
1607   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1608   PetscScalar    zero = 0.0;
1609   MatScalar      *aa;
1610 
1611   PetscFunctionBegin;
1612   /* Make a copy of the IS and  sort it */
1613   /* allocate memory for rows,sizes */
1614   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1615   sizes = rows + is_n;
1616 
1617   /* copy IS values to rows, and sort them */
1618   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1619   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1620   if (baij->keepzeroedrows) {
1621     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1622     bs_max = is_n;
1623     A->same_nonzero = PETSC_TRUE;
1624   } else {
1625     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1626     A->same_nonzero = PETSC_FALSE;
1627   }
1628 
1629   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1630     row   = rows[j];
1631     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1632     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1633     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1634     if (sizes[i] == bs && !baij->keepzeroedrows) {
1635       if (diag != 0.0) {
1636         if (baij->ilen[row/bs] > 0) {
1637           baij->ilen[row/bs]       = 1;
1638           baij->j[baij->i[row/bs]] = row/bs;
1639           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1640         }
1641         /* Now insert all the diagonal values for this bs */
1642         for (k=0; k<bs; k++) {
1643           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1644         }
1645       } else { /* (diag == 0.0) */
1646         baij->ilen[row/bs] = 0;
1647       } /* end (diag == 0.0) */
1648     } else { /* (sizes[i] != bs) */
1649 #if defined (PETSC_USE_DEBUG)
1650       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1651 #endif
1652       for (k=0; k<count; k++) {
1653         aa[0] =  zero;
1654         aa    += bs;
1655       }
1656       if (diag != 0.0) {
1657         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1658       }
1659     }
1660   }
1661 
1662   ierr = PetscFree(rows);CHKERRQ(ierr);
1663   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1669 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1670 {
1671   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1672   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1673   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1674   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1675   PetscErrorCode ierr;
1676   PetscInt       ridx,cidx,bs2=a->bs2;
1677   PetscTruth     roworiented=a->roworiented;
1678   MatScalar      *ap,value,*aa=a->a,*bap;
1679 
1680   PetscFunctionBegin;
1681   for (k=0; k<m; k++) { /* loop over added rows */
1682     row  = im[k];
1683     brow = row/bs;
1684     if (row < 0) continue;
1685 #if defined(PETSC_USE_DEBUG)
1686     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1687 #endif
1688     rp   = aj + ai[brow];
1689     ap   = aa + bs2*ai[brow];
1690     rmax = imax[brow];
1691     nrow = ailen[brow];
1692     low  = 0;
1693     high = nrow;
1694     for (l=0; l<n; l++) { /* loop over added columns */
1695       if (in[l] < 0) continue;
1696 #if defined(PETSC_USE_DEBUG)
1697       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1698 #endif
1699       col = in[l]; bcol = col/bs;
1700       ridx = row % bs; cidx = col % bs;
1701       if (roworiented) {
1702         value = v[l + k*n];
1703       } else {
1704         value = v[k + l*m];
1705       }
1706       if (col <= lastcol) low = 0; else high = nrow;
1707       lastcol = col;
1708       while (high-low > 7) {
1709         t = (low+high)/2;
1710         if (rp[t] > bcol) high = t;
1711         else              low  = t;
1712       }
1713       for (i=low; i<high; i++) {
1714         if (rp[i] > bcol) break;
1715         if (rp[i] == bcol) {
1716           bap  = ap +  bs2*i + bs*cidx + ridx;
1717           if (is == ADD_VALUES) *bap += value;
1718           else                  *bap  = value;
1719           goto noinsert1;
1720         }
1721       }
1722       if (nonew == 1) goto noinsert1;
1723       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1724       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1725       N = nrow++ - 1;
1726       /* shift up all the later entries in this row */
1727       for (ii=N; ii>=i; ii--) {
1728         rp[ii+1] = rp[ii];
1729         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1730       }
1731       if (N>=i) {
1732         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1733       }
1734       rp[i]                      = bcol;
1735       ap[bs2*i + bs*cidx + ridx] = value;
1736       a->nz++;
1737       noinsert1:;
1738       low = i;
1739     }
1740     ailen[brow] = nrow;
1741   }
1742   A->same_nonzero = PETSC_FALSE;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1749 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1750 {
1751   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1752   Mat            outA;
1753   PetscErrorCode ierr;
1754   PetscTruth     row_identity,col_identity;
1755 
1756   PetscFunctionBegin;
1757   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1758   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1759   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1760   if (!row_identity || !col_identity) {
1761     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1762   }
1763 
1764   outA          = inA;
1765   inA->factor   = FACTOR_LU;
1766 
1767   if (!a->diag) {
1768     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1769   }
1770 
1771   a->row        = row;
1772   a->col        = col;
1773   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1774   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1775 
1776   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1777   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1778   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1779 
1780   /*
1781       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1782       for ILU(0) factorization with natural ordering
1783   */
1784   if (inA->bs < 8) {
1785     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1786   } else {
1787     if (!a->solve_work) {
1788       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1789       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1790     }
1791   }
1792 
1793   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1794 
1795   PetscFunctionReturn(0);
1796 }
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1799 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1800 {
1801   static PetscTruth called = PETSC_FALSE;
1802   MPI_Comm          comm = A->comm;
1803   PetscErrorCode    ierr;
1804 
1805   PetscFunctionBegin;
1806   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1807   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1808   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 EXTERN_C_BEGIN
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1815 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1816 {
1817   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1818   PetscInt    i,nz,nbs;
1819 
1820   PetscFunctionBegin;
1821   nz  = baij->maxnz/baij->bs2;
1822   nbs = baij->nbs;
1823   for (i=0; i<nz; i++) {
1824     baij->j[i] = indices[i];
1825   }
1826   baij->nz = nz;
1827   for (i=0; i<nbs; i++) {
1828     baij->ilen[i] = baij->imax[i];
1829   }
1830 
1831   PetscFunctionReturn(0);
1832 }
1833 EXTERN_C_END
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1837 /*@
1838     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1839        in the matrix.
1840 
1841   Input Parameters:
1842 +  mat - the SeqBAIJ matrix
1843 -  indices - the column indices
1844 
1845   Level: advanced
1846 
1847   Notes:
1848     This can be called if you have precomputed the nonzero structure of the
1849   matrix and want to provide it to the matrix object to improve the performance
1850   of the MatSetValues() operation.
1851 
1852     You MUST have set the correct numbers of nonzeros per row in the call to
1853   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1854 
1855     MUST be called before any calls to MatSetValues();
1856 
1857 @*/
1858 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1859 {
1860   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1861 
1862   PetscFunctionBegin;
1863   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1864   PetscValidPointer(indices,2);
1865   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1866   if (f) {
1867     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1868   } else {
1869     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1876 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1877 {
1878   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1879   PetscErrorCode ierr;
1880   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1881   PetscReal      atmp;
1882   PetscScalar    *x,zero = 0.0;
1883   MatScalar      *aa;
1884   PetscInt       ncols,brow,krow,kcol;
1885 
1886   PetscFunctionBegin;
1887   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1888   bs   = A->bs;
1889   aa   = a->a;
1890   ai   = a->i;
1891   aj   = a->j;
1892   mbs = a->mbs;
1893 
1894   ierr = VecSet(v,zero);CHKERRQ(ierr);
1895   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1896   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1897   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1898   for (i=0; i<mbs; i++) {
1899     ncols = ai[1] - ai[0]; ai++;
1900     brow  = bs*i;
1901     for (j=0; j<ncols; j++){
1902       /* bcol = bs*(*aj); */
1903       for (kcol=0; kcol<bs; kcol++){
1904         for (krow=0; krow<bs; krow++){
1905           atmp = PetscAbsScalar(*aa); aa++;
1906           row = brow + krow;    /* row index */
1907           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1908           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1909         }
1910       }
1911       aj++;
1912     }
1913   }
1914   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatCopy_SeqBAIJ"
1920 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1921 {
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   /* If the two matrices have the same copy implementation, use fast copy. */
1926   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1927     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1928     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1929 
1930     if (a->i[A->m] != b->i[B->m]) {
1931       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1932     }
1933     ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr);
1934   } else {
1935     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1936   }
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 #undef __FUNCT__
1941 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1942 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1943 {
1944   PetscErrorCode ierr;
1945 
1946   PetscFunctionBegin;
1947   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1953 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1954 {
1955   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1956   PetscFunctionBegin;
1957   *array = a->a;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1963 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1964 {
1965   PetscFunctionBegin;
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #include "petscblaslapack.h"
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1972 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1973 {
1974   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1975   PetscErrorCode ierr;
1976   PetscInt       i,bs=Y->bs,j,bs2;
1977   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1978 
1979   PetscFunctionBegin;
1980   if (str == SAME_NONZERO_PATTERN) {
1981     PetscScalar alpha = a;
1982     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1983   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1984     if (y->xtoy && y->XtoY != X) {
1985       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1986       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1987     }
1988     if (!y->xtoy) { /* get xtoy */
1989       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1990       y->XtoY = X;
1991     }
1992     bs2 = bs*bs;
1993     for (i=0; i<x->nz; i++) {
1994       j = 0;
1995       while (j < bs2){
1996         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1997         j++;
1998       }
1999     }
2000     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2001   } else {
2002     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2003   }
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2009 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2010 {
2011   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2012   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2013   PetscScalar    *aa = a->a;
2014 
2015   PetscFunctionBegin;
2016   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2022 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2023 {
2024   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2025   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2026   PetscScalar    *aa = a->a;
2027 
2028   PetscFunctionBegin;
2029   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 
2034 /* -------------------------------------------------------------------*/
2035 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2036        MatGetRow_SeqBAIJ,
2037        MatRestoreRow_SeqBAIJ,
2038        MatMult_SeqBAIJ_N,
2039 /* 4*/ MatMultAdd_SeqBAIJ_N,
2040        MatMultTranspose_SeqBAIJ,
2041        MatMultTransposeAdd_SeqBAIJ,
2042        MatSolve_SeqBAIJ_N,
2043        0,
2044        0,
2045 /*10*/ 0,
2046        MatLUFactor_SeqBAIJ,
2047        0,
2048        0,
2049        MatTranspose_SeqBAIJ,
2050 /*15*/ MatGetInfo_SeqBAIJ,
2051        MatEqual_SeqBAIJ,
2052        MatGetDiagonal_SeqBAIJ,
2053        MatDiagonalScale_SeqBAIJ,
2054        MatNorm_SeqBAIJ,
2055 /*20*/ 0,
2056        MatAssemblyEnd_SeqBAIJ,
2057        0,
2058        MatSetOption_SeqBAIJ,
2059        MatZeroEntries_SeqBAIJ,
2060 /*25*/ MatZeroRows_SeqBAIJ,
2061        MatLUFactorSymbolic_SeqBAIJ,
2062        MatLUFactorNumeric_SeqBAIJ_N,
2063        MatCholeskyFactorSymbolic_SeqBAIJ,
2064        MatCholeskyFactorNumeric_SeqBAIJ_N,
2065 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2066        MatILUFactorSymbolic_SeqBAIJ,
2067        MatICCFactorSymbolic_SeqBAIJ,
2068        MatGetArray_SeqBAIJ,
2069        MatRestoreArray_SeqBAIJ,
2070 /*35*/ MatDuplicate_SeqBAIJ,
2071        0,
2072        0,
2073        MatILUFactor_SeqBAIJ,
2074        0,
2075 /*40*/ MatAXPY_SeqBAIJ,
2076        MatGetSubMatrices_SeqBAIJ,
2077        MatIncreaseOverlap_SeqBAIJ,
2078        MatGetValues_SeqBAIJ,
2079        MatCopy_SeqBAIJ,
2080 /*45*/ MatPrintHelp_SeqBAIJ,
2081        MatScale_SeqBAIJ,
2082        0,
2083        0,
2084        0,
2085 /*50*/ 0,
2086        MatGetRowIJ_SeqBAIJ,
2087        MatRestoreRowIJ_SeqBAIJ,
2088        0,
2089        0,
2090 /*55*/ 0,
2091        0,
2092        0,
2093        0,
2094        MatSetValuesBlocked_SeqBAIJ,
2095 /*60*/ MatGetSubMatrix_SeqBAIJ,
2096        MatDestroy_SeqBAIJ,
2097        MatView_SeqBAIJ,
2098        MatGetPetscMaps_Petsc,
2099        0,
2100 /*65*/ 0,
2101        0,
2102        0,
2103        0,
2104        0,
2105 /*70*/ MatGetRowMax_SeqBAIJ,
2106        MatConvert_Basic,
2107        0,
2108        0,
2109        0,
2110 /*75*/ 0,
2111        0,
2112        0,
2113        0,
2114        0,
2115 /*80*/ 0,
2116        0,
2117        0,
2118        0,
2119        MatLoad_SeqBAIJ,
2120 /*85*/ 0,
2121        0,
2122        0,
2123        0,
2124        0,
2125 /*90*/ 0,
2126        0,
2127        0,
2128        0,
2129        0,
2130 /*95*/ 0,
2131        0,
2132        0,
2133        0,
2134        0,
2135 /*100*/0,
2136        0,
2137        0,
2138        0,
2139        0,
2140 /*105*/0,
2141        MatRealPart_SeqBAIJ,
2142        MatImaginaryPart_SeqBAIJ
2143 };
2144 
2145 EXTERN_C_BEGIN
2146 #undef __FUNCT__
2147 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2148 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2149 {
2150   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2151   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2152   PetscErrorCode ierr;
2153 
2154   PetscFunctionBegin;
2155   if (aij->nonew != 1) {
2156     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2157   }
2158 
2159   /* allocate space for values if not already there */
2160   if (!aij->saved_values) {
2161     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2162   }
2163 
2164   /* copy values over */
2165   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 EXTERN_C_END
2169 
2170 EXTERN_C_BEGIN
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2173 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2174 {
2175   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2176   PetscErrorCode ierr;
2177   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2178 
2179   PetscFunctionBegin;
2180   if (aij->nonew != 1) {
2181     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2182   }
2183   if (!aij->saved_values) {
2184     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2185   }
2186 
2187   /* copy values over */
2188   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 EXTERN_C_END
2192 
2193 EXTERN_C_BEGIN
2194 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2195 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2196 EXTERN_C_END
2197 
2198 EXTERN_C_BEGIN
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2201 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2202 {
2203   Mat_SeqBAIJ    *b;
2204   PetscErrorCode ierr;
2205   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2206   PetscTruth     flg,skipallocation = PETSC_FALSE;
2207 
2208   PetscFunctionBegin;
2209 
2210   if (nz == MAT_SKIP_ALLOCATION) {
2211     skipallocation = PETSC_TRUE;
2212     nz             = 0;
2213   }
2214 
2215   B->preallocated = PETSC_TRUE;
2216   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2217   if (nnz && newbs != bs) {
2218     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2219   }
2220   bs   = newbs;
2221 
2222   mbs  = B->m/bs;
2223   nbs  = B->n/bs;
2224   bs2  = bs*bs;
2225 
2226   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2227     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2228   }
2229 
2230   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2231   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2232   if (nnz) {
2233     for (i=0; i<mbs; i++) {
2234       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2235       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2236     }
2237   }
2238 
2239   b       = (Mat_SeqBAIJ*)B->data;
2240   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2241   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2242   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2243   if (!flg) {
2244     switch (bs) {
2245     case 1:
2246       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2247       B->ops->mult            = MatMult_SeqBAIJ_1;
2248       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2249       break;
2250     case 2:
2251       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2252       B->ops->mult            = MatMult_SeqBAIJ_2;
2253       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2254       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2255       break;
2256     case 3:
2257       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2258       B->ops->mult            = MatMult_SeqBAIJ_3;
2259       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2260       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2261       break;
2262     case 4:
2263       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2264       B->ops->mult            = MatMult_SeqBAIJ_4;
2265       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2266       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2267       break;
2268     case 5:
2269       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2270       B->ops->mult            = MatMult_SeqBAIJ_5;
2271       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2272       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2273       break;
2274     case 6:
2275       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2276       B->ops->mult            = MatMult_SeqBAIJ_6;
2277       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2278       break;
2279     case 7:
2280       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2281       B->ops->mult            = MatMult_SeqBAIJ_7;
2282       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2283       break;
2284     default:
2285       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2286       B->ops->mult            = MatMult_SeqBAIJ_N;
2287       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2288       break;
2289     }
2290   }
2291   B->bs      = bs;
2292   b->mbs     = mbs;
2293   b->nbs     = nbs;
2294   if (!skipallocation) {
2295     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2296     /* b->ilen will count nonzeros in each block row so far. */
2297     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2298     if (!nnz) {
2299       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2300       else if (nz <= 0)        nz = 1;
2301       for (i=0; i<mbs; i++) b->imax[i] = nz;
2302       nz = nz*mbs;
2303     } else {
2304       nz = 0;
2305       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2306     }
2307 
2308     /* allocate the matrix space */
2309     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2310     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2311     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2312     b->singlemalloc = PETSC_TRUE;
2313 
2314     b->i[0] = 0;
2315     for (i=1; i<mbs+1; i++) {
2316       b->i[i] = b->i[i-1] + b->imax[i-1];
2317     }
2318   }
2319 
2320   B->bs               = bs;
2321   b->bs2              = bs2;
2322   b->mbs              = mbs;
2323   b->nz               = 0;
2324   b->maxnz            = nz*bs2;
2325   B->info.nz_unneeded = (PetscReal)b->maxnz;
2326   PetscFunctionReturn(0);
2327 }
2328 EXTERN_C_END
2329 
2330 /*MC
2331    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2332    block sparse compressed row format.
2333 
2334    Options Database Keys:
2335 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2336 
2337   Level: beginner
2338 
2339 .Seealso: MatCreateSeqBAIJ
2340 M*/
2341 
2342 EXTERN_C_BEGIN
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatCreate_SeqBAIJ"
2345 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2346 {
2347   PetscErrorCode ierr;
2348   PetscMPIInt    size;
2349   Mat_SeqBAIJ    *b;
2350 
2351   PetscFunctionBegin;
2352   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2353   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2354 
2355   B->m = B->M = PetscMax(B->m,B->M);
2356   B->n = B->N = PetscMax(B->n,B->N);
2357   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2358   B->data = (void*)b;
2359   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2360   B->factor           = 0;
2361   B->mapping          = 0;
2362   b->row              = 0;
2363   b->col              = 0;
2364   b->icol             = 0;
2365   b->reallocs         = 0;
2366   b->saved_values     = 0;
2367 #if defined(PETSC_USE_MAT_SINGLE)
2368   b->setvalueslen     = 0;
2369   b->setvaluescopy    = PETSC_NULL;
2370 #endif
2371 
2372   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2373   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2374 
2375   b->sorted           = PETSC_FALSE;
2376   b->roworiented      = PETSC_TRUE;
2377   b->nonew            = 0;
2378   b->diag             = 0;
2379   b->solve_work       = 0;
2380   b->mult_work        = 0;
2381   B->spptr            = 0;
2382   B->info.nz_unneeded = (PetscReal)b->maxnz;
2383   b->keepzeroedrows   = PETSC_FALSE;
2384   b->xtoy              = 0;
2385   b->XtoY              = 0;
2386   b->compressedrow.use     = PETSC_FALSE;
2387   b->compressedrow.nrows   = 0;
2388   b->compressedrow.i       = PETSC_NULL;
2389   b->compressedrow.rindex  = PETSC_NULL;
2390   b->compressedrow.checked = PETSC_FALSE;
2391   B->same_nonzero          = PETSC_FALSE;
2392 
2393   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2394                                      "MatInvertBlockDiagonal_SeqBAIJ",
2395                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2396   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2397                                      "MatStoreValues_SeqBAIJ",
2398                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2399   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2400                                      "MatRetrieveValues_SeqBAIJ",
2401                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2402   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2403                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2404                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2405   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2406                                      "MatConvert_SeqBAIJ_SeqAIJ",
2407                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2409                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2410                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2412                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2413                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 EXTERN_C_END
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2420 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2421 {
2422   Mat            C;
2423   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2424   PetscErrorCode ierr;
2425   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2426 
2427   PetscFunctionBegin;
2428   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2429 
2430   *B = 0;
2431   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2432   ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr);
2433   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2434   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2435   c    = (Mat_SeqBAIJ*)C->data;
2436 
2437   C->M   = A->M;
2438   C->N   = A->N;
2439   C->bs  = A->bs;
2440   c->bs2 = a->bs2;
2441   c->mbs = a->mbs;
2442   c->nbs = a->nbs;
2443 
2444   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2445   for (i=0; i<mbs; i++) {
2446     c->imax[i] = a->imax[i];
2447     c->ilen[i] = a->ilen[i];
2448   }
2449 
2450   /* allocate the matrix space */
2451   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2452   c->singlemalloc = PETSC_TRUE;
2453   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2454   if (mbs > 0) {
2455     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2456     if (cpvalues == MAT_COPY_VALUES) {
2457       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2458     } else {
2459       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2460     }
2461   }
2462   c->sorted      = a->sorted;
2463   c->roworiented = a->roworiented;
2464   c->nonew       = a->nonew;
2465 
2466   if (a->diag) {
2467     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2468     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2469     for (i=0; i<mbs; i++) {
2470       c->diag[i] = a->diag[i];
2471     }
2472   } else c->diag        = 0;
2473   c->nz                 = a->nz;
2474   c->maxnz              = a->maxnz;
2475   c->solve_work         = 0;
2476   c->mult_work          = 0;
2477   C->preallocated       = PETSC_TRUE;
2478   C->assembled          = PETSC_TRUE;
2479 
2480   c->compressedrow.use     = a->compressedrow.use;
2481   c->compressedrow.nrows   = a->compressedrow.nrows;
2482   c->compressedrow.checked = a->compressedrow.checked;
2483   if ( a->compressedrow.checked && a->compressedrow.use){
2484     i = a->compressedrow.nrows;
2485     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2486     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2487     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2488     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2489   } else {
2490     c->compressedrow.use    = PETSC_FALSE;
2491     c->compressedrow.i      = PETSC_NULL;
2492     c->compressedrow.rindex = PETSC_NULL;
2493   }
2494   C->same_nonzero = A->same_nonzero;
2495   *B = C;
2496   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatLoad_SeqBAIJ"
2502 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2503 {
2504   Mat_SeqBAIJ    *a;
2505   Mat            B;
2506   PetscErrorCode ierr;
2507   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2508   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2509   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2510   PetscInt       *masked,nmask,tmp,bs2,ishift;
2511   PetscMPIInt    size;
2512   int            fd;
2513   PetscScalar    *aa;
2514   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2515 
2516   PetscFunctionBegin;
2517   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2518   bs2  = bs*bs;
2519 
2520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2521   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2522   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2523   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2524   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2525   M = header[1]; N = header[2]; nz = header[3];
2526 
2527   if (header[3] < 0) {
2528     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2529   }
2530 
2531   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2532 
2533   /*
2534      This code adds extra rows to make sure the number of rows is
2535     divisible by the blocksize
2536   */
2537   mbs        = M/bs;
2538   extra_rows = bs - M + bs*(mbs);
2539   if (extra_rows == bs) extra_rows = 0;
2540   else                  mbs++;
2541   if (extra_rows) {
2542     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2543   }
2544 
2545   /* read in row lengths */
2546   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2547   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2548   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2549 
2550   /* read in column indices */
2551   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2552   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2553   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2554 
2555   /* loop over row lengths determining block row lengths */
2556   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2557   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2558   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2559   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2560   masked   = mask + mbs;
2561   rowcount = 0; nzcount = 0;
2562   for (i=0; i<mbs; i++) {
2563     nmask = 0;
2564     for (j=0; j<bs; j++) {
2565       kmax = rowlengths[rowcount];
2566       for (k=0; k<kmax; k++) {
2567         tmp = jj[nzcount++]/bs;
2568         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2569       }
2570       rowcount++;
2571     }
2572     browlengths[i] += nmask;
2573     /* zero out the mask elements we set */
2574     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2575   }
2576 
2577   /* create our matrix */
2578   ierr = MatCreate(comm,&B);
2579   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2580   ierr = MatSetType(B,type);CHKERRQ(ierr);
2581   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2582   a = (Mat_SeqBAIJ*)B->data;
2583 
2584   /* set matrix "i" values */
2585   a->i[0] = 0;
2586   for (i=1; i<= mbs; i++) {
2587     a->i[i]      = a->i[i-1] + browlengths[i-1];
2588     a->ilen[i-1] = browlengths[i-1];
2589   }
2590   a->nz         = 0;
2591   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2592 
2593   /* read in nonzero values */
2594   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2595   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2596   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2597 
2598   /* set "a" and "j" values into matrix */
2599   nzcount = 0; jcount = 0;
2600   for (i=0; i<mbs; i++) {
2601     nzcountb = nzcount;
2602     nmask    = 0;
2603     for (j=0; j<bs; j++) {
2604       kmax = rowlengths[i*bs+j];
2605       for (k=0; k<kmax; k++) {
2606         tmp = jj[nzcount++]/bs;
2607 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2608       }
2609     }
2610     /* sort the masked values */
2611     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2612 
2613     /* set "j" values into matrix */
2614     maskcount = 1;
2615     for (j=0; j<nmask; j++) {
2616       a->j[jcount++]  = masked[j];
2617       mask[masked[j]] = maskcount++;
2618     }
2619     /* set "a" values into matrix */
2620     ishift = bs2*a->i[i];
2621     for (j=0; j<bs; j++) {
2622       kmax = rowlengths[i*bs+j];
2623       for (k=0; k<kmax; k++) {
2624         tmp       = jj[nzcountb]/bs ;
2625         block     = mask[tmp] - 1;
2626         point     = jj[nzcountb] - bs*tmp;
2627         idx       = ishift + bs2*block + j + bs*point;
2628         a->a[idx] = (MatScalar)aa[nzcountb++];
2629       }
2630     }
2631     /* zero out the mask elements we set */
2632     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2633   }
2634   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2635 
2636   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2637   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2638   ierr = PetscFree(aa);CHKERRQ(ierr);
2639   ierr = PetscFree(jj);CHKERRQ(ierr);
2640   ierr = PetscFree(mask);CHKERRQ(ierr);
2641 
2642   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2643   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2644   ierr = MatView_Private(B);CHKERRQ(ierr);
2645 
2646   *A = B;
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 #undef __FUNCT__
2651 #define __FUNCT__ "MatCreateSeqBAIJ"
2652 /*@C
2653    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2654    compressed row) format.  For good matrix assembly performance the
2655    user should preallocate the matrix storage by setting the parameter nz
2656    (or the array nnz).  By setting these parameters accurately, performance
2657    during matrix assembly can be increased by more than a factor of 50.
2658 
2659    Collective on MPI_Comm
2660 
2661    Input Parameters:
2662 +  comm - MPI communicator, set to PETSC_COMM_SELF
2663 .  bs - size of block
2664 .  m - number of rows
2665 .  n - number of columns
2666 .  nz - number of nonzero blocks  per block row (same for all rows)
2667 -  nnz - array containing the number of nonzero blocks in the various block rows
2668          (possibly different for each block row) or PETSC_NULL
2669 
2670    Output Parameter:
2671 .  A - the matrix
2672 
2673    Options Database Keys:
2674 .   -mat_no_unroll - uses code that does not unroll the loops in the
2675                      block calculations (much slower)
2676 .    -mat_block_size - size of the blocks to use
2677 
2678    Level: intermediate
2679 
2680    Notes:
2681    The number of rows and columns must be divisible by blocksize.
2682 
2683    If the nnz parameter is given then the nz parameter is ignored
2684 
2685    A nonzero block is any block that as 1 or more nonzeros in it
2686 
2687    The block AIJ format is fully compatible with standard Fortran 77
2688    storage.  That is, the stored row and column indices can begin at
2689    either one (as in Fortran) or zero.  See the users' manual for details.
2690 
2691    Specify the preallocated storage with either nz or nnz (not both).
2692    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2693    allocation.  For additional details, see the users manual chapter on
2694    matrices.
2695 
2696 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2697 @*/
2698 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2699 {
2700   PetscErrorCode ierr;
2701 
2702   PetscFunctionBegin;
2703   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2704   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2705   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2706   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2712 /*@C
2713    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2714    per row in the matrix. For good matrix assembly performance the
2715    user should preallocate the matrix storage by setting the parameter nz
2716    (or the array nnz).  By setting these parameters accurately, performance
2717    during matrix assembly can be increased by more than a factor of 50.
2718 
2719    Collective on MPI_Comm
2720 
2721    Input Parameters:
2722 +  A - the matrix
2723 .  bs - size of block
2724 .  nz - number of block nonzeros per block row (same for all rows)
2725 -  nnz - array containing the number of block nonzeros in the various block rows
2726          (possibly different for each block row) or PETSC_NULL
2727 
2728    Options Database Keys:
2729 .   -mat_no_unroll - uses code that does not unroll the loops in the
2730                      block calculations (much slower)
2731 .    -mat_block_size - size of the blocks to use
2732 
2733    Level: intermediate
2734 
2735    Notes:
2736    If the nnz parameter is given then the nz parameter is ignored
2737 
2738    The block AIJ format is fully compatible with standard Fortran 77
2739    storage.  That is, the stored row and column indices can begin at
2740    either one (as in Fortran) or zero.  See the users' manual for details.
2741 
2742    Specify the preallocated storage with either nz or nnz (not both).
2743    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2744    allocation.  For additional details, see the users manual chapter on
2745    matrices.
2746 
2747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2748 @*/
2749 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2750 {
2751   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2752 
2753   PetscFunctionBegin;
2754   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2755   if (f) {
2756     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2757   }
2758   PetscFunctionReturn(0);
2759 }
2760 
2761