xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c009d632fb973c28d49abfd3882a50f0b1cd4845)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
785   PetscInt     *tia, *tja;
786 
787   PetscFunctionBegin;
788   *nn = n;
789   if (!ia) PetscFunctionReturn(0);
790   if (symmetric) {
791     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
792   } else {
793     tia = a->i; tja = a->j;
794   }
795 
796   if (!blockcompressed) {
797     /* malloc & create the natural set of indices */
798     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
799     for (i=0; i<n+1; i++) {
800       for (j=0; j<bs; j++) {
801         *ia[i*bs+j] = tia[i]*bs+j;
802       }
803     }
804     for (i=0; i<nz; i++) {
805       for (j=0; j<bs; j++) {
806         *ja[i*bs+j] = tia[i]*bs+j;
807       }
808     }
809     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
810       ierr = PetscFree(tia);CHKERRQ(ierr);
811       ierr = PetscFree(tja);CHKERRQ(ierr);
812     }
813   } else {
814     *ia = tia;
815     *ja = tja;
816   }
817   if (oshift == 1) {
818     for (i=0; i<nz; i++) (*ja)[i]++;
819     for (i=0; i<n+1; i++) (*ia)[i]++;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
826 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
827 {
828   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
829   PetscErrorCode ierr;
830   PetscInt       i,n = a->mbs,nz = a->i[n];
831 
832   PetscFunctionBegin;
833   if (!ia) PetscFunctionReturn(0);
834   if (!blockcompressed) {
835     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
836   }else if (symmetric) {
837     ierr = PetscFree(*ia);CHKERRQ(ierr);
838     ierr = PetscFree(*ja);CHKERRQ(ierr);
839   } else if (oshift == 1) { /* blockcompressed */
840     for (i=0; i<nz; i++) a->j[i]--;
841     for (i=0; i<n+1; i++) a->i[i]--;
842   }
843   PetscFunctionReturn(0);
844 }
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDestroy_SeqBAIJ"
848 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
849 {
850   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
851   PetscErrorCode ierr;
852 
853   PetscFunctionBegin;
854 #if defined(PETSC_USE_LOG)
855   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
856 #endif
857   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
858   if (a->row) {
859     ierr = ISDestroy(a->row);CHKERRQ(ierr);
860   }
861   if (a->col) {
862     ierr = ISDestroy(a->col);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(a->diag);CHKERRQ(ierr);
865   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
866   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
867   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
868   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
869   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
870   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
871 #if defined(PETSC_USE_MAT_SINGLE)
872   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
873 #endif
874   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
875   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
876 
877   ierr = PetscFree(a);CHKERRQ(ierr);
878 
879   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
880   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
881   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
882   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
883   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
884   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
885   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
886   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatSetOption_SeqBAIJ"
892 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
893 {
894   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   switch (op) {
899   case MAT_ROW_ORIENTED:
900     a->roworiented    = PETSC_TRUE;
901     break;
902   case MAT_COLUMN_ORIENTED:
903     a->roworiented    = PETSC_FALSE;
904     break;
905   case MAT_COLUMNS_SORTED:
906     a->sorted         = PETSC_TRUE;
907     break;
908   case MAT_COLUMNS_UNSORTED:
909     a->sorted         = PETSC_FALSE;
910     break;
911   case MAT_KEEP_ZEROED_ROWS:
912     a->keepzeroedrows = PETSC_TRUE;
913     break;
914   case MAT_NO_NEW_NONZERO_LOCATIONS:
915     a->nonew          = 1;
916     break;
917   case MAT_NEW_NONZERO_LOCATION_ERR:
918     a->nonew          = -1;
919     break;
920   case MAT_NEW_NONZERO_ALLOCATION_ERR:
921     a->nonew          = -2;
922     break;
923   case MAT_YES_NEW_NONZERO_LOCATIONS:
924     a->nonew          = 0;
925     break;
926   case MAT_ROWS_SORTED:
927   case MAT_ROWS_UNSORTED:
928   case MAT_YES_NEW_DIAGONALS:
929   case MAT_IGNORE_OFF_PROC_ENTRIES:
930   case MAT_USE_HASH_TABLE:
931     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
932     break;
933   case MAT_NO_NEW_DIAGONALS:
934     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
935   case MAT_SYMMETRIC:
936   case MAT_STRUCTURALLY_SYMMETRIC:
937   case MAT_NOT_SYMMETRIC:
938   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
939   case MAT_HERMITIAN:
940   case MAT_NOT_HERMITIAN:
941   case MAT_SYMMETRY_ETERNAL:
942   case MAT_NOT_SYMMETRY_ETERNAL:
943     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
944     break;
945   default:
946     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatGetRow_SeqBAIJ"
953 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
954 {
955   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
958   MatScalar      *aa,*aa_i;
959   PetscScalar    *v_i;
960 
961   PetscFunctionBegin;
962   bs  = A->rmap.bs;
963   ai  = a->i;
964   aj  = a->j;
965   aa  = a->a;
966   bs2 = a->bs2;
967 
968   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
969 
970   bn  = row/bs;   /* Block number */
971   bp  = row % bs; /* Block Position */
972   M   = ai[bn+1] - ai[bn];
973   *nz = bs*M;
974 
975   if (v) {
976     *v = 0;
977     if (*nz) {
978       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
979       for (i=0; i<M; i++) { /* for each block in the block row */
980         v_i  = *v + i*bs;
981         aa_i = aa + bs2*(ai[bn] + i);
982         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
983       }
984     }
985   }
986 
987   if (idx) {
988     *idx = 0;
989     if (*nz) {
990       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
991       for (i=0; i<M; i++) { /* for each block in the block row */
992         idx_i = *idx + i*bs;
993         itmp  = bs*aj[ai[bn] + i];
994         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
995       }
996     }
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1003 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1004 {
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1009   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1015 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1016 {
1017   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1018   Mat            C;
1019   PetscErrorCode ierr;
1020   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1021   PetscInt       *rows,*cols,bs2=a->bs2;
1022   PetscScalar    *array;
1023 
1024   PetscFunctionBegin;
1025   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1026   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1027   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1028 
1029 #if defined(PETSC_USE_MAT_SINGLE)
1030   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1031   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1032 #else
1033   array = a->a;
1034 #endif
1035 
1036   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1037   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1038   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1039   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1040   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1041   ierr = PetscFree(col);CHKERRQ(ierr);
1042   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1043   cols = rows + bs;
1044   for (i=0; i<mbs; i++) {
1045     cols[0] = i*bs;
1046     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1047     len = ai[i+1] - ai[i];
1048     for (j=0; j<len; j++) {
1049       rows[0] = (*aj++)*bs;
1050       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1051       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1052       array += bs2;
1053     }
1054   }
1055   ierr = PetscFree(rows);CHKERRQ(ierr);
1056 #if defined(PETSC_USE_MAT_SINGLE)
1057   ierr = PetscFree(array);
1058 #endif
1059 
1060   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1062 
1063   if (B) {
1064     *B = C;
1065   } else {
1066     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1073 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1074 {
1075   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1076   PetscErrorCode ierr;
1077   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1078   int            fd;
1079   PetscScalar    *aa;
1080   FILE           *file;
1081 
1082   PetscFunctionBegin;
1083   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1084   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1085   col_lens[0] = MAT_FILE_COOKIE;
1086 
1087   col_lens[1] = A->rmap.N;
1088   col_lens[2] = A->cmap.n;
1089   col_lens[3] = a->nz*bs2;
1090 
1091   /* store lengths of each row and write (including header) to file */
1092   count = 0;
1093   for (i=0; i<a->mbs; i++) {
1094     for (j=0; j<bs; j++) {
1095       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1096     }
1097   }
1098   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1099   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1100 
1101   /* store column indices (zero start index) */
1102   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1103   count = 0;
1104   for (i=0; i<a->mbs; i++) {
1105     for (j=0; j<bs; j++) {
1106       for (k=a->i[i]; k<a->i[i+1]; k++) {
1107         for (l=0; l<bs; l++) {
1108           jj[count++] = bs*a->j[k] + l;
1109         }
1110       }
1111     }
1112   }
1113   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1114   ierr = PetscFree(jj);CHKERRQ(ierr);
1115 
1116   /* store nonzero values */
1117   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1118   count = 0;
1119   for (i=0; i<a->mbs; i++) {
1120     for (j=0; j<bs; j++) {
1121       for (k=a->i[i]; k<a->i[i+1]; k++) {
1122         for (l=0; l<bs; l++) {
1123           aa[count++] = a->a[bs2*k + l*bs + j];
1124         }
1125       }
1126     }
1127   }
1128   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1129   ierr = PetscFree(aa);CHKERRQ(ierr);
1130 
1131   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1132   if (file) {
1133     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1140 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1141 {
1142   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1143   PetscErrorCode    ierr;
1144   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1145   PetscViewerFormat format;
1146 
1147   PetscFunctionBegin;
1148   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1149   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1150     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1151   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1152     Mat aij;
1153     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1154     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1155     ierr = MatDestroy(aij);CHKERRQ(ierr);
1156   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1157      PetscFunctionReturn(0);
1158   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1159     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1160     for (i=0; i<a->mbs; i++) {
1161       for (j=0; j<bs; j++) {
1162         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1163         for (k=a->i[i]; k<a->i[i+1]; k++) {
1164           for (l=0; l<bs; l++) {
1165 #if defined(PETSC_USE_COMPLEX)
1166             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1167               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1168                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1169             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1170               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1171                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1172             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1173               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1174             }
1175 #else
1176             if (a->a[bs2*k + l*bs + j] != 0.0) {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1178             }
1179 #endif
1180           }
1181         }
1182         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1183       }
1184     }
1185     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1186   } else {
1187     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1188     for (i=0; i<a->mbs; i++) {
1189       for (j=0; j<bs; j++) {
1190         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1191         for (k=a->i[i]; k<a->i[i+1]; k++) {
1192           for (l=0; l<bs; l++) {
1193 #if defined(PETSC_USE_COMPLEX)
1194             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1195               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1196                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1197             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1198               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1199                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1200             } else {
1201               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1202             }
1203 #else
1204             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1205 #endif
1206           }
1207         }
1208         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1209       }
1210     }
1211     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1212   }
1213   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1219 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1220 {
1221   Mat            A = (Mat) Aa;
1222   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1223   PetscErrorCode ierr;
1224   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1225   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1226   MatScalar      *aa;
1227   PetscViewer    viewer;
1228 
1229   PetscFunctionBegin;
1230 
1231   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1232   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1233 
1234   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1235 
1236   /* loop over matrix elements drawing boxes */
1237   color = PETSC_DRAW_BLUE;
1238   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1241       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242       aa = a->a + j*bs2;
1243       for (k=0; k<bs; k++) {
1244         for (l=0; l<bs; l++) {
1245           if (PetscRealPart(*aa++) >=  0.) continue;
1246           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1247         }
1248       }
1249     }
1250   }
1251   color = PETSC_DRAW_CYAN;
1252   for (i=0,row=0; i<mbs; i++,row+=bs) {
1253     for (j=a->i[i]; j<a->i[i+1]; j++) {
1254       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1255       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1256       aa = a->a + j*bs2;
1257       for (k=0; k<bs; k++) {
1258         for (l=0; l<bs; l++) {
1259           if (PetscRealPart(*aa++) != 0.) continue;
1260           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1261         }
1262       }
1263     }
1264   }
1265 
1266   color = PETSC_DRAW_RED;
1267   for (i=0,row=0; i<mbs; i++,row+=bs) {
1268     for (j=a->i[i]; j<a->i[i+1]; j++) {
1269       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1270       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1271       aa = a->a + j*bs2;
1272       for (k=0; k<bs; k++) {
1273         for (l=0; l<bs; l++) {
1274           if (PetscRealPart(*aa++) <= 0.) continue;
1275           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1276         }
1277       }
1278     }
1279   }
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1285 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1286 {
1287   PetscErrorCode ierr;
1288   PetscReal      xl,yl,xr,yr,w,h;
1289   PetscDraw      draw;
1290   PetscTruth     isnull;
1291 
1292   PetscFunctionBegin;
1293 
1294   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1295   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1296 
1297   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1298   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1299   xr += w;    yr += h;  xl = -w;     yl = -h;
1300   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1301   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1302   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatView_SeqBAIJ"
1308 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1309 {
1310   PetscErrorCode ierr;
1311   PetscTruth     iascii,isbinary,isdraw;
1312 
1313   PetscFunctionBegin;
1314   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1315   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1316   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1317   if (iascii){
1318     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1319   } else if (isbinary) {
1320     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1321   } else if (isdraw) {
1322     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1323   } else {
1324     Mat B;
1325     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1326     ierr = MatView(B,viewer);CHKERRQ(ierr);
1327     ierr = MatDestroy(B);CHKERRQ(ierr);
1328   }
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1335 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1336 {
1337   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1338   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1339   PetscInt    *ai = a->i,*ailen = a->ilen;
1340   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1341   MatScalar   *ap,*aa = a->a,zero = 0.0;
1342 
1343   PetscFunctionBegin;
1344   for (k=0; k<m; k++) { /* loop over rows */
1345     row  = im[k]; brow = row/bs;
1346     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1347     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1348     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1349     nrow = ailen[brow];
1350     for (l=0; l<n; l++) { /* loop over columns */
1351       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1352       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1353       col  = in[l] ;
1354       bcol = col/bs;
1355       cidx = col%bs;
1356       ridx = row%bs;
1357       high = nrow;
1358       low  = 0; /* assume unsorted */
1359       while (high-low > 5) {
1360         t = (low+high)/2;
1361         if (rp[t] > bcol) high = t;
1362         else             low  = t;
1363       }
1364       for (i=low; i<high; i++) {
1365         if (rp[i] > bcol) break;
1366         if (rp[i] == bcol) {
1367           *v++ = ap[bs2*i+bs*cidx+ridx];
1368           goto finished;
1369         }
1370       }
1371       *v++ = zero;
1372       finished:;
1373     }
1374   }
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #if defined(PETSC_USE_MAT_SINGLE)
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1381 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1382 {
1383   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1384   PetscErrorCode ierr;
1385   PetscInt       i,N = m*n*b->bs2;
1386   MatScalar      *vsingle;
1387 
1388   PetscFunctionBegin;
1389   if (N > b->setvalueslen) {
1390     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1391     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1392     b->setvalueslen  = N;
1393   }
1394   vsingle = b->setvaluescopy;
1395   for (i=0; i<N; i++) {
1396     vsingle[i] = v[i];
1397   }
1398   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 #endif
1402 
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1406 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1407 {
1408   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1409   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1410   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1411   PetscErrorCode  ierr;
1412   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1413   PetscTruth      roworiented=a->roworiented;
1414   const MatScalar *value = v;
1415   MatScalar       *ap,*aa = a->a,*bap;
1416 
1417   PetscFunctionBegin;
1418   if (roworiented) {
1419     stepval = (n-1)*bs;
1420   } else {
1421     stepval = (m-1)*bs;
1422   }
1423   for (k=0; k<m; k++) { /* loop over added rows */
1424     row  = im[k];
1425     if (row < 0) continue;
1426 #if defined(PETSC_USE_DEBUG)
1427     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1428 #endif
1429     rp   = aj + ai[row];
1430     ap   = aa + bs2*ai[row];
1431     rmax = imax[row];
1432     nrow = ailen[row];
1433     low  = 0;
1434     high = nrow;
1435     for (l=0; l<n; l++) { /* loop over added columns */
1436       if (in[l] < 0) continue;
1437 #if defined(PETSC_USE_DEBUG)
1438       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1439 #endif
1440       col = in[l];
1441       if (roworiented) {
1442         value = v + k*(stepval+bs)*bs + l*bs;
1443       } else {
1444         value = v + l*(stepval+bs)*bs + k*bs;
1445       }
1446       if (col <= lastcol) low = 0; else high = nrow;
1447       lastcol = col;
1448       while (high-low > 7) {
1449         t = (low+high)/2;
1450         if (rp[t] > col) high = t;
1451         else             low  = t;
1452       }
1453       for (i=low; i<high; i++) {
1454         if (rp[i] > col) break;
1455         if (rp[i] == col) {
1456           bap  = ap +  bs2*i;
1457           if (roworiented) {
1458             if (is == ADD_VALUES) {
1459               for (ii=0; ii<bs; ii++,value+=stepval) {
1460                 for (jj=ii; jj<bs2; jj+=bs) {
1461                   bap[jj] += *value++;
1462                 }
1463               }
1464             } else {
1465               for (ii=0; ii<bs; ii++,value+=stepval) {
1466                 for (jj=ii; jj<bs2; jj+=bs) {
1467                   bap[jj] = *value++;
1468                 }
1469               }
1470             }
1471           } else {
1472             if (is == ADD_VALUES) {
1473               for (ii=0; ii<bs; ii++,value+=stepval) {
1474                 for (jj=0; jj<bs; jj++) {
1475                   *bap++ += *value++;
1476                 }
1477               }
1478             } else {
1479               for (ii=0; ii<bs; ii++,value+=stepval) {
1480                 for (jj=0; jj<bs; jj++) {
1481                   *bap++  = *value++;
1482                 }
1483               }
1484             }
1485           }
1486           goto noinsert2;
1487         }
1488       }
1489       if (nonew == 1) goto noinsert2;
1490       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1491       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1492       N = nrow++ - 1; high++;
1493       /* shift up all the later entries in this row */
1494       for (ii=N; ii>=i; ii--) {
1495         rp[ii+1] = rp[ii];
1496         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1497       }
1498       if (N >= i) {
1499         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1500       }
1501       rp[i] = col;
1502       bap   = ap +  bs2*i;
1503       if (roworiented) {
1504         for (ii=0; ii<bs; ii++,value+=stepval) {
1505           for (jj=ii; jj<bs2; jj+=bs) {
1506             bap[jj] = *value++;
1507           }
1508         }
1509       } else {
1510         for (ii=0; ii<bs; ii++,value+=stepval) {
1511           for (jj=0; jj<bs; jj++) {
1512             *bap++  = *value++;
1513           }
1514         }
1515       }
1516       noinsert2:;
1517       low = i;
1518     }
1519     ailen[row] = nrow;
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1526 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1527 {
1528   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1529   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1530   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1531   PetscErrorCode ierr;
1532   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1533   MatScalar      *aa = a->a,*ap;
1534   PetscReal      ratio=0.6;
1535 
1536   PetscFunctionBegin;
1537   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1538 
1539   if (m) rmax = ailen[0];
1540   for (i=1; i<mbs; i++) {
1541     /* move each row back by the amount of empty slots (fshift) before it*/
1542     fshift += imax[i-1] - ailen[i-1];
1543     rmax   = PetscMax(rmax,ailen[i]);
1544     if (fshift) {
1545       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1546       N = ailen[i];
1547       for (j=0; j<N; j++) {
1548         ip[j-fshift] = ip[j];
1549         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1550       }
1551     }
1552     ai[i] = ai[i-1] + ailen[i-1];
1553   }
1554   if (mbs) {
1555     fshift += imax[mbs-1] - ailen[mbs-1];
1556     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1557   }
1558   /* reset ilen and imax for each row */
1559   for (i=0; i<mbs; i++) {
1560     ailen[i] = imax[i] = ai[i+1] - ai[i];
1561   }
1562   a->nz = ai[mbs];
1563 
1564   /* diagonals may have moved, so kill the diagonal pointers */
1565   a->idiagvalid = PETSC_FALSE;
1566   if (fshift && a->diag) {
1567     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1568     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1569     a->diag = 0;
1570   }
1571   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1572   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1573   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1574   a->reallocs          = 0;
1575   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1576 
1577   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1578   if (a->compressedrow.use){
1579     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1580   }
1581 
1582   A->same_nonzero = PETSC_TRUE;
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 /*
1587    This function returns an array of flags which indicate the locations of contiguous
1588    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1589    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1590    Assume: sizes should be long enough to hold all the values.
1591 */
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1594 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1595 {
1596   PetscInt   i,j,k,row;
1597   PetscTruth flg;
1598 
1599   PetscFunctionBegin;
1600   for (i=0,j=0; i<n; j++) {
1601     row = idx[i];
1602     if (row%bs!=0) { /* Not the begining of a block */
1603       sizes[j] = 1;
1604       i++;
1605     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1606       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1607       i++;
1608     } else { /* Begining of the block, so check if the complete block exists */
1609       flg = PETSC_TRUE;
1610       for (k=1; k<bs; k++) {
1611         if (row+k != idx[i+k]) { /* break in the block */
1612           flg = PETSC_FALSE;
1613           break;
1614         }
1615       }
1616       if (flg) { /* No break in the bs */
1617         sizes[j] = bs;
1618         i+= bs;
1619       } else {
1620         sizes[j] = 1;
1621         i++;
1622       }
1623     }
1624   }
1625   *bs_max = j;
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1631 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1632 {
1633   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1634   PetscErrorCode ierr;
1635   PetscInt       i,j,k,count,*rows;
1636   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1637   PetscScalar    zero = 0.0;
1638   MatScalar      *aa;
1639 
1640   PetscFunctionBegin;
1641   /* Make a copy of the IS and  sort it */
1642   /* allocate memory for rows,sizes */
1643   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1644   sizes = rows + is_n;
1645 
1646   /* copy IS values to rows, and sort them */
1647   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1648   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1649   if (baij->keepzeroedrows) {
1650     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1651     bs_max = is_n;
1652     A->same_nonzero = PETSC_TRUE;
1653   } else {
1654     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1655     A->same_nonzero = PETSC_FALSE;
1656   }
1657 
1658   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1659     row   = rows[j];
1660     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1661     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1662     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1663     if (sizes[i] == bs && !baij->keepzeroedrows) {
1664       if (diag != 0.0) {
1665         if (baij->ilen[row/bs] > 0) {
1666           baij->ilen[row/bs]       = 1;
1667           baij->j[baij->i[row/bs]] = row/bs;
1668           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1669         }
1670         /* Now insert all the diagonal values for this bs */
1671         for (k=0; k<bs; k++) {
1672           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1673         }
1674       } else { /* (diag == 0.0) */
1675         baij->ilen[row/bs] = 0;
1676       } /* end (diag == 0.0) */
1677     } else { /* (sizes[i] != bs) */
1678 #if defined (PETSC_USE_DEBUG)
1679       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1680 #endif
1681       for (k=0; k<count; k++) {
1682         aa[0] =  zero;
1683         aa    += bs;
1684       }
1685       if (diag != 0.0) {
1686         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1687       }
1688     }
1689   }
1690 
1691   ierr = PetscFree(rows);CHKERRQ(ierr);
1692   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1698 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1699 {
1700   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1701   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1702   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1703   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1704   PetscErrorCode ierr;
1705   PetscInt       ridx,cidx,bs2=a->bs2;
1706   PetscTruth     roworiented=a->roworiented;
1707   MatScalar      *ap,value,*aa=a->a,*bap;
1708 
1709   PetscFunctionBegin;
1710   for (k=0; k<m; k++) { /* loop over added rows */
1711     row  = im[k];
1712     brow = row/bs;
1713     if (row < 0) continue;
1714 #if defined(PETSC_USE_DEBUG)
1715     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1716 #endif
1717     rp   = aj + ai[brow];
1718     ap   = aa + bs2*ai[brow];
1719     rmax = imax[brow];
1720     nrow = ailen[brow];
1721     low  = 0;
1722     high = nrow;
1723     for (l=0; l<n; l++) { /* loop over added columns */
1724       if (in[l] < 0) continue;
1725 #if defined(PETSC_USE_DEBUG)
1726       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1727 #endif
1728       col = in[l]; bcol = col/bs;
1729       ridx = row % bs; cidx = col % bs;
1730       if (roworiented) {
1731         value = v[l + k*n];
1732       } else {
1733         value = v[k + l*m];
1734       }
1735       if (col <= lastcol) low = 0; else high = nrow;
1736       lastcol = col;
1737       while (high-low > 7) {
1738         t = (low+high)/2;
1739         if (rp[t] > bcol) high = t;
1740         else              low  = t;
1741       }
1742       for (i=low; i<high; i++) {
1743         if (rp[i] > bcol) break;
1744         if (rp[i] == bcol) {
1745           bap  = ap +  bs2*i + bs*cidx + ridx;
1746           if (is == ADD_VALUES) *bap += value;
1747           else                  *bap  = value;
1748           goto noinsert1;
1749         }
1750       }
1751       if (nonew == 1) goto noinsert1;
1752       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1753       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1754       N = nrow++ - 1; high++;
1755       /* shift up all the later entries in this row */
1756       for (ii=N; ii>=i; ii--) {
1757         rp[ii+1] = rp[ii];
1758         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1759       }
1760       if (N>=i) {
1761         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1762       }
1763       rp[i]                      = bcol;
1764       ap[bs2*i + bs*cidx + ridx] = value;
1765       a->nz++;
1766       noinsert1:;
1767       low = i;
1768     }
1769     ailen[brow] = nrow;
1770   }
1771   A->same_nonzero = PETSC_FALSE;
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1778 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1779 {
1780   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1781   Mat            outA;
1782   PetscErrorCode ierr;
1783   PetscTruth     row_identity,col_identity;
1784 
1785   PetscFunctionBegin;
1786   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1787   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1788   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1789   if (!row_identity || !col_identity) {
1790     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1791   }
1792 
1793   outA          = inA;
1794   inA->factor   = FACTOR_LU;
1795 
1796   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1797 
1798   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1799   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1800   a->row = row;
1801   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1802   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1803   a->col = col;
1804 
1805   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1806   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1807   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1808 
1809   /*
1810       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1811       for ILU(0) factorization with natural ordering
1812   */
1813   if (inA->rmap.bs < 8) {
1814     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1815   } else {
1816     if (!a->solve_work) {
1817       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1818       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1819     }
1820   }
1821 
1822   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1823 
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 EXTERN_C_BEGIN
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1830 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1831 {
1832   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1833   PetscInt    i,nz,nbs;
1834 
1835   PetscFunctionBegin;
1836   nz  = baij->maxnz/baij->bs2;
1837   nbs = baij->nbs;
1838   for (i=0; i<nz; i++) {
1839     baij->j[i] = indices[i];
1840   }
1841   baij->nz = nz;
1842   for (i=0; i<nbs; i++) {
1843     baij->ilen[i] = baij->imax[i];
1844   }
1845 
1846   PetscFunctionReturn(0);
1847 }
1848 EXTERN_C_END
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1852 /*@
1853     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1854        in the matrix.
1855 
1856   Input Parameters:
1857 +  mat - the SeqBAIJ matrix
1858 -  indices - the column indices
1859 
1860   Level: advanced
1861 
1862   Notes:
1863     This can be called if you have precomputed the nonzero structure of the
1864   matrix and want to provide it to the matrix object to improve the performance
1865   of the MatSetValues() operation.
1866 
1867     You MUST have set the correct numbers of nonzeros per row in the call to
1868   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1869 
1870     MUST be called before any calls to MatSetValues();
1871 
1872 @*/
1873 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1874 {
1875   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1876 
1877   PetscFunctionBegin;
1878   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1879   PetscValidPointer(indices,2);
1880   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1881   if (f) {
1882     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1883   } else {
1884     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1891 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1892 {
1893   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1894   PetscErrorCode ierr;
1895   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1896   PetscReal      atmp;
1897   PetscScalar    *x,zero = 0.0;
1898   MatScalar      *aa;
1899   PetscInt       ncols,brow,krow,kcol;
1900 
1901   PetscFunctionBegin;
1902   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1903   bs   = A->rmap.bs;
1904   aa   = a->a;
1905   ai   = a->i;
1906   aj   = a->j;
1907   mbs  = a->mbs;
1908 
1909   ierr = VecSet(v,zero);CHKERRQ(ierr);
1910   if (idx) {
1911     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1912   }
1913   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1914   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1915   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1916   for (i=0; i<mbs; i++) {
1917     ncols = ai[1] - ai[0]; ai++;
1918     brow  = bs*i;
1919     for (j=0; j<ncols; j++){
1920       for (kcol=0; kcol<bs; kcol++){
1921         for (krow=0; krow<bs; krow++){
1922           atmp = PetscAbsScalar(*aa);aa++;
1923           row = brow + krow;    /* row index */
1924           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1925           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1926         }
1927       }
1928       aj++;
1929     }
1930   }
1931   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatCopy_SeqBAIJ"
1937 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1938 {
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   /* If the two matrices have the same copy implementation, use fast copy. */
1943   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1944     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1945     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1946 
1947     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1948       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1949     }
1950     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1951   } else {
1952     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1959 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1960 {
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1970 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1971 {
1972   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1973   PetscFunctionBegin;
1974   *array = a->a;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1980 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1981 {
1982   PetscFunctionBegin;
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 #include "petscblaslapack.h"
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1989 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1990 {
1991   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1992   PetscErrorCode ierr;
1993   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1994   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1995 
1996   PetscFunctionBegin;
1997   if (str == SAME_NONZERO_PATTERN) {
1998     PetscScalar alpha = a;
1999     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2000   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2001     if (y->xtoy && y->XtoY != X) {
2002       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2003       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2004     }
2005     if (!y->xtoy) { /* get xtoy */
2006       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2007       y->XtoY = X;
2008       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2009     }
2010     bs2 = bs*bs;
2011     for (i=0; i<x->nz; i++) {
2012       j = 0;
2013       while (j < bs2){
2014         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2015         j++;
2016       }
2017     }
2018     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2019   } else {
2020     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2021   }
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2027 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2028 {
2029   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2030   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2031   PetscScalar    *aa = a->a;
2032 
2033   PetscFunctionBegin;
2034   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2040 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2041 {
2042   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2043   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2044   PetscScalar    *aa = a->a;
2045 
2046   PetscFunctionBegin;
2047   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 
2052 /* -------------------------------------------------------------------*/
2053 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2054        MatGetRow_SeqBAIJ,
2055        MatRestoreRow_SeqBAIJ,
2056        MatMult_SeqBAIJ_N,
2057 /* 4*/ MatMultAdd_SeqBAIJ_N,
2058        MatMultTranspose_SeqBAIJ,
2059        MatMultTransposeAdd_SeqBAIJ,
2060        MatSolve_SeqBAIJ_N,
2061        0,
2062        0,
2063 /*10*/ 0,
2064        MatLUFactor_SeqBAIJ,
2065        0,
2066        0,
2067        MatTranspose_SeqBAIJ,
2068 /*15*/ MatGetInfo_SeqBAIJ,
2069        MatEqual_SeqBAIJ,
2070        MatGetDiagonal_SeqBAIJ,
2071        MatDiagonalScale_SeqBAIJ,
2072        MatNorm_SeqBAIJ,
2073 /*20*/ 0,
2074        MatAssemblyEnd_SeqBAIJ,
2075        0,
2076        MatSetOption_SeqBAIJ,
2077        MatZeroEntries_SeqBAIJ,
2078 /*25*/ MatZeroRows_SeqBAIJ,
2079        MatLUFactorSymbolic_SeqBAIJ,
2080        MatLUFactorNumeric_SeqBAIJ_N,
2081        MatCholeskyFactorSymbolic_SeqBAIJ,
2082        MatCholeskyFactorNumeric_SeqBAIJ_N,
2083 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2084        MatILUFactorSymbolic_SeqBAIJ,
2085        MatICCFactorSymbolic_SeqBAIJ,
2086        MatGetArray_SeqBAIJ,
2087        MatRestoreArray_SeqBAIJ,
2088 /*35*/ MatDuplicate_SeqBAIJ,
2089        0,
2090        0,
2091        MatILUFactor_SeqBAIJ,
2092        0,
2093 /*40*/ MatAXPY_SeqBAIJ,
2094        MatGetSubMatrices_SeqBAIJ,
2095        MatIncreaseOverlap_SeqBAIJ,
2096        MatGetValues_SeqBAIJ,
2097        MatCopy_SeqBAIJ,
2098 /*45*/ 0,
2099        MatScale_SeqBAIJ,
2100        0,
2101        0,
2102        0,
2103 /*50*/ 0,
2104        MatGetRowIJ_SeqBAIJ,
2105        MatRestoreRowIJ_SeqBAIJ,
2106        0,
2107        0,
2108 /*55*/ 0,
2109        0,
2110        0,
2111        0,
2112        MatSetValuesBlocked_SeqBAIJ,
2113 /*60*/ MatGetSubMatrix_SeqBAIJ,
2114        MatDestroy_SeqBAIJ,
2115        MatView_SeqBAIJ,
2116        0,
2117        0,
2118 /*65*/ 0,
2119        0,
2120        0,
2121        0,
2122        0,
2123 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2124        MatConvert_Basic,
2125        0,
2126        0,
2127        0,
2128 /*75*/ 0,
2129        0,
2130        0,
2131        0,
2132        0,
2133 /*80*/ 0,
2134        0,
2135        0,
2136        0,
2137        MatLoad_SeqBAIJ,
2138 /*85*/ 0,
2139        0,
2140        0,
2141        0,
2142        0,
2143 /*90*/ 0,
2144        0,
2145        0,
2146        0,
2147        0,
2148 /*95*/ 0,
2149        0,
2150        0,
2151        0,
2152        0,
2153 /*100*/0,
2154        0,
2155        0,
2156        0,
2157        0,
2158 /*105*/0,
2159        MatRealPart_SeqBAIJ,
2160        MatImaginaryPart_SeqBAIJ
2161 };
2162 
2163 EXTERN_C_BEGIN
2164 #undef __FUNCT__
2165 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2166 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2167 {
2168   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2169   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   if (aij->nonew != 1) {
2174     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2175   }
2176 
2177   /* allocate space for values if not already there */
2178   if (!aij->saved_values) {
2179     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2180   }
2181 
2182   /* copy values over */
2183   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 EXTERN_C_END
2187 
2188 EXTERN_C_BEGIN
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2191 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2192 {
2193   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2194   PetscErrorCode ierr;
2195   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2196 
2197   PetscFunctionBegin;
2198   if (aij->nonew != 1) {
2199     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2200   }
2201   if (!aij->saved_values) {
2202     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2203   }
2204 
2205   /* copy values over */
2206   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 EXTERN_C_END
2210 
2211 EXTERN_C_BEGIN
2212 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2213 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2214 EXTERN_C_END
2215 
2216 EXTERN_C_BEGIN
2217 #undef __FUNCT__
2218 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2219 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2220 {
2221   Mat_SeqBAIJ    *b;
2222   PetscErrorCode ierr;
2223   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2224   PetscTruth     flg,skipallocation = PETSC_FALSE;
2225 
2226   PetscFunctionBegin;
2227 
2228   if (nz == MAT_SKIP_ALLOCATION) {
2229     skipallocation = PETSC_TRUE;
2230     nz             = 0;
2231   }
2232 
2233   ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2234     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2235   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2236 
2237   if (nnz && newbs != bs) {
2238     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2239   }
2240   bs   = newbs;
2241 
2242   B->rmap.bs = B->cmap.bs = bs;
2243   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2244   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2245 
2246   B->preallocated = PETSC_TRUE;
2247 
2248   mbs  = B->rmap.n/bs;
2249   nbs  = B->cmap.n/bs;
2250   bs2  = bs*bs;
2251 
2252   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2253     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2254   }
2255 
2256   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2257   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2258   if (nnz) {
2259     for (i=0; i<mbs; i++) {
2260       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2261       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2262     }
2263   }
2264 
2265   b       = (Mat_SeqBAIJ*)B->data;
2266   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2267     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2268   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2269 
2270   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2271   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2272   if (!flg) {
2273     switch (bs) {
2274     case 1:
2275       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2276       B->ops->mult            = MatMult_SeqBAIJ_1;
2277       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2278       break;
2279     case 2:
2280       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2281       B->ops->mult            = MatMult_SeqBAIJ_2;
2282       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2283       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2284       break;
2285     case 3:
2286       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2287       B->ops->mult            = MatMult_SeqBAIJ_3;
2288       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2289       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2290       break;
2291     case 4:
2292       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2293       B->ops->mult            = MatMult_SeqBAIJ_4;
2294       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2295       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2296       break;
2297     case 5:
2298       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2299       B->ops->mult            = MatMult_SeqBAIJ_5;
2300       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2301       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2302       break;
2303     case 6:
2304       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2305       B->ops->mult            = MatMult_SeqBAIJ_6;
2306       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2307       break;
2308     case 7:
2309       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2310       B->ops->mult            = MatMult_SeqBAIJ_7;
2311       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2312       break;
2313     default:
2314       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2315       B->ops->mult            = MatMult_SeqBAIJ_N;
2316       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2317       break;
2318     }
2319   }
2320   B->rmap.bs      = bs;
2321   b->mbs     = mbs;
2322   b->nbs     = nbs;
2323   if (!skipallocation) {
2324     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2325     /* b->ilen will count nonzeros in each block row so far. */
2326     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2327     if (!nnz) {
2328       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2329       else if (nz <= 0)        nz = 1;
2330       for (i=0; i<mbs; i++) b->imax[i] = nz;
2331       nz = nz*mbs;
2332     } else {
2333       nz = 0;
2334       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2335     }
2336 
2337     /* allocate the matrix space */
2338     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2339     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2340     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2341     b->singlemalloc = PETSC_TRUE;
2342 
2343     b->i[0] = 0;
2344     for (i=1; i<mbs+1; i++) {
2345       b->i[i] = b->i[i-1] + b->imax[i-1];
2346     }
2347     b->free_a     = PETSC_TRUE;
2348     b->free_ij    = PETSC_TRUE;
2349   } else {
2350     b->free_a     = PETSC_FALSE;
2351     b->free_ij    = PETSC_FALSE;
2352   }
2353 
2354   B->rmap.bs          = bs;
2355   b->bs2              = bs2;
2356   b->mbs              = mbs;
2357   b->nz               = 0;
2358   b->maxnz            = nz*bs2;
2359   B->info.nz_unneeded = (PetscReal)b->maxnz;
2360   PetscFunctionReturn(0);
2361 }
2362 EXTERN_C_END
2363 
2364 /*MC
2365    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2366    block sparse compressed row format.
2367 
2368    Options Database Keys:
2369 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2370 
2371   Level: beginner
2372 
2373 .seealso: MatCreateSeqBAIJ()
2374 M*/
2375 
2376 EXTERN_C_BEGIN
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatCreate_SeqBAIJ"
2379 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2380 {
2381   PetscErrorCode ierr;
2382   PetscMPIInt    size;
2383   Mat_SeqBAIJ    *b;
2384 
2385   PetscFunctionBegin;
2386   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2387   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2388 
2389   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2390   B->data = (void*)b;
2391   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2392   B->factor           = 0;
2393   B->mapping          = 0;
2394   b->row              = 0;
2395   b->col              = 0;
2396   b->icol             = 0;
2397   b->reallocs         = 0;
2398   b->saved_values     = 0;
2399 #if defined(PETSC_USE_MAT_SINGLE)
2400   b->setvalueslen     = 0;
2401   b->setvaluescopy    = PETSC_NULL;
2402 #endif
2403 
2404   b->sorted           = PETSC_FALSE;
2405   b->roworiented      = PETSC_TRUE;
2406   b->nonew            = 0;
2407   b->diag             = 0;
2408   b->solve_work       = 0;
2409   b->mult_work        = 0;
2410   B->spptr            = 0;
2411   B->info.nz_unneeded = (PetscReal)b->maxnz;
2412   b->keepzeroedrows   = PETSC_FALSE;
2413   b->xtoy              = 0;
2414   b->XtoY              = 0;
2415   b->compressedrow.use     = PETSC_FALSE;
2416   b->compressedrow.nrows   = 0;
2417   b->compressedrow.i       = PETSC_NULL;
2418   b->compressedrow.rindex  = PETSC_NULL;
2419   b->compressedrow.checked = PETSC_FALSE;
2420   B->same_nonzero          = PETSC_FALSE;
2421 
2422   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2423                                      "MatInvertBlockDiagonal_SeqBAIJ",
2424                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2425   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2426                                      "MatStoreValues_SeqBAIJ",
2427                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2428   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2429                                      "MatRetrieveValues_SeqBAIJ",
2430                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2431   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2432                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2433                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2435                                      "MatConvert_SeqBAIJ_SeqAIJ",
2436                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2437   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2438                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2439                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2440   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2441                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2442                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2443   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 EXTERN_C_END
2447 
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2450 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2451 {
2452   Mat            C;
2453   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2454   PetscErrorCode ierr;
2455   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2456 
2457   PetscFunctionBegin;
2458   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2459 
2460   *B = 0;
2461   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2462   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2463   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2464   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2465   c    = (Mat_SeqBAIJ*)C->data;
2466 
2467   C->rmap.N   = A->rmap.N;
2468   C->cmap.N   = A->cmap.N;
2469   C->rmap.bs  = A->rmap.bs;
2470   c->bs2 = a->bs2;
2471   c->mbs = a->mbs;
2472   c->nbs = a->nbs;
2473 
2474   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2475   for (i=0; i<mbs; i++) {
2476     c->imax[i] = a->imax[i];
2477     c->ilen[i] = a->ilen[i];
2478   }
2479 
2480   /* allocate the matrix space */
2481   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2482   c->singlemalloc = PETSC_TRUE;
2483   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2484   if (mbs > 0) {
2485     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2486     if (cpvalues == MAT_COPY_VALUES) {
2487       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2488     } else {
2489       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2490     }
2491   }
2492   c->sorted      = a->sorted;
2493   c->roworiented = a->roworiented;
2494   c->nonew       = a->nonew;
2495 
2496   if (a->diag) {
2497     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2498     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2499     for (i=0; i<mbs; i++) {
2500       c->diag[i] = a->diag[i];
2501     }
2502   } else c->diag        = 0;
2503   c->nz                 = a->nz;
2504   c->maxnz              = a->maxnz;
2505   c->solve_work         = 0;
2506   c->mult_work          = 0;
2507   c->free_a             = PETSC_TRUE;
2508   c->free_ij            = PETSC_TRUE;
2509   C->preallocated       = PETSC_TRUE;
2510   C->assembled          = PETSC_TRUE;
2511 
2512   c->compressedrow.use     = a->compressedrow.use;
2513   c->compressedrow.nrows   = a->compressedrow.nrows;
2514   c->compressedrow.checked = a->compressedrow.checked;
2515   if ( a->compressedrow.checked && a->compressedrow.use){
2516     i = a->compressedrow.nrows;
2517     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2518     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2519     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2520     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2521   } else {
2522     c->compressedrow.use    = PETSC_FALSE;
2523     c->compressedrow.i      = PETSC_NULL;
2524     c->compressedrow.rindex = PETSC_NULL;
2525   }
2526   C->same_nonzero = A->same_nonzero;
2527   *B = C;
2528   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 #undef __FUNCT__
2533 #define __FUNCT__ "MatLoad_SeqBAIJ"
2534 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2535 {
2536   Mat_SeqBAIJ    *a;
2537   Mat            B;
2538   PetscErrorCode ierr;
2539   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2540   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2541   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2542   PetscInt       *masked,nmask,tmp,bs2,ishift;
2543   PetscMPIInt    size;
2544   int            fd;
2545   PetscScalar    *aa;
2546   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2547 
2548   PetscFunctionBegin;
2549   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2550     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2551   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2552   bs2  = bs*bs;
2553 
2554   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2555   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2556   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2557   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2558   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2559   M = header[1]; N = header[2]; nz = header[3];
2560 
2561   if (header[3] < 0) {
2562     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2563   }
2564 
2565   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2566 
2567   /*
2568      This code adds extra rows to make sure the number of rows is
2569     divisible by the blocksize
2570   */
2571   mbs        = M/bs;
2572   extra_rows = bs - M + bs*(mbs);
2573   if (extra_rows == bs) extra_rows = 0;
2574   else                  mbs++;
2575   if (extra_rows) {
2576     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2577   }
2578 
2579   /* read in row lengths */
2580   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2581   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2582   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2583 
2584   /* read in column indices */
2585   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2586   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2587   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2588 
2589   /* loop over row lengths determining block row lengths */
2590   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2591   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2592   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2593   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2594   masked   = mask + mbs;
2595   rowcount = 0; nzcount = 0;
2596   for (i=0; i<mbs; i++) {
2597     nmask = 0;
2598     for (j=0; j<bs; j++) {
2599       kmax = rowlengths[rowcount];
2600       for (k=0; k<kmax; k++) {
2601         tmp = jj[nzcount++]/bs;
2602         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2603       }
2604       rowcount++;
2605     }
2606     browlengths[i] += nmask;
2607     /* zero out the mask elements we set */
2608     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2609   }
2610 
2611   /* create our matrix */
2612   ierr = MatCreate(comm,&B);
2613   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2614   ierr = MatSetType(B,type);CHKERRQ(ierr);
2615   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2616   a = (Mat_SeqBAIJ*)B->data;
2617 
2618   /* set matrix "i" values */
2619   a->i[0] = 0;
2620   for (i=1; i<= mbs; i++) {
2621     a->i[i]      = a->i[i-1] + browlengths[i-1];
2622     a->ilen[i-1] = browlengths[i-1];
2623   }
2624   a->nz         = 0;
2625   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2626 
2627   /* read in nonzero values */
2628   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2629   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2630   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2631 
2632   /* set "a" and "j" values into matrix */
2633   nzcount = 0; jcount = 0;
2634   for (i=0; i<mbs; i++) {
2635     nzcountb = nzcount;
2636     nmask    = 0;
2637     for (j=0; j<bs; j++) {
2638       kmax = rowlengths[i*bs+j];
2639       for (k=0; k<kmax; k++) {
2640         tmp = jj[nzcount++]/bs;
2641 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2642       }
2643     }
2644     /* sort the masked values */
2645     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2646 
2647     /* set "j" values into matrix */
2648     maskcount = 1;
2649     for (j=0; j<nmask; j++) {
2650       a->j[jcount++]  = masked[j];
2651       mask[masked[j]] = maskcount++;
2652     }
2653     /* set "a" values into matrix */
2654     ishift = bs2*a->i[i];
2655     for (j=0; j<bs; j++) {
2656       kmax = rowlengths[i*bs+j];
2657       for (k=0; k<kmax; k++) {
2658         tmp       = jj[nzcountb]/bs ;
2659         block     = mask[tmp] - 1;
2660         point     = jj[nzcountb] - bs*tmp;
2661         idx       = ishift + bs2*block + j + bs*point;
2662         a->a[idx] = (MatScalar)aa[nzcountb++];
2663       }
2664     }
2665     /* zero out the mask elements we set */
2666     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2667   }
2668   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2669 
2670   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2671   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2672   ierr = PetscFree(aa);CHKERRQ(ierr);
2673   ierr = PetscFree(jj);CHKERRQ(ierr);
2674   ierr = PetscFree(mask);CHKERRQ(ierr);
2675 
2676   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2677   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2678   ierr = MatView_Private(B);CHKERRQ(ierr);
2679 
2680   *A = B;
2681   PetscFunctionReturn(0);
2682 }
2683 
2684 #undef __FUNCT__
2685 #define __FUNCT__ "MatCreateSeqBAIJ"
2686 /*@C
2687    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2688    compressed row) format.  For good matrix assembly performance the
2689    user should preallocate the matrix storage by setting the parameter nz
2690    (or the array nnz).  By setting these parameters accurately, performance
2691    during matrix assembly can be increased by more than a factor of 50.
2692 
2693    Collective on MPI_Comm
2694 
2695    Input Parameters:
2696 +  comm - MPI communicator, set to PETSC_COMM_SELF
2697 .  bs - size of block
2698 .  m - number of rows
2699 .  n - number of columns
2700 .  nz - number of nonzero blocks  per block row (same for all rows)
2701 -  nnz - array containing the number of nonzero blocks in the various block rows
2702          (possibly different for each block row) or PETSC_NULL
2703 
2704    Output Parameter:
2705 .  A - the matrix
2706 
2707    Options Database Keys:
2708 .   -mat_no_unroll - uses code that does not unroll the loops in the
2709                      block calculations (much slower)
2710 .    -mat_block_size - size of the blocks to use
2711 
2712    Level: intermediate
2713 
2714    Notes:
2715    The number of rows and columns must be divisible by blocksize.
2716 
2717    If the nnz parameter is given then the nz parameter is ignored
2718 
2719    A nonzero block is any block that as 1 or more nonzeros in it
2720 
2721    The block AIJ format is fully compatible with standard Fortran 77
2722    storage.  That is, the stored row and column indices can begin at
2723    either one (as in Fortran) or zero.  See the users' manual for details.
2724 
2725    Specify the preallocated storage with either nz or nnz (not both).
2726    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2727    allocation.  For additional details, see the users manual chapter on
2728    matrices.
2729 
2730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2731 @*/
2732 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2733 {
2734   PetscErrorCode ierr;
2735 
2736   PetscFunctionBegin;
2737   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2738   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2739   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2740   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2746 /*@C
2747    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2748    per row in the matrix. For good matrix assembly performance the
2749    user should preallocate the matrix storage by setting the parameter nz
2750    (or the array nnz).  By setting these parameters accurately, performance
2751    during matrix assembly can be increased by more than a factor of 50.
2752 
2753    Collective on MPI_Comm
2754 
2755    Input Parameters:
2756 +  A - the matrix
2757 .  bs - size of block
2758 .  nz - number of block nonzeros per block row (same for all rows)
2759 -  nnz - array containing the number of block nonzeros in the various block rows
2760          (possibly different for each block row) or PETSC_NULL
2761 
2762    Options Database Keys:
2763 .   -mat_no_unroll - uses code that does not unroll the loops in the
2764                      block calculations (much slower)
2765 .    -mat_block_size - size of the blocks to use
2766 
2767    Level: intermediate
2768 
2769    Notes:
2770    If the nnz parameter is given then the nz parameter is ignored
2771 
2772    The block AIJ format is fully compatible with standard Fortran 77
2773    storage.  That is, the stored row and column indices can begin at
2774    either one (as in Fortran) or zero.  See the users' manual for details.
2775 
2776    Specify the preallocated storage with either nz or nnz (not both).
2777    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2778    allocation.  For additional details, see the users manual chapter on
2779    matrices.
2780 
2781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2782 @*/
2783 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2784 {
2785   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2786 
2787   PetscFunctionBegin;
2788   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2789   if (f) {
2790     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2791   }
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2797 /*@
2798      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2799               (upper triangular entries in CSR format) provided by the user.
2800 
2801      Collective on MPI_Comm
2802 
2803    Input Parameters:
2804 +  comm - must be an MPI communicator of size 1
2805 .  bs - size of block
2806 .  m - number of rows
2807 .  n - number of columns
2808 .  i - row indices
2809 .  j - column indices
2810 -  a - matrix values
2811 
2812    Output Parameter:
2813 .  mat - the matrix
2814 
2815    Level: intermediate
2816 
2817    Notes:
2818        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2819     once the matrix is destroyed
2820 
2821        You cannot set new nonzero locations into this matrix, that will generate an error.
2822 
2823        The i and j indices are 0 based
2824 
2825 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2826 
2827 @*/
2828 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2829 {
2830   PetscErrorCode ierr;
2831   PetscInt       ii;
2832   Mat_SeqBAIJ    *baij;
2833 
2834   PetscFunctionBegin;
2835   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2836   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2837 
2838   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2839   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2840   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2841   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2842   baij = (Mat_SeqBAIJ*)(*mat)->data;
2843   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2844 
2845   baij->i = i;
2846   baij->j = j;
2847   baij->a = a;
2848   baij->singlemalloc = PETSC_FALSE;
2849   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2850   baij->free_a       = PETSC_FALSE;
2851   baij->free_ij       = PETSC_FALSE;
2852 
2853   for (ii=0; ii<m; ii++) {
2854     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2855 #if defined(PETSC_USE_DEBUG)
2856     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2857 #endif
2858   }
2859 #if defined(PETSC_USE_DEBUG)
2860   for (ii=0; ii<baij->i[m]; ii++) {
2861     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2862     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2863   }
2864 #endif
2865 
2866   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2867   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870