xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c8d321e0b9e4749378a4360baa6ac0a0180938ee)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
18   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
19 
20   PetscFunctionBegin;
21   if (a->idiagvalid) PetscFunctionReturn(0);
22   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
23   diag_offset = a->diag;
24   if (!a->idiag) {
25     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
26   }
27   diag  = a->idiag;
28   mdiag = a->idiag+bs*bs*mbs;
29   /* factor and invert each block */
30   switch (bs){
31     case 2:
32       for (i=0; i<mbs; i++) {
33         odiag   = v + 4*diag_offset[i];
34         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
35 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
36 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
37 	diag    += 4;
38 	mdiag   += 4;
39       }
40       break;
41     case 3:
42       for (i=0; i<mbs; i++) {
43         odiag    = v + 9*diag_offset[i];
44         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
45         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
46         diag[8]  = odiag[8];
47         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
48         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
49         mdiag[8] = odiag[8];
50 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
51 	diag    += 9;
52 	mdiag   += 9;
53       }
54       break;
55     case 4:
56       for (i=0; i<mbs; i++) {
57         odiag  = v + 16*diag_offset[i];
58         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
59         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
60 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
61 	diag  += 16;
62 	mdiag += 16;
63       }
64       break;
65     case 5:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 25*diag_offset[i];
68         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
69         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
70 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
71 	diag  += 25;
72 	mdiag += 25;
73       }
74       break;
75     default:
76       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
77   }
78   a->idiagvalid = PETSC_TRUE;
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
85 {
86   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
87   PetscScalar        *x,x1,x2,s1,s2;
88   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
89   PetscErrorCode     ierr;
90   PetscInt           m = a->mbs,i,i2,nz,idx;
91   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
92 
93   PetscFunctionBegin;
94   its = its*lits;
95   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
96   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
100 
101   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
102 
103   diag  = a->diag;
104   idiag = a->idiag;
105   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
106   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
107 
108   if (flag & SOR_ZERO_INITIAL_GUESS) {
109     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112       i2     = 2;
113       idiag += 4;
114       for (i=1; i<m; i++) {
115 	v     = aa + 4*ai[i];
116 	vi    = aj + ai[i];
117 	nz    = diag[i] - ai[i];
118 	s1    = b[i2]; s2 = b[i2+1];
119 	while (nz--) {
120 	  idx  = 2*(*vi++);
121 	  x1   = x[idx]; x2 = x[1+idx];
122 	  s1  -= v[0]*x1 + v[2]*x2;
123 	  s2  -= v[1]*x1 + v[3]*x2;
124 	  v   += 4;
125 	}
126 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128         idiag   += 4;
129         i2      += 2;
130       }
131       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
133     }
134     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136       i2    = 0;
137       mdiag = a->idiag+4*a->mbs;
138       for (i=0; i<m; i++) {
139         x1      = x[i2]; x2 = x[i2+1];
140         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142         mdiag  += 4;
143         i2     += 2;
144       }
145       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
146     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
148     }
149     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150       idiag   = a->idiag+4*a->mbs - 4;
151       i2      = 2*m - 2;
152       x1      = x[i2]; x2 = x[i2+1];
153       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155       idiag -= 4;
156       i2    -= 2;
157       for (i=m-2; i>=0; i--) {
158 	v     = aa + 4*(diag[i]+1);
159 	vi    = aj + diag[i] + 1;
160 	nz    = ai[i+1] - diag[i] - 1;
161 	s1    = x[i2]; s2 = x[i2+1];
162 	while (nz--) {
163 	  idx  = 2*(*vi++);
164 	  x1   = x[idx]; x2 = x[1+idx];
165 	  s1  -= v[0]*x1 + v[2]*x2;
166 	  s2  -= v[1]*x1 + v[3]*x2;
167 	  v   += 4;
168 	}
169 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171         idiag   -= 4;
172         i2      -= 2;
173       }
174       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
175     }
176   } else {
177     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
178   }
179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
180   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191   PetscErrorCode     ierr;
192   PetscInt           m = a->mbs,i,i2,nz,idx;
193   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
194 
195   PetscFunctionBegin;
196   its = its*lits;
197   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
198   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
199   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
200   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
201   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
202 
203   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
204 
205   diag  = a->diag;
206   idiag = a->idiag;
207   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
209 
210   if (flag & SOR_ZERO_INITIAL_GUESS) {
211     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
212       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
213       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
214       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
215       i2     = 3;
216       idiag += 9;
217       for (i=1; i<m; i++) {
218 	v     = aa + 9*ai[i];
219 	vi    = aj + ai[i];
220 	nz    = diag[i] - ai[i];
221 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
222 	while (nz--) {
223 	  idx  = 3*(*vi++);
224 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
225 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
226 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
227 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
228 	  v   += 9;
229 	}
230 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
231 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
232 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
233         idiag   += 9;
234         i2      += 3;
235       }
236       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
238     }
239     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241       i2    = 0;
242       mdiag = a->idiag+9*a->mbs;
243       for (i=0; i<m; i++) {
244         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
245         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
246         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
247         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
248         mdiag  += 9;
249         i2     += 3;
250       }
251       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
252     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
253       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
254     }
255     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
256       idiag   = a->idiag+9*a->mbs - 9;
257       i2      = 3*m - 3;
258       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
259       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
260       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
261       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
262       idiag -= 9;
263       i2    -= 3;
264       for (i=m-2; i>=0; i--) {
265 	v     = aa + 9*(diag[i]+1);
266 	vi    = aj + diag[i] + 1;
267 	nz    = ai[i+1] - diag[i] - 1;
268 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
269 	while (nz--) {
270 	  idx  = 3*(*vi++);
271 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
272 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
273 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
274 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
275 	  v   += 9;
276 	}
277 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
278 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
279 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
280         idiag   -= 9;
281         i2      -= 3;
282       }
283       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
284     }
285   } else {
286     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
287   }
288   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
289   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
296 {
297   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
298   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
299   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
300   PetscErrorCode     ierr;
301   PetscInt           m = a->mbs,i,i2,nz,idx;
302   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
303 
304   PetscFunctionBegin;
305   its = its*lits;
306   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
307   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
308   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
309   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
310   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
311 
312   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
313 
314   diag  = a->diag;
315   idiag = a->idiag;
316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
318 
319   if (flag & SOR_ZERO_INITIAL_GUESS) {
320     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
321       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
322       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
323       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
324       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
325       i2     = 4;
326       idiag += 16;
327       for (i=1; i<m; i++) {
328 	v     = aa + 16*ai[i];
329 	vi    = aj + ai[i];
330 	nz    = diag[i] - ai[i];
331 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
332 	while (nz--) {
333 	  idx  = 4*(*vi++);
334 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
335 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
336 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
337 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
338 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
339 	  v   += 16;
340 	}
341 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
342 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
343 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
344 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
345         idiag   += 16;
346         i2      += 4;
347       }
348       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
349       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
350     }
351     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
352         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
353       i2    = 0;
354       mdiag = a->idiag+16*a->mbs;
355       for (i=0; i<m; i++) {
356         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
357         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
358         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
359         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
360         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
361         mdiag  += 16;
362         i2     += 4;
363       }
364       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
365     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
366       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
367     }
368     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
369       idiag   = a->idiag+16*a->mbs - 16;
370       i2      = 4*m - 4;
371       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
372       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
373       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
374       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
375       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
376       idiag -= 16;
377       i2    -= 4;
378       for (i=m-2; i>=0; i--) {
379 	v     = aa + 16*(diag[i]+1);
380 	vi    = aj + diag[i] + 1;
381 	nz    = ai[i+1] - diag[i] - 1;
382 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
383 	while (nz--) {
384 	  idx  = 4*(*vi++);
385 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
386 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
387 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
388 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
389 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
390 	  v   += 16;
391 	}
392 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
393 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
394 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
395 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
396         idiag   -= 16;
397         i2      -= 4;
398       }
399       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
400     }
401   } else {
402     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
403   }
404   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
412 {
413   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
414   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
415   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
416   PetscErrorCode     ierr;
417   PetscInt           m = a->mbs,i,i2,nz,idx;
418   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
419 
420   PetscFunctionBegin;
421   its = its*lits;
422   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
423   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
424   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
425   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
426   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
427 
428   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
429 
430   diag  = a->diag;
431   idiag = a->idiag;
432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
434 
435   if (flag & SOR_ZERO_INITIAL_GUESS) {
436     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
437       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
438       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
439       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
440       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
441       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
442       i2     = 5;
443       idiag += 25;
444       for (i=1; i<m; i++) {
445 	v     = aa + 25*ai[i];
446 	vi    = aj + ai[i];
447 	nz    = diag[i] - ai[i];
448 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
449 	while (nz--) {
450 	  idx  = 5*(*vi++);
451 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
452 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
453 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
454 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
455 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
456 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
457 	  v   += 25;
458 	}
459 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
460 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
461 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
462 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
463 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
464         idiag   += 25;
465         i2      += 5;
466       }
467       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
468       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
469     }
470     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
471         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
472       i2    = 0;
473       mdiag = a->idiag+25*a->mbs;
474       for (i=0; i<m; i++) {
475         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
476         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
477         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
478         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
479         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
480         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
481         mdiag  += 25;
482         i2     += 5;
483       }
484       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
485     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
486       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
487     }
488     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489       idiag   = a->idiag+25*a->mbs - 25;
490       i2      = 5*m - 5;
491       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
492       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
493       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
494       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
495       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
496       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
497       idiag -= 25;
498       i2    -= 5;
499       for (i=m-2; i>=0; i--) {
500 	v     = aa + 25*(diag[i]+1);
501 	vi    = aj + diag[i] + 1;
502 	nz    = ai[i+1] - diag[i] - 1;
503 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
504 	while (nz--) {
505 	  idx  = 5*(*vi++);
506 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
507 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
508 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
509 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
510 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
511 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
512 	  v   += 25;
513 	}
514 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
515 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
516 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
517 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
518 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
519         idiag   -= 25;
520         i2      -= 5;
521       }
522       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
523     }
524   } else {
525     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
526   }
527   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
528   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
529   PetscFunctionReturn(0);
530 }
531 
532 /*
533     Special version for Fun3d sequential benchmark
534 */
535 #if defined(PETSC_HAVE_FORTRAN_CAPS)
536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
538 #define matsetvaluesblocked4_ matsetvaluesblocked4
539 #endif
540 
541 EXTERN_C_BEGIN
542 #undef __FUNCT__
543 #define __FUNCT__ "matsetvaluesblocked4_"
544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
545 {
546   Mat               A = *AA;
547   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
548   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
549   PetscInt          *ai=a->i,*ailen=a->ilen;
550   PetscInt          *aj=a->j,stepval;
551   const PetscScalar *value = v;
552   MatScalar         *ap,*aa = a->a,*bap;
553 
554   PetscFunctionBegin;
555   stepval = (n-1)*4;
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     rp   = aj + ai[row];
559     ap   = aa + 16*ai[row];
560     nrow = ailen[row];
561     low  = 0;
562     for (l=0; l<n; l++) { /* loop over added columns */
563       col = in[l];
564       value = v + k*(stepval+4)*4 + l*4;
565       low = 0; high = nrow;
566       while (high-low > 7) {
567         t = (low+high)/2;
568         if (rp[t] > col) high = t;
569         else             low  = t;
570       }
571       for (i=low; i<high; i++) {
572         if (rp[i] > col) break;
573         if (rp[i] == col) {
574           bap  = ap +  16*i;
575           for (ii=0; ii<4; ii++,value+=stepval) {
576             for (jj=ii; jj<16; jj+=4) {
577               bap[jj] += *value++;
578             }
579           }
580           goto noinsert2;
581         }
582       }
583       N = nrow++ - 1;
584       /* shift up all the later entries in this row */
585       for (ii=N; ii>=i; ii--) {
586         rp[ii+1] = rp[ii];
587         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
588       }
589       if (N >= i) {
590         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
591       }
592       rp[i] = col;
593       bap   = ap +  16*i;
594       for (ii=0; ii<4; ii++,value+=stepval) {
595         for (jj=ii; jj<16; jj+=4) {
596           bap[jj] = *value++;
597         }
598       }
599       noinsert2:;
600       low = i;
601     }
602     ailen[row] = nrow;
603   }
604 }
605 EXTERN_C_END
606 
607 #if defined(PETSC_HAVE_FORTRAN_CAPS)
608 #define matsetvalues4_ MATSETVALUES4
609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
610 #define matsetvalues4_ matsetvalues4
611 #endif
612 
613 EXTERN_C_BEGIN
614 #undef __FUNCT__
615 #define __FUNCT__ "MatSetValues4_"
616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
617 {
618   Mat         A = *AA;
619   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
621   PetscInt    *ai=a->i,*ailen=a->ilen;
622   PetscInt    *aj=a->j,brow,bcol;
623   PetscInt    ridx,cidx;
624   MatScalar   *ap,value,*aa=a->a,*bap;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over added rows */
628     row  = im[k]; brow = row/4;
629     rp   = aj + ai[brow];
630     ap   = aa + 16*ai[brow];
631     nrow = ailen[brow];
632     low  = 0;
633     for (l=0; l<n; l++) { /* loop over added columns */
634       col = in[l]; bcol = col/4;
635       ridx = row % 4; cidx = col % 4;
636       value = v[l + k*n];
637       low = 0; high = nrow;
638       while (high-low > 7) {
639         t = (low+high)/2;
640         if (rp[t] > bcol) high = t;
641         else              low  = t;
642       }
643       for (i=low; i<high; i++) {
644         if (rp[i] > bcol) break;
645         if (rp[i] == bcol) {
646           bap  = ap +  16*i + 4*cidx + ridx;
647           *bap += value;
648           goto noinsert1;
649         }
650       }
651       N = nrow++ - 1;
652       /* shift up all the later entries in this row */
653       for (ii=N; ii>=i; ii--) {
654         rp[ii+1] = rp[ii];
655         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
656       }
657       if (N>=i) {
658         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
659       }
660       rp[i]                    = bcol;
661       ap[16*i + 4*cidx + ridx] = value;
662       noinsert1:;
663       low = i;
664     }
665     ailen[brow] = nrow;
666   }
667 }
668 EXTERN_C_END
669 
670 /*  UGLY, ugly, ugly
671    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
672    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
673    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
674    converts the entries into single precision and then calls ..._MatScalar() to put them
675    into the single precision data structures.
676 */
677 #if defined(PETSC_USE_MAT_SINGLE)
678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
679 #else
680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
681 #endif
682 
683 #define CHUNKSIZE  10
684 
685 /*
686      Checks for missing diagonals
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
691 {
692   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
693   PetscErrorCode ierr;
694   PetscInt       *diag,*jj = a->j,i;
695 
696   PetscFunctionBegin;
697   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
698   diag = a->diag;
699   for (i=0; i<a->mbs; i++) {
700     if (jj[diag[i]] != i) {
701       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
702     }
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
710 {
711   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
712   PetscErrorCode ierr;
713   PetscInt       i,j,*diag,m = a->mbs;
714 
715   PetscFunctionBegin;
716   if (a->diag) PetscFunctionReturn(0);
717 
718   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
719   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
720   for (i=0; i<m; i++) {
721     diag[i] = a->i[i+1];
722     for (j=a->i[i]; j<a->i[i+1]; j++) {
723       if (a->j[j] == i) {
724         diag[i] = j;
725         break;
726       }
727     }
728   }
729   a->diag = diag;
730   PetscFunctionReturn(0);
731 }
732 
733 
734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
739 {
740   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
741   PetscErrorCode ierr;
742   PetscInt       n = a->mbs,i;
743 
744   PetscFunctionBegin;
745   *nn = n;
746   if (!ia) PetscFunctionReturn(0);
747   if (symmetric) {
748     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
749   } else if (oshift == 1) {
750     /* temporarily add 1 to i and j indices */
751     PetscInt nz = a->i[n];
752     for (i=0; i<nz; i++) a->j[i]++;
753     for (i=0; i<n+1; i++) a->i[i]++;
754     *ia = a->i; *ja = a->j;
755   } else {
756     *ia = a->i; *ja = a->j;
757   }
758 
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
765 {
766   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
767   PetscErrorCode ierr;
768   PetscInt       i,n = a->mbs;
769 
770   PetscFunctionBegin;
771   if (!ia) PetscFunctionReturn(0);
772   if (symmetric) {
773     ierr = PetscFree(*ia);CHKERRQ(ierr);
774     ierr = PetscFree(*ja);CHKERRQ(ierr);
775   } else if (oshift == 1) {
776     PetscInt nz = a->i[n]-1;
777     for (i=0; i<nz; i++) a->j[i]--;
778     for (i=0; i<n+1; i++) a->i[i]--;
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatDestroy_SeqBAIJ"
785 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
786 {
787   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791 #if defined(PETSC_USE_LOG)
792   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
793 #endif
794   ierr = PetscFree(a->a);CHKERRQ(ierr);
795   if (!a->singlemalloc) {
796     ierr = PetscFree(a->i);CHKERRQ(ierr);
797     ierr = PetscFree(a->j);CHKERRQ(ierr);
798   }
799   if (a->row) {
800     ierr = ISDestroy(a->row);CHKERRQ(ierr);
801   }
802   if (a->col) {
803     ierr = ISDestroy(a->col);CHKERRQ(ierr);
804   }
805   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
806   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
807   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
808   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
809   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
810   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
811   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
812   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
813 #if defined(PETSC_USE_MAT_SINGLE)
814   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
815 #endif
816   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
817   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
818 
819   ierr = PetscFree(a);CHKERRQ(ierr);
820 
821   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
822   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
823   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
824   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
825   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
826   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatSetOption_SeqBAIJ"
832 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
833 {
834   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
835   PetscErrorCode ierr;
836 
837   PetscFunctionBegin;
838   switch (op) {
839   case MAT_ROW_ORIENTED:
840     a->roworiented    = PETSC_TRUE;
841     break;
842   case MAT_COLUMN_ORIENTED:
843     a->roworiented    = PETSC_FALSE;
844     break;
845   case MAT_COLUMNS_SORTED:
846     a->sorted         = PETSC_TRUE;
847     break;
848   case MAT_COLUMNS_UNSORTED:
849     a->sorted         = PETSC_FALSE;
850     break;
851   case MAT_KEEP_ZEROED_ROWS:
852     a->keepzeroedrows = PETSC_TRUE;
853     break;
854   case MAT_NO_NEW_NONZERO_LOCATIONS:
855     a->nonew          = 1;
856     break;
857   case MAT_NEW_NONZERO_LOCATION_ERR:
858     a->nonew          = -1;
859     break;
860   case MAT_NEW_NONZERO_ALLOCATION_ERR:
861     a->nonew          = -2;
862     break;
863   case MAT_YES_NEW_NONZERO_LOCATIONS:
864     a->nonew          = 0;
865     break;
866   case MAT_ROWS_SORTED:
867   case MAT_ROWS_UNSORTED:
868   case MAT_YES_NEW_DIAGONALS:
869   case MAT_IGNORE_OFF_PROC_ENTRIES:
870   case MAT_USE_HASH_TABLE:
871     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
872     break;
873   case MAT_NO_NEW_DIAGONALS:
874     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
875   case MAT_SYMMETRIC:
876   case MAT_STRUCTURALLY_SYMMETRIC:
877   case MAT_NOT_SYMMETRIC:
878   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
879   case MAT_HERMITIAN:
880   case MAT_NOT_HERMITIAN:
881   case MAT_SYMMETRY_ETERNAL:
882   case MAT_NOT_SYMMETRY_ETERNAL:
883     break;
884   default:
885     SETERRQ(PETSC_ERR_SUP,"unknown option");
886   }
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatGetRow_SeqBAIJ"
892 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
893 {
894   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
895   PetscErrorCode ierr;
896   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
897   MatScalar      *aa,*aa_i;
898   PetscScalar    *v_i;
899 
900   PetscFunctionBegin;
901   bs  = A->bs;
902   ai  = a->i;
903   aj  = a->j;
904   aa  = a->a;
905   bs2 = a->bs2;
906 
907   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
908 
909   bn  = row/bs;   /* Block number */
910   bp  = row % bs; /* Block Position */
911   M   = ai[bn+1] - ai[bn];
912   *nz = bs*M;
913 
914   if (v) {
915     *v = 0;
916     if (*nz) {
917       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
918       for (i=0; i<M; i++) { /* for each block in the block row */
919         v_i  = *v + i*bs;
920         aa_i = aa + bs2*(ai[bn] + i);
921         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
922       }
923     }
924   }
925 
926   if (idx) {
927     *idx = 0;
928     if (*nz) {
929       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
930       for (i=0; i<M; i++) { /* for each block in the block row */
931         idx_i = *idx + i*bs;
932         itmp  = bs*aj[ai[bn] + i];
933         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
934       }
935     }
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
942 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
943 {
944   PetscErrorCode ierr;
945 
946   PetscFunctionBegin;
947   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
948   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
949   PetscFunctionReturn(0);
950 }
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatTranspose_SeqBAIJ"
954 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
955 {
956   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
957   Mat            C;
958   PetscErrorCode ierr;
959   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
960   PetscInt       *rows,*cols,bs2=a->bs2;
961   PetscScalar    *array;
962 
963   PetscFunctionBegin;
964   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
965   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
966   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
967 
968 #if defined(PETSC_USE_MAT_SINGLE)
969   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
970   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
971 #else
972   array = a->a;
973 #endif
974 
975   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
976   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
977   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
978   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
979   ierr = PetscFree(col);CHKERRQ(ierr);
980   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
981   cols = rows + bs;
982   for (i=0; i<mbs; i++) {
983     cols[0] = i*bs;
984     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
985     len = ai[i+1] - ai[i];
986     for (j=0; j<len; j++) {
987       rows[0] = (*aj++)*bs;
988       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
989       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
990       array += bs2;
991     }
992   }
993   ierr = PetscFree(rows);CHKERRQ(ierr);
994 #if defined(PETSC_USE_MAT_SINGLE)
995   ierr = PetscFree(array);
996 #endif
997 
998   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
999   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1000 
1001   if (B) {
1002     *B = C;
1003   } else {
1004     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1011 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1012 {
1013   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1014   PetscErrorCode ierr;
1015   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1016   int            fd;
1017   PetscScalar    *aa;
1018   FILE           *file;
1019 
1020   PetscFunctionBegin;
1021   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1022   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1023   col_lens[0] = MAT_FILE_COOKIE;
1024 
1025   col_lens[1] = A->m;
1026   col_lens[2] = A->n;
1027   col_lens[3] = a->nz*bs2;
1028 
1029   /* store lengths of each row and write (including header) to file */
1030   count = 0;
1031   for (i=0; i<a->mbs; i++) {
1032     for (j=0; j<bs; j++) {
1033       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1034     }
1035   }
1036   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1037   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1038 
1039   /* store column indices (zero start index) */
1040   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1041   count = 0;
1042   for (i=0; i<a->mbs; i++) {
1043     for (j=0; j<bs; j++) {
1044       for (k=a->i[i]; k<a->i[i+1]; k++) {
1045         for (l=0; l<bs; l++) {
1046           jj[count++] = bs*a->j[k] + l;
1047         }
1048       }
1049     }
1050   }
1051   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1052   ierr = PetscFree(jj);CHKERRQ(ierr);
1053 
1054   /* store nonzero values */
1055   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1056   count = 0;
1057   for (i=0; i<a->mbs; i++) {
1058     for (j=0; j<bs; j++) {
1059       for (k=a->i[i]; k<a->i[i+1]; k++) {
1060         for (l=0; l<bs; l++) {
1061           aa[count++] = a->a[bs2*k + l*bs + j];
1062         }
1063       }
1064     }
1065   }
1066   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1067   ierr = PetscFree(aa);CHKERRQ(ierr);
1068 
1069   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1070   if (file) {
1071     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1072   }
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1078 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1079 {
1080   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1081   PetscErrorCode    ierr;
1082   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1083   PetscViewerFormat format;
1084 
1085   PetscFunctionBegin;
1086   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1087   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1088     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1089   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1090     Mat aij;
1091     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1092     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1093     ierr = MatDestroy(aij);CHKERRQ(ierr);
1094   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1095      PetscFunctionReturn(0);
1096   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1097     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1098     for (i=0; i<a->mbs; i++) {
1099       for (j=0; j<bs; j++) {
1100         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1101         for (k=a->i[i]; k<a->i[i+1]; k++) {
1102           for (l=0; l<bs; l++) {
1103 #if defined(PETSC_USE_COMPLEX)
1104             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1105               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1106                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1107             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1108               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1109                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1110             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1111               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1112             }
1113 #else
1114             if (a->a[bs2*k + l*bs + j] != 0.0) {
1115               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1116             }
1117 #endif
1118           }
1119         }
1120         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1121       }
1122     }
1123     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1124   } else {
1125     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1126     for (i=0; i<a->mbs; i++) {
1127       for (j=0; j<bs; j++) {
1128         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1129         for (k=a->i[i]; k<a->i[i+1]; k++) {
1130           for (l=0; l<bs; l++) {
1131 #if defined(PETSC_USE_COMPLEX)
1132             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1133               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1134                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1135             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1136               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1137                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1138             } else {
1139               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1140             }
1141 #else
1142             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1143 #endif
1144           }
1145         }
1146         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1147       }
1148     }
1149     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1150   }
1151   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1157 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1158 {
1159   Mat            A = (Mat) Aa;
1160   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1161   PetscErrorCode ierr;
1162   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1163   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1164   MatScalar      *aa;
1165   PetscViewer    viewer;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1170   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1171 
1172   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1173 
1174   /* loop over matrix elements drawing boxes */
1175   color = PETSC_DRAW_BLUE;
1176   for (i=0,row=0; i<mbs; i++,row+=bs) {
1177     for (j=a->i[i]; j<a->i[i+1]; j++) {
1178       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1179       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1180       aa = a->a + j*bs2;
1181       for (k=0; k<bs; k++) {
1182         for (l=0; l<bs; l++) {
1183           if (PetscRealPart(*aa++) >=  0.) continue;
1184           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1185         }
1186       }
1187     }
1188   }
1189   color = PETSC_DRAW_CYAN;
1190   for (i=0,row=0; i<mbs; i++,row+=bs) {
1191     for (j=a->i[i]; j<a->i[i+1]; j++) {
1192       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1193       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1194       aa = a->a + j*bs2;
1195       for (k=0; k<bs; k++) {
1196         for (l=0; l<bs; l++) {
1197           if (PetscRealPart(*aa++) != 0.) continue;
1198           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1199         }
1200       }
1201     }
1202   }
1203 
1204   color = PETSC_DRAW_RED;
1205   for (i=0,row=0; i<mbs; i++,row+=bs) {
1206     for (j=a->i[i]; j<a->i[i+1]; j++) {
1207       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1208       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1209       aa = a->a + j*bs2;
1210       for (k=0; k<bs; k++) {
1211         for (l=0; l<bs; l++) {
1212           if (PetscRealPart(*aa++) <= 0.) continue;
1213           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1214         }
1215       }
1216     }
1217   }
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1223 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1224 {
1225   PetscErrorCode ierr;
1226   PetscReal      xl,yl,xr,yr,w,h;
1227   PetscDraw      draw;
1228   PetscTruth     isnull;
1229 
1230   PetscFunctionBegin;
1231 
1232   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1233   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1234 
1235   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1236   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1237   xr += w;    yr += h;  xl = -w;     yl = -h;
1238   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1239   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1240   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatView_SeqBAIJ"
1246 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1247 {
1248   PetscErrorCode ierr;
1249   PetscTruth     iascii,isbinary,isdraw;
1250 
1251   PetscFunctionBegin;
1252   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1253   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1254   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1255   if (iascii){
1256     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1257   } else if (isbinary) {
1258     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1259   } else if (isdraw) {
1260     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1261   } else {
1262     Mat B;
1263     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1264     ierr = MatView(B,viewer);CHKERRQ(ierr);
1265     ierr = MatDestroy(B);CHKERRQ(ierr);
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1273 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1274 {
1275   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1276   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1277   PetscInt    *ai = a->i,*ailen = a->ilen;
1278   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1279   MatScalar   *ap,*aa = a->a,zero = 0.0;
1280 
1281   PetscFunctionBegin;
1282   for (k=0; k<m; k++) { /* loop over rows */
1283     row  = im[k]; brow = row/bs;
1284     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1285     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1286     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1287     nrow = ailen[brow];
1288     for (l=0; l<n; l++) { /* loop over columns */
1289       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1290       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1291       col  = in[l] ;
1292       bcol = col/bs;
1293       cidx = col%bs;
1294       ridx = row%bs;
1295       high = nrow;
1296       low  = 0; /* assume unsorted */
1297       while (high-low > 5) {
1298         t = (low+high)/2;
1299         if (rp[t] > bcol) high = t;
1300         else             low  = t;
1301       }
1302       for (i=low; i<high; i++) {
1303         if (rp[i] > bcol) break;
1304         if (rp[i] == bcol) {
1305           *v++ = ap[bs2*i+bs*cidx+ridx];
1306           goto finished;
1307         }
1308       }
1309       *v++ = zero;
1310       finished:;
1311     }
1312   }
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #if defined(PETSC_USE_MAT_SINGLE)
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1319 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1320 {
1321   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1322   PetscErrorCode ierr;
1323   PetscInt       i,N = m*n*b->bs2;
1324   MatScalar      *vsingle;
1325 
1326   PetscFunctionBegin;
1327   if (N > b->setvalueslen) {
1328     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1329     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1330     b->setvalueslen  = N;
1331   }
1332   vsingle = b->setvaluescopy;
1333   for (i=0; i<N; i++) {
1334     vsingle[i] = v[i];
1335   }
1336   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 #endif
1340 
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1344 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1345 {
1346   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1347   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1348   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1349   PetscErrorCode  ierr;
1350   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1351   PetscTruth      roworiented=a->roworiented;
1352   const MatScalar *value = v;
1353   MatScalar       *ap,*aa = a->a,*bap;
1354 
1355   PetscFunctionBegin;
1356   if (roworiented) {
1357     stepval = (n-1)*bs;
1358   } else {
1359     stepval = (m-1)*bs;
1360   }
1361   for (k=0; k<m; k++) { /* loop over added rows */
1362     row  = im[k];
1363     if (row < 0) continue;
1364 #if defined(PETSC_USE_DEBUG)
1365     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1366 #endif
1367     rp   = aj + ai[row];
1368     ap   = aa + bs2*ai[row];
1369     rmax = imax[row];
1370     nrow = ailen[row];
1371     low  = 0;
1372     high = nrow;
1373     for (l=0; l<n; l++) { /* loop over added columns */
1374       if (in[l] < 0) continue;
1375 #if defined(PETSC_USE_DEBUG)
1376       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1377 #endif
1378       col = in[l];
1379       if (roworiented) {
1380         value = v + k*(stepval+bs)*bs + l*bs;
1381       } else {
1382         value = v + l*(stepval+bs)*bs + k*bs;
1383       }
1384       if (col < lastcol) low = 0; else high = nrow;
1385       lastcol = col;
1386       while (high-low > 7) {
1387         t = (low+high)/2;
1388         if (rp[t] > col) high = t;
1389         else             low  = t;
1390       }
1391       for (i=low; i<high; i++) {
1392         if (rp[i] > col) break;
1393         if (rp[i] == col) {
1394           bap  = ap +  bs2*i;
1395           if (roworiented) {
1396             if (is == ADD_VALUES) {
1397               for (ii=0; ii<bs; ii++,value+=stepval) {
1398                 for (jj=ii; jj<bs2; jj+=bs) {
1399                   bap[jj] += *value++;
1400                 }
1401               }
1402             } else {
1403               for (ii=0; ii<bs; ii++,value+=stepval) {
1404                 for (jj=ii; jj<bs2; jj+=bs) {
1405                   bap[jj] = *value++;
1406                 }
1407               }
1408             }
1409           } else {
1410             if (is == ADD_VALUES) {
1411               for (ii=0; ii<bs; ii++,value+=stepval) {
1412                 for (jj=0; jj<bs; jj++) {
1413                   *bap++ += *value++;
1414                 }
1415               }
1416             } else {
1417               for (ii=0; ii<bs; ii++,value+=stepval) {
1418                 for (jj=0; jj<bs; jj++) {
1419                   *bap++  = *value++;
1420                 }
1421               }
1422             }
1423           }
1424           goto noinsert2;
1425         }
1426       }
1427       if (nonew == 1) goto noinsert2;
1428       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1429       if (nrow >= rmax) {
1430         /* there is no extra room in row, therefore enlarge */
1431         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1432         MatScalar *new_a;
1433 
1434         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1435 
1436         /* malloc new storage space */
1437         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1438 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1439         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1440         new_i   = new_j + new_nz;
1441 
1442         /* copy over old data into new slots */
1443         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1444         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1445         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1446         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1447         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1448         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1449         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1450         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1451         /* free up old matrix storage */
1452         ierr = PetscFree(a->a);CHKERRQ(ierr);
1453         if (!a->singlemalloc) {
1454           ierr = PetscFree(a->i);CHKERRQ(ierr);
1455           ierr = PetscFree(a->j);CHKERRQ(ierr);
1456         }
1457         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1458         a->singlemalloc = PETSC_TRUE;
1459 
1460         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1461         rmax = imax[row] = imax[row] + CHUNKSIZE;
1462         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1463         a->maxnz += bs2*CHUNKSIZE;
1464         a->reallocs++;
1465         a->nz++;
1466       }
1467       N = nrow++ - 1;
1468       /* shift up all the later entries in this row */
1469       for (ii=N; ii>=i; ii--) {
1470         rp[ii+1] = rp[ii];
1471         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1472       }
1473       if (N >= i) {
1474         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1475       }
1476       rp[i] = col;
1477       bap   = ap +  bs2*i;
1478       if (roworiented) {
1479         for (ii=0; ii<bs; ii++,value+=stepval) {
1480           for (jj=ii; jj<bs2; jj+=bs) {
1481             bap[jj] = *value++;
1482           }
1483         }
1484       } else {
1485         for (ii=0; ii<bs; ii++,value+=stepval) {
1486           for (jj=0; jj<bs; jj++) {
1487             *bap++  = *value++;
1488           }
1489         }
1490       }
1491       noinsert2:;
1492       low = i;
1493     }
1494     ailen[row] = nrow;
1495   }
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 #undef __FUNCT__
1500 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1501 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1502 {
1503   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1504   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1505   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1506   PetscErrorCode ierr;
1507   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1508   MatScalar      *aa = a->a,*ap;
1509   PetscReal      ratio=0.6;
1510 
1511   PetscFunctionBegin;
1512   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1513 
1514   if (m) rmax = ailen[0];
1515   for (i=1; i<mbs; i++) {
1516     /* move each row back by the amount of empty slots (fshift) before it*/
1517     fshift += imax[i-1] - ailen[i-1];
1518     rmax   = PetscMax(rmax,ailen[i]);
1519     if (fshift) {
1520       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1521       N = ailen[i];
1522       for (j=0; j<N; j++) {
1523         ip[j-fshift] = ip[j];
1524         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1525       }
1526     }
1527     ai[i] = ai[i-1] + ailen[i-1];
1528   }
1529   if (mbs) {
1530     fshift += imax[mbs-1] - ailen[mbs-1];
1531     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1532   }
1533   /* reset ilen and imax for each row */
1534   for (i=0; i<mbs; i++) {
1535     ailen[i] = imax[i] = ai[i+1] - ai[i];
1536   }
1537   a->nz = ai[mbs];
1538 
1539   /* diagonals may have moved, so kill the diagonal pointers */
1540   a->idiagvalid = PETSC_FALSE;
1541   if (fshift && a->diag) {
1542     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1543     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1544     a->diag = 0;
1545   }
1546   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1547   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1548   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1549   a->reallocs          = 0;
1550   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1551 
1552   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1553   if (a->compressedrow.use){
1554     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1555   }
1556 
1557   A->same_nonzero = PETSC_TRUE;
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 /*
1562    This function returns an array of flags which indicate the locations of contiguous
1563    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1564    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1565    Assume: sizes should be long enough to hold all the values.
1566 */
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1569 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1570 {
1571   PetscInt   i,j,k,row;
1572   PetscTruth flg;
1573 
1574   PetscFunctionBegin;
1575   for (i=0,j=0; i<n; j++) {
1576     row = idx[i];
1577     if (row%bs!=0) { /* Not the begining of a block */
1578       sizes[j] = 1;
1579       i++;
1580     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1581       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1582       i++;
1583     } else { /* Begining of the block, so check if the complete block exists */
1584       flg = PETSC_TRUE;
1585       for (k=1; k<bs; k++) {
1586         if (row+k != idx[i+k]) { /* break in the block */
1587           flg = PETSC_FALSE;
1588           break;
1589         }
1590       }
1591       if (flg) { /* No break in the bs */
1592         sizes[j] = bs;
1593         i+= bs;
1594       } else {
1595         sizes[j] = 1;
1596         i++;
1597       }
1598     }
1599   }
1600   *bs_max = j;
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1606 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1607 {
1608   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1609   PetscErrorCode ierr;
1610   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1611   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1612   PetscScalar    zero = 0.0;
1613   MatScalar      *aa;
1614 
1615   PetscFunctionBegin;
1616   /* Make a copy of the IS and  sort it */
1617   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1618   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1619 
1620   /* allocate memory for rows,sizes */
1621   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1622   sizes = rows + is_n;
1623 
1624   /* copy IS values to rows, and sort them */
1625   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1626   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1627   if (baij->keepzeroedrows) {
1628     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1629     bs_max = is_n;
1630     A->same_nonzero = PETSC_TRUE;
1631   } else {
1632     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1633     A->same_nonzero = PETSC_FALSE;
1634   }
1635   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1636 
1637   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1638     row   = rows[j];
1639     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1640     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1641     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1642     if (sizes[i] == bs && !baij->keepzeroedrows) {
1643       if (diag) {
1644         if (baij->ilen[row/bs] > 0) {
1645           baij->ilen[row/bs]       = 1;
1646           baij->j[baij->i[row/bs]] = row/bs;
1647           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1648         }
1649         /* Now insert all the diagonal values for this bs */
1650         for (k=0; k<bs; k++) {
1651           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1652         }
1653       } else { /* (!diag) */
1654         baij->ilen[row/bs] = 0;
1655       } /* end (!diag) */
1656     } else { /* (sizes[i] != bs) */
1657 #if defined (PETSC_USE_DEBUG)
1658       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1659 #endif
1660       for (k=0; k<count; k++) {
1661         aa[0] =  zero;
1662         aa    += bs;
1663       }
1664       if (diag) {
1665         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1666       }
1667     }
1668   }
1669 
1670   ierr = PetscFree(rows);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1678 {
1679   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1680   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1681   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1682   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1683   PetscErrorCode ierr;
1684   PetscInt       ridx,cidx,bs2=a->bs2;
1685   PetscTruth     roworiented=a->roworiented;
1686   MatScalar      *ap,value,*aa=a->a,*bap;
1687 
1688   PetscFunctionBegin;
1689   for (k=0; k<m; k++) { /* loop over added rows */
1690     row  = im[k]; brow = row/bs;
1691     if (row < 0) continue;
1692 #if defined(PETSC_USE_DEBUG)
1693     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1694 #endif
1695     rp   = aj + ai[brow];
1696     ap   = aa + bs2*ai[brow];
1697     rmax = imax[brow];
1698     nrow = ailen[brow];
1699     low  = 0;
1700     high = nrow;
1701     for (l=0; l<n; l++) { /* loop over added columns */
1702       if (in[l] < 0) continue;
1703 #if defined(PETSC_USE_DEBUG)
1704       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1705 #endif
1706       col = in[l]; bcol = col/bs;
1707       ridx = row % bs; cidx = col % bs;
1708       if (roworiented) {
1709         value = v[l + k*n];
1710       } else {
1711         value = v[k + l*m];
1712       }
1713       if (col < lastcol) low = 0; else high = nrow;
1714       lastcol = col;
1715       while (high-low > 7) {
1716         t = (low+high)/2;
1717         if (rp[t] > bcol) high = t;
1718         else              low  = t;
1719       }
1720       for (i=low; i<high; i++) {
1721         if (rp[i] > bcol) break;
1722         if (rp[i] == bcol) {
1723           bap  = ap +  bs2*i + bs*cidx + ridx;
1724           if (is == ADD_VALUES) *bap += value;
1725           else                  *bap  = value;
1726           goto noinsert1;
1727         }
1728       }
1729       if (nonew == 1) goto noinsert1;
1730       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1731       if (nrow >= rmax) {
1732         /* there is no extra room in row, therefore enlarge */
1733         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1734         MatScalar *new_a;
1735 
1736         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1737 
1738         /* Malloc new storage space */
1739         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1740 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1741         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1742         new_i   = new_j + new_nz;
1743 
1744         /* copy over old data into new slots */
1745         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1746         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1747         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1748         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1749         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1750         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1751         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1752         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1753         /* free up old matrix storage */
1754         ierr = PetscFree(a->a);CHKERRQ(ierr);
1755         if (!a->singlemalloc) {
1756           ierr = PetscFree(a->i);CHKERRQ(ierr);
1757           ierr = PetscFree(a->j);CHKERRQ(ierr);
1758         }
1759         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1760         a->singlemalloc = PETSC_TRUE;
1761 
1762         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1763         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1764         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1765         a->maxnz += bs2*CHUNKSIZE;
1766         a->reallocs++;
1767         a->nz++;
1768       }
1769       N = nrow++ - 1;
1770       /* shift up all the later entries in this row */
1771       for (ii=N; ii>=i; ii--) {
1772         rp[ii+1] = rp[ii];
1773         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1774       }
1775       if (N>=i) {
1776         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1777       }
1778       rp[i]                      = bcol;
1779       ap[bs2*i + bs*cidx + ridx] = value;
1780       noinsert1:;
1781       low = i;
1782     }
1783     ailen[brow] = nrow;
1784   }
1785   A->same_nonzero = PETSC_FALSE;
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1792 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1793 {
1794   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1795   Mat            outA;
1796   PetscErrorCode ierr;
1797   PetscTruth     row_identity,col_identity;
1798 
1799   PetscFunctionBegin;
1800   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1801   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1802   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1803   if (!row_identity || !col_identity) {
1804     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1805   }
1806 
1807   outA          = inA;
1808   inA->factor   = FACTOR_LU;
1809 
1810   if (!a->diag) {
1811     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1812   }
1813 
1814   a->row        = row;
1815   a->col        = col;
1816   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1817   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1818 
1819   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1820   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1821   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1822 
1823   /*
1824       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1825       for ILU(0) factorization with natural ordering
1826   */
1827   if (inA->bs < 8) {
1828     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1829   } else {
1830     if (!a->solve_work) {
1831       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1832       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1833     }
1834   }
1835 
1836   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1837 
1838   PetscFunctionReturn(0);
1839 }
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1842 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1843 {
1844   static PetscTruth called = PETSC_FALSE;
1845   MPI_Comm          comm = A->comm;
1846   PetscErrorCode    ierr;
1847 
1848   PetscFunctionBegin;
1849   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1850   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1851   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 EXTERN_C_BEGIN
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1858 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1859 {
1860   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1861   PetscInt    i,nz,nbs;
1862 
1863   PetscFunctionBegin;
1864   nz  = baij->maxnz/baij->bs2;
1865   nbs = baij->nbs;
1866   for (i=0; i<nz; i++) {
1867     baij->j[i] = indices[i];
1868   }
1869   baij->nz = nz;
1870   for (i=0; i<nbs; i++) {
1871     baij->ilen[i] = baij->imax[i];
1872   }
1873 
1874   PetscFunctionReturn(0);
1875 }
1876 EXTERN_C_END
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1880 /*@
1881     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1882        in the matrix.
1883 
1884   Input Parameters:
1885 +  mat - the SeqBAIJ matrix
1886 -  indices - the column indices
1887 
1888   Level: advanced
1889 
1890   Notes:
1891     This can be called if you have precomputed the nonzero structure of the
1892   matrix and want to provide it to the matrix object to improve the performance
1893   of the MatSetValues() operation.
1894 
1895     You MUST have set the correct numbers of nonzeros per row in the call to
1896   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1897 
1898     MUST be called before any calls to MatSetValues();
1899 
1900 @*/
1901 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1902 {
1903   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1904 
1905   PetscFunctionBegin;
1906   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1907   PetscValidPointer(indices,2);
1908   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1909   if (f) {
1910     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1911   } else {
1912     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1919 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1920 {
1921   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1922   PetscErrorCode ierr;
1923   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1924   PetscReal      atmp;
1925   PetscScalar    *x,zero = 0.0;
1926   MatScalar      *aa;
1927   PetscInt       ncols,brow,krow,kcol;
1928 
1929   PetscFunctionBegin;
1930   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1931   bs   = A->bs;
1932   aa   = a->a;
1933   ai   = a->i;
1934   aj   = a->j;
1935   mbs = a->mbs;
1936 
1937   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1938   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1939   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1940   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1941   for (i=0; i<mbs; i++) {
1942     ncols = ai[1] - ai[0]; ai++;
1943     brow  = bs*i;
1944     for (j=0; j<ncols; j++){
1945       /* bcol = bs*(*aj); */
1946       for (kcol=0; kcol<bs; kcol++){
1947         for (krow=0; krow<bs; krow++){
1948           atmp = PetscAbsScalar(*aa); aa++;
1949           row = brow + krow;    /* row index */
1950           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1951           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1952         }
1953       }
1954       aj++;
1955     }
1956   }
1957   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1963 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1964 {
1965   PetscErrorCode ierr;
1966 
1967   PetscFunctionBegin;
1968   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1974 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1975 {
1976   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1977   PetscFunctionBegin;
1978   *array = a->a;
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1984 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1985 {
1986   PetscFunctionBegin;
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 #include "petscblaslapack.h"
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1993 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1994 {
1995   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1996   PetscErrorCode ierr;
1997   PetscInt       i,bs=Y->bs,j,bs2;
1998   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1999 
2000   PetscFunctionBegin;
2001   if (str == SAME_NONZERO_PATTERN) {
2002     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2003   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2004     if (y->xtoy && y->XtoY != X) {
2005       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2006       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2007     }
2008     if (!y->xtoy) { /* get xtoy */
2009       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2010       y->XtoY = X;
2011     }
2012     bs2 = bs*bs;
2013     for (i=0; i<x->nz; i++) {
2014       j = 0;
2015       while (j < bs2){
2016         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2017         j++;
2018       }
2019     }
2020     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2021   } else {
2022     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2023   }
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 /* -------------------------------------------------------------------*/
2028 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2029        MatGetRow_SeqBAIJ,
2030        MatRestoreRow_SeqBAIJ,
2031        MatMult_SeqBAIJ_N,
2032 /* 4*/ MatMultAdd_SeqBAIJ_N,
2033        MatMultTranspose_SeqBAIJ,
2034        MatMultTransposeAdd_SeqBAIJ,
2035        MatSolve_SeqBAIJ_N,
2036        0,
2037        0,
2038 /*10*/ 0,
2039        MatLUFactor_SeqBAIJ,
2040        0,
2041        0,
2042        MatTranspose_SeqBAIJ,
2043 /*15*/ MatGetInfo_SeqBAIJ,
2044        MatEqual_SeqBAIJ,
2045        MatGetDiagonal_SeqBAIJ,
2046        MatDiagonalScale_SeqBAIJ,
2047        MatNorm_SeqBAIJ,
2048 /*20*/ 0,
2049        MatAssemblyEnd_SeqBAIJ,
2050        0,
2051        MatSetOption_SeqBAIJ,
2052        MatZeroEntries_SeqBAIJ,
2053 /*25*/ MatZeroRows_SeqBAIJ,
2054        MatLUFactorSymbolic_SeqBAIJ,
2055        MatLUFactorNumeric_SeqBAIJ_N,
2056        MatCholeskyFactorSymbolic_SeqBAIJ,
2057        MatCholeskyFactorNumeric_SeqBAIJ_N,
2058 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2059        MatILUFactorSymbolic_SeqBAIJ,
2060        MatICCFactorSymbolic_SeqBAIJ,
2061        MatGetArray_SeqBAIJ,
2062        MatRestoreArray_SeqBAIJ,
2063 /*35*/ MatDuplicate_SeqBAIJ,
2064        0,
2065        0,
2066        MatILUFactor_SeqBAIJ,
2067        0,
2068 /*40*/ MatAXPY_SeqBAIJ,
2069        MatGetSubMatrices_SeqBAIJ,
2070        MatIncreaseOverlap_SeqBAIJ,
2071        MatGetValues_SeqBAIJ,
2072        0,
2073 /*45*/ MatPrintHelp_SeqBAIJ,
2074        MatScale_SeqBAIJ,
2075        0,
2076        0,
2077        0,
2078 /*50*/ 0,
2079        MatGetRowIJ_SeqBAIJ,
2080        MatRestoreRowIJ_SeqBAIJ,
2081        0,
2082        0,
2083 /*55*/ 0,
2084        0,
2085        0,
2086        0,
2087        MatSetValuesBlocked_SeqBAIJ,
2088 /*60*/ MatGetSubMatrix_SeqBAIJ,
2089        MatDestroy_SeqBAIJ,
2090        MatView_SeqBAIJ,
2091        MatGetPetscMaps_Petsc,
2092        0,
2093 /*65*/ 0,
2094        0,
2095        0,
2096        0,
2097        0,
2098 /*70*/ MatGetRowMax_SeqBAIJ,
2099        MatConvert_Basic,
2100        0,
2101        0,
2102        0,
2103 /*75*/ 0,
2104        0,
2105        0,
2106        0,
2107        0,
2108 /*80*/ 0,
2109        0,
2110        0,
2111        0,
2112        MatLoad_SeqBAIJ,
2113 /*85*/ 0,
2114        0,
2115        0,
2116        0,
2117        0,
2118 /*90*/ 0,
2119        0,
2120        0,
2121        0,
2122        0,
2123 /*95*/ 0,
2124        0,
2125        0,
2126        0};
2127 
2128 EXTERN_C_BEGIN
2129 #undef __FUNCT__
2130 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2131 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2132 {
2133   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2134   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   if (aij->nonew != 1) {
2139     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2140   }
2141 
2142   /* allocate space for values if not already there */
2143   if (!aij->saved_values) {
2144     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2145   }
2146 
2147   /* copy values over */
2148   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 EXTERN_C_END
2152 
2153 EXTERN_C_BEGIN
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2156 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2157 {
2158   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2159   PetscErrorCode ierr;
2160   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2161 
2162   PetscFunctionBegin;
2163   if (aij->nonew != 1) {
2164     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2165   }
2166   if (!aij->saved_values) {
2167     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2168   }
2169 
2170   /* copy values over */
2171   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 EXTERN_C_END
2175 
2176 EXTERN_C_BEGIN
2177 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2178 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2179 EXTERN_C_END
2180 
2181 EXTERN_C_BEGIN
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2184 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2185 {
2186   Mat_SeqBAIJ    *b;
2187   PetscErrorCode ierr;
2188   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2189   PetscTruth     flg;
2190 
2191   PetscFunctionBegin;
2192 
2193   B->preallocated = PETSC_TRUE;
2194   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2195   if (nnz && newbs != bs) {
2196     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2197   }
2198   bs   = newbs;
2199 
2200   mbs  = B->m/bs;
2201   nbs  = B->n/bs;
2202   bs2  = bs*bs;
2203 
2204   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2205     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2206   }
2207 
2208   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2209   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2210   if (nnz) {
2211     for (i=0; i<mbs; i++) {
2212       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2213       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2214     }
2215   }
2216 
2217   b       = (Mat_SeqBAIJ*)B->data;
2218   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2219   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2220   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2221   if (!flg) {
2222     switch (bs) {
2223     case 1:
2224       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2225       B->ops->mult            = MatMult_SeqBAIJ_1;
2226       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2227       break;
2228     case 2:
2229       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2230       B->ops->mult            = MatMult_SeqBAIJ_2;
2231       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2232       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2233       break;
2234     case 3:
2235       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2236       B->ops->mult            = MatMult_SeqBAIJ_3;
2237       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2238       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2239       break;
2240     case 4:
2241       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2242       B->ops->mult            = MatMult_SeqBAIJ_4;
2243       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2244       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2245       break;
2246     case 5:
2247       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2248       B->ops->mult            = MatMult_SeqBAIJ_5;
2249       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2250       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2251       break;
2252     case 6:
2253       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2254       B->ops->mult            = MatMult_SeqBAIJ_6;
2255       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2256       break;
2257     case 7:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2259       B->ops->mult            = MatMult_SeqBAIJ_7;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2261       break;
2262     default:
2263       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2264       B->ops->mult            = MatMult_SeqBAIJ_N;
2265       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2266       break;
2267     }
2268   }
2269   B->bs      = bs;
2270   b->mbs     = mbs;
2271   b->nbs     = nbs;
2272   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2273   if (!nnz) {
2274     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2275     else if (nz <= 0)        nz = 1;
2276     for (i=0; i<mbs; i++) b->imax[i] = nz;
2277     nz = nz*mbs;
2278   } else {
2279     nz = 0;
2280     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2281   }
2282 
2283   /* allocate the matrix space */
2284   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2285   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2286   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2287   b->j  = (PetscInt*)(b->a + nz*bs2);
2288   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2289   b->i  = b->j + nz;
2290   b->singlemalloc = PETSC_TRUE;
2291 
2292   b->i[0] = 0;
2293   for (i=1; i<mbs+1; i++) {
2294     b->i[i] = b->i[i-1] + b->imax[i-1];
2295   }
2296 
2297   /* b->ilen will count nonzeros in each block row so far. */
2298   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2299   ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2300   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2301 
2302   B->bs               = bs;
2303   b->bs2              = bs2;
2304   b->mbs              = mbs;
2305   b->nz               = 0;
2306   b->maxnz            = nz*bs2;
2307   B->info.nz_unneeded = (PetscReal)b->maxnz;
2308   PetscFunctionReturn(0);
2309 }
2310 EXTERN_C_END
2311 
2312 /*MC
2313    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2314    block sparse compressed row format.
2315 
2316    Options Database Keys:
2317 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2318 
2319   Level: beginner
2320 
2321 .seealso: MatCreateSeqBAIJ
2322 M*/
2323 
2324 EXTERN_C_BEGIN
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatCreate_SeqBAIJ"
2327 PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2328 {
2329   PetscErrorCode ierr;
2330   PetscMPIInt    size;
2331   Mat_SeqBAIJ    *b;
2332 
2333   PetscFunctionBegin;
2334   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2335   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2336 
2337   B->m = B->M = PetscMax(B->m,B->M);
2338   B->n = B->N = PetscMax(B->n,B->N);
2339   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2340   B->data = (void*)b;
2341   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2342   B->factor           = 0;
2343   B->mapping          = 0;
2344   b->row              = 0;
2345   b->col              = 0;
2346   b->icol             = 0;
2347   b->reallocs         = 0;
2348   b->saved_values     = 0;
2349 #if defined(PETSC_USE_MAT_SINGLE)
2350   b->setvalueslen     = 0;
2351   b->setvaluescopy    = PETSC_NULL;
2352 #endif
2353 
2354   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2355   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2356 
2357   b->sorted           = PETSC_FALSE;
2358   b->roworiented      = PETSC_TRUE;
2359   b->nonew            = 0;
2360   b->diag             = 0;
2361   b->solve_work       = 0;
2362   b->mult_work        = 0;
2363   B->spptr            = 0;
2364   B->info.nz_unneeded = (PetscReal)b->maxnz;
2365   b->keepzeroedrows   = PETSC_FALSE;
2366   b->xtoy              = 0;
2367   b->XtoY              = 0;
2368   b->compressedrow.use     = PETSC_FALSE;
2369   b->compressedrow.nrows   = 0;
2370   b->compressedrow.i       = PETSC_NULL;
2371   b->compressedrow.rindex  = PETSC_NULL;
2372   b->compressedrow.checked = PETSC_FALSE;
2373   B->same_nonzero          = PETSC_FALSE;
2374 
2375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2376                                      "MatStoreValues_SeqBAIJ",
2377                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2379                                      "MatRetrieveValues_SeqBAIJ",
2380                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2382                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2383                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2384   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2385                                      "MatConvert_SeqBAIJ_SeqAIJ",
2386                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2387 #if !defined(PETSC_USE_64BIT_INT)
2388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2389                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2390                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2391 #endif
2392   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2393                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2394                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2395   PetscFunctionReturn(0);
2396 }
2397 EXTERN_C_END
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2401 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2402 {
2403   Mat            C;
2404   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2405   PetscErrorCode ierr;
2406   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2407 
2408   PetscFunctionBegin;
2409   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2410 
2411   *B = 0;
2412   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2413   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2414   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2415   c    = (Mat_SeqBAIJ*)C->data;
2416 
2417   C->M   = A->M;
2418   C->N   = A->N;
2419   C->bs  = A->bs;
2420   c->bs2 = a->bs2;
2421   c->mbs = a->mbs;
2422   c->nbs = a->nbs;
2423 
2424   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2425   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2426   for (i=0; i<mbs; i++) {
2427     c->imax[i] = a->imax[i];
2428     c->ilen[i] = a->ilen[i];
2429   }
2430 
2431   /* allocate the matrix space */
2432   c->singlemalloc = PETSC_TRUE;
2433   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2434   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2435   c->j = (PetscInt*)(c->a + nz*bs2);
2436   c->i = c->j + nz;
2437   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2438   if (mbs > 0) {
2439     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2440     if (cpvalues == MAT_COPY_VALUES) {
2441       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2442     } else {
2443       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2444     }
2445   }
2446 
2447   ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2448   c->sorted      = a->sorted;
2449   c->roworiented = a->roworiented;
2450   c->nonew       = a->nonew;
2451 
2452   if (a->diag) {
2453     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2454     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2455     for (i=0; i<mbs; i++) {
2456       c->diag[i] = a->diag[i];
2457     }
2458   } else c->diag        = 0;
2459   c->nz                 = a->nz;
2460   c->maxnz              = a->maxnz;
2461   c->solve_work         = 0;
2462   c->mult_work          = 0;
2463   C->preallocated       = PETSC_TRUE;
2464   C->assembled          = PETSC_TRUE;
2465 
2466   c->compressedrow.use     = a->compressedrow.use;
2467   c->compressedrow.nrows   = a->compressedrow.nrows;
2468   c->compressedrow.checked = a->compressedrow.checked;
2469   if ( a->compressedrow.checked && a->compressedrow.use){
2470     i = a->compressedrow.nrows;
2471     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2472     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2473     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2474     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2475   } else {
2476     c->compressedrow.use    = PETSC_FALSE;
2477     c->compressedrow.i      = PETSC_NULL;
2478     c->compressedrow.rindex = PETSC_NULL;
2479   }
2480   C->same_nonzero = A->same_nonzero;
2481   *B = C;
2482   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatLoad_SeqBAIJ"
2488 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2489 {
2490   Mat_SeqBAIJ    *a;
2491   Mat            B;
2492   PetscErrorCode ierr;
2493   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2494   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2495   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2496   PetscInt       *masked,nmask,tmp,bs2,ishift;
2497   PetscMPIInt    size;
2498   int            fd;
2499   PetscScalar    *aa;
2500   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2501 
2502   PetscFunctionBegin;
2503   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2504   bs2  = bs*bs;
2505 
2506   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2507   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2508   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2509   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2510   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2511   M = header[1]; N = header[2]; nz = header[3];
2512 
2513   if (header[3] < 0) {
2514     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2515   }
2516 
2517   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2518 
2519   /*
2520      This code adds extra rows to make sure the number of rows is
2521     divisible by the blocksize
2522   */
2523   mbs        = M/bs;
2524   extra_rows = bs - M + bs*(mbs);
2525   if (extra_rows == bs) extra_rows = 0;
2526   else                  mbs++;
2527   if (extra_rows) {
2528     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2529   }
2530 
2531   /* read in row lengths */
2532   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2533   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2534   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2535 
2536   /* read in column indices */
2537   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2538   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2539   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2540 
2541   /* loop over row lengths determining block row lengths */
2542   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2543   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2544   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2545   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2546   masked   = mask + mbs;
2547   rowcount = 0; nzcount = 0;
2548   for (i=0; i<mbs; i++) {
2549     nmask = 0;
2550     for (j=0; j<bs; j++) {
2551       kmax = rowlengths[rowcount];
2552       for (k=0; k<kmax; k++) {
2553         tmp = jj[nzcount++]/bs;
2554         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2555       }
2556       rowcount++;
2557     }
2558     browlengths[i] += nmask;
2559     /* zero out the mask elements we set */
2560     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2561   }
2562 
2563   /* create our matrix */
2564   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2565   ierr = MatSetType(B,type);CHKERRQ(ierr);
2566   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2567   a = (Mat_SeqBAIJ*)B->data;
2568 
2569   /* set matrix "i" values */
2570   a->i[0] = 0;
2571   for (i=1; i<= mbs; i++) {
2572     a->i[i]      = a->i[i-1] + browlengths[i-1];
2573     a->ilen[i-1] = browlengths[i-1];
2574   }
2575   a->nz         = 0;
2576   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2577 
2578   /* read in nonzero values */
2579   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2580   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2581   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2582 
2583   /* set "a" and "j" values into matrix */
2584   nzcount = 0; jcount = 0;
2585   for (i=0; i<mbs; i++) {
2586     nzcountb = nzcount;
2587     nmask    = 0;
2588     for (j=0; j<bs; j++) {
2589       kmax = rowlengths[i*bs+j];
2590       for (k=0; k<kmax; k++) {
2591         tmp = jj[nzcount++]/bs;
2592 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2593       }
2594     }
2595     /* sort the masked values */
2596     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2597 
2598     /* set "j" values into matrix */
2599     maskcount = 1;
2600     for (j=0; j<nmask; j++) {
2601       a->j[jcount++]  = masked[j];
2602       mask[masked[j]] = maskcount++;
2603     }
2604     /* set "a" values into matrix */
2605     ishift = bs2*a->i[i];
2606     for (j=0; j<bs; j++) {
2607       kmax = rowlengths[i*bs+j];
2608       for (k=0; k<kmax; k++) {
2609         tmp       = jj[nzcountb]/bs ;
2610         block     = mask[tmp] - 1;
2611         point     = jj[nzcountb] - bs*tmp;
2612         idx       = ishift + bs2*block + j + bs*point;
2613         a->a[idx] = (MatScalar)aa[nzcountb++];
2614       }
2615     }
2616     /* zero out the mask elements we set */
2617     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2618   }
2619   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2620 
2621   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2622   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2623   ierr = PetscFree(aa);CHKERRQ(ierr);
2624   ierr = PetscFree(jj);CHKERRQ(ierr);
2625   ierr = PetscFree(mask);CHKERRQ(ierr);
2626 
2627   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2628   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2629   ierr = MatView_Private(B);CHKERRQ(ierr);
2630 
2631   *A = B;
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatCreateSeqBAIJ"
2637 /*@C
2638    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2639    compressed row) format.  For good matrix assembly performance the
2640    user should preallocate the matrix storage by setting the parameter nz
2641    (or the array nnz).  By setting these parameters accurately, performance
2642    during matrix assembly can be increased by more than a factor of 50.
2643 
2644    Collective on MPI_Comm
2645 
2646    Input Parameters:
2647 +  comm - MPI communicator, set to PETSC_COMM_SELF
2648 .  bs - size of block
2649 .  m - number of rows
2650 .  n - number of columns
2651 .  nz - number of nonzero blocks  per block row (same for all rows)
2652 -  nnz - array containing the number of nonzero blocks in the various block rows
2653          (possibly different for each block row) or PETSC_NULL
2654 
2655    Output Parameter:
2656 .  A - the matrix
2657 
2658    Options Database Keys:
2659 .   -mat_no_unroll - uses code that does not unroll the loops in the
2660                      block calculations (much slower)
2661 .    -mat_block_size - size of the blocks to use
2662 
2663    Level: intermediate
2664 
2665    Notes:
2666    The number of rows and columns must be divisible by blocksize.
2667 
2668    If the nnz parameter is given then the nz parameter is ignored
2669 
2670    A nonzero block is any block that as 1 or more nonzeros in it
2671 
2672    The block AIJ format is fully compatible with standard Fortran 77
2673    storage.  That is, the stored row and column indices can begin at
2674    either one (as in Fortran) or zero.  See the users' manual for details.
2675 
2676    Specify the preallocated storage with either nz or nnz (not both).
2677    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2678    allocation.  For additional details, see the users manual chapter on
2679    matrices.
2680 
2681 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2682 @*/
2683 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2684 {
2685   PetscErrorCode ierr;
2686 
2687   PetscFunctionBegin;
2688   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2689   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2690   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2696 /*@C
2697    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2698    per row in the matrix. For good matrix assembly performance the
2699    user should preallocate the matrix storage by setting the parameter nz
2700    (or the array nnz).  By setting these parameters accurately, performance
2701    during matrix assembly can be increased by more than a factor of 50.
2702 
2703    Collective on MPI_Comm
2704 
2705    Input Parameters:
2706 +  A - the matrix
2707 .  bs - size of block
2708 .  nz - number of block nonzeros per block row (same for all rows)
2709 -  nnz - array containing the number of block nonzeros in the various block rows
2710          (possibly different for each block row) or PETSC_NULL
2711 
2712    Options Database Keys:
2713 .   -mat_no_unroll - uses code that does not unroll the loops in the
2714                      block calculations (much slower)
2715 .    -mat_block_size - size of the blocks to use
2716 
2717    Level: intermediate
2718 
2719    Notes:
2720    If the nnz parameter is given then the nz parameter is ignored
2721 
2722    The block AIJ format is fully compatible with standard Fortran 77
2723    storage.  That is, the stored row and column indices can begin at
2724    either one (as in Fortran) or zero.  See the users' manual for details.
2725 
2726    Specify the preallocated storage with either nz or nnz (not both).
2727    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2728    allocation.  For additional details, see the users manual chapter on
2729    matrices.
2730 
2731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2732 @*/
2733 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2734 {
2735   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2736 
2737   PetscFunctionBegin;
2738   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2739   if (f) {
2740     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2741   }
2742   PetscFunctionReturn(0);
2743 }
2744 
2745