xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 046da0a13659bf6557c8549a2fea89880b2ef309)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = a->bs,mbs = a->mbs;
18   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
19 
20   PetscFunctionBegin;
21   if (a->idiagvalid) PetscFunctionReturn(0);
22   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
23   diag_offset = a->diag;
24   if (!a->idiag) {
25     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
26   }
27   diag  = a->idiag;
28   mdiag = a->idiag+bs*bs*mbs;
29   /* factor and invert each block */
30   switch (a->bs){
31     case 2:
32       for (i=0; i<mbs; i++) {
33         odiag   = v + 4*diag_offset[i];
34         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
35 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
36 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
37 	diag    += 4;
38 	mdiag   += 4;
39       }
40       break;
41     case 3:
42       for (i=0; i<mbs; i++) {
43         odiag    = v + 9*diag_offset[i];
44         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
45         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
46         diag[8]  = odiag[8];
47         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
48         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
49         mdiag[8] = odiag[8];
50 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
51 	diag    += 9;
52 	mdiag   += 9;
53       }
54       break;
55     case 4:
56       for (i=0; i<mbs; i++) {
57         odiag  = v + 16*diag_offset[i];
58         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
59         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
60 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
61 	diag  += 16;
62 	mdiag += 16;
63       }
64       break;
65     case 5:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 25*diag_offset[i];
68         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
69         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
70 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
71 	diag  += 25;
72 	mdiag += 25;
73       }
74       break;
75     default:
76       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",a->bs);
77   }
78   a->idiagvalid = PETSC_TRUE;
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
85 {
86   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
87   PetscScalar        *x,x1,x2,s1,s2;
88   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
89   PetscErrorCode     ierr;
90   PetscInt           m = a->mbs,i,i2,nz,idx;
91   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
92 
93   PetscFunctionBegin;
94   its = its*lits;
95   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
96   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
100 
101   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
102 
103   diag  = a->diag;
104   idiag = a->idiag;
105   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
106   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
107 
108   if (flag & SOR_ZERO_INITIAL_GUESS) {
109     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112       i2     = 2;
113       idiag += 4;
114       for (i=1; i<m; i++) {
115 	v     = aa + 4*ai[i];
116 	vi    = aj + ai[i];
117 	nz    = diag[i] - ai[i];
118 	s1    = b[i2]; s2 = b[i2+1];
119 	while (nz--) {
120 	  idx  = 2*(*vi++);
121 	  x1   = x[idx]; x2 = x[1+idx];
122 	  s1  -= v[0]*x1 + v[2]*x2;
123 	  s2  -= v[1]*x1 + v[3]*x2;
124 	  v   += 4;
125 	}
126 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128         idiag   += 4;
129         i2      += 2;
130       }
131       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132       PetscLogFlops(4*(a->nz));
133     }
134     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136       i2    = 0;
137       mdiag = a->idiag+4*a->mbs;
138       for (i=0; i<m; i++) {
139         x1      = x[i2]; x2 = x[i2+1];
140         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142         mdiag  += 4;
143         i2     += 2;
144       }
145       PetscLogFlops(6*m);
146     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
148     }
149     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150       idiag   = a->idiag+4*a->mbs - 4;
151       i2      = 2*m - 2;
152       x1      = x[i2]; x2 = x[i2+1];
153       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155       idiag -= 4;
156       i2    -= 2;
157       for (i=m-2; i>=0; i--) {
158 	v     = aa + 4*(diag[i]+1);
159 	vi    = aj + diag[i] + 1;
160 	nz    = ai[i+1] - diag[i] - 1;
161 	s1    = x[i2]; s2 = x[i2+1];
162 	while (nz--) {
163 	  idx  = 2*(*vi++);
164 	  x1   = x[idx]; x2 = x[1+idx];
165 	  s1  -= v[0]*x1 + v[2]*x2;
166 	  s2  -= v[1]*x1 + v[3]*x2;
167 	  v   += 4;
168 	}
169 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171         idiag   -= 4;
172         i2      -= 2;
173       }
174       PetscLogFlops(4*(a->nz));
175     }
176   } else {
177     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
178   }
179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
180   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191   PetscErrorCode     ierr;
192   PetscInt           m = a->mbs,i,i2,nz,idx;
193   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
194 
195   PetscFunctionBegin;
196   its = its*lits;
197   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
198   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
199   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
200   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
201   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
202 
203   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
204 
205   diag  = a->diag;
206   idiag = a->idiag;
207   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
209 
210   if (flag & SOR_ZERO_INITIAL_GUESS) {
211     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
212       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
213       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
214       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
215       i2     = 3;
216       idiag += 9;
217       for (i=1; i<m; i++) {
218 	v     = aa + 9*ai[i];
219 	vi    = aj + ai[i];
220 	nz    = diag[i] - ai[i];
221 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
222 	while (nz--) {
223 	  idx  = 3*(*vi++);
224 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
225 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
226 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
227 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
228 	  v   += 9;
229 	}
230 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
231 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
232 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
233         idiag   += 9;
234         i2      += 3;
235       }
236       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237       PetscLogFlops(9*(a->nz));
238     }
239     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241       i2    = 0;
242       mdiag = a->idiag+9*a->mbs;
243       for (i=0; i<m; i++) {
244         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
245         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
246         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
247         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
248         mdiag  += 9;
249         i2     += 3;
250       }
251       PetscLogFlops(15*m);
252     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
253       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
254     }
255     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
256       idiag   = a->idiag+9*a->mbs - 9;
257       i2      = 3*m - 3;
258       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
259       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
260       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
261       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
262       idiag -= 9;
263       i2    -= 3;
264       for (i=m-2; i>=0; i--) {
265 	v     = aa + 9*(diag[i]+1);
266 	vi    = aj + diag[i] + 1;
267 	nz    = ai[i+1] - diag[i] - 1;
268 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
269 	while (nz--) {
270 	  idx  = 3*(*vi++);
271 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
272 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
273 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
274 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
275 	  v   += 9;
276 	}
277 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
278 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
279 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
280         idiag   -= 9;
281         i2      -= 3;
282       }
283       PetscLogFlops(9*(a->nz));
284     }
285   } else {
286     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
287   }
288   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
289   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
296 {
297   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
298   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
299   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
300   PetscErrorCode     ierr;
301   PetscInt           m = a->mbs,i,i2,nz,idx;
302   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
303 
304   PetscFunctionBegin;
305   its = its*lits;
306   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
307   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
308   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
309   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
310   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
311 
312   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
313 
314   diag  = a->diag;
315   idiag = a->idiag;
316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
318 
319   if (flag & SOR_ZERO_INITIAL_GUESS) {
320     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
321       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
322       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
323       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
324       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
325       i2     = 4;
326       idiag += 16;
327       for (i=1; i<m; i++) {
328 	v     = aa + 16*ai[i];
329 	vi    = aj + ai[i];
330 	nz    = diag[i] - ai[i];
331 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
332 	while (nz--) {
333 	  idx  = 4*(*vi++);
334 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
335 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
336 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
337 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
338 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
339 	  v   += 16;
340 	}
341 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
342 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
343 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
344 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
345         idiag   += 16;
346         i2      += 4;
347       }
348       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
349       PetscLogFlops(16*(a->nz));
350     }
351     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
352         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
353       i2    = 0;
354       mdiag = a->idiag+16*a->mbs;
355       for (i=0; i<m; i++) {
356         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
357         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
358         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
359         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
360         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
361         mdiag  += 16;
362         i2     += 4;
363       }
364       PetscLogFlops(28*m);
365     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
366       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
367     }
368     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
369       idiag   = a->idiag+16*a->mbs - 16;
370       i2      = 4*m - 4;
371       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
372       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
373       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
374       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
375       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
376       idiag -= 16;
377       i2    -= 4;
378       for (i=m-2; i>=0; i--) {
379 	v     = aa + 16*(diag[i]+1);
380 	vi    = aj + diag[i] + 1;
381 	nz    = ai[i+1] - diag[i] - 1;
382 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
383 	while (nz--) {
384 	  idx  = 4*(*vi++);
385 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
386 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
387 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
388 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
389 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
390 	  v   += 16;
391 	}
392 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
393 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
394 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
395 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
396         idiag   -= 16;
397         i2      -= 4;
398       }
399       PetscLogFlops(16*(a->nz));
400     }
401   } else {
402     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
403   }
404   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
412 {
413   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
414   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
415   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
416   PetscErrorCode     ierr;
417   PetscInt           m = a->mbs,i,i2,nz,idx;
418   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
419 
420   PetscFunctionBegin;
421   its = its*lits;
422   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
423   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
424   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
425   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
426   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
427 
428   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
429 
430   diag  = a->diag;
431   idiag = a->idiag;
432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
434 
435   if (flag & SOR_ZERO_INITIAL_GUESS) {
436     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
437       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
438       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
439       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
440       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
441       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
442       i2     = 5;
443       idiag += 25;
444       for (i=1; i<m; i++) {
445 	v     = aa + 25*ai[i];
446 	vi    = aj + ai[i];
447 	nz    = diag[i] - ai[i];
448 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
449 	while (nz--) {
450 	  idx  = 5*(*vi++);
451 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
452 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
453 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
454 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
455 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
456 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
457 	  v   += 25;
458 	}
459 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
460 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
461 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
462 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
463 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
464         idiag   += 25;
465         i2      += 5;
466       }
467       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
468       PetscLogFlops(25*(a->nz));
469     }
470     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
471         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
472       i2    = 0;
473       mdiag = a->idiag+25*a->mbs;
474       for (i=0; i<m; i++) {
475         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
476         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
477         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
478         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
479         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
480         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
481         mdiag  += 25;
482         i2     += 5;
483       }
484       PetscLogFlops(45*m);
485     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
486       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
487     }
488     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489       idiag   = a->idiag+25*a->mbs - 25;
490       i2      = 5*m - 5;
491       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
492       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
493       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
494       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
495       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
496       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
497       idiag -= 25;
498       i2    -= 5;
499       for (i=m-2; i>=0; i--) {
500 	v     = aa + 25*(diag[i]+1);
501 	vi    = aj + diag[i] + 1;
502 	nz    = ai[i+1] - diag[i] - 1;
503 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
504 	while (nz--) {
505 	  idx  = 5*(*vi++);
506 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
507 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
508 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
509 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
510 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
511 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
512 	  v   += 25;
513 	}
514 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
515 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
516 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
517 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
518 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
519         idiag   -= 25;
520         i2      -= 5;
521       }
522       PetscLogFlops(25*(a->nz));
523     }
524   } else {
525     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
526   }
527   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
528   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
529   PetscFunctionReturn(0);
530 }
531 
532 /*
533     Special version for Fun3d sequential benchmark
534 */
535 #if defined(PETSC_HAVE_FORTRAN_CAPS)
536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
538 #define matsetvaluesblocked4_ matsetvaluesblocked4
539 #endif
540 
541 EXTERN_C_BEGIN
542 #undef __FUNCT__
543 #define __FUNCT__ "matsetvaluesblocked4_"
544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
545 {
546   Mat               A = *AA;
547   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
548   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
549   PetscInt          *ai=a->i,*ailen=a->ilen;
550   PetscInt          *aj=a->j,stepval;
551   const PetscScalar *value = v;
552   MatScalar         *ap,*aa = a->a,*bap;
553 
554   PetscFunctionBegin;
555   stepval = (n-1)*4;
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     rp   = aj + ai[row];
559     ap   = aa + 16*ai[row];
560     nrow = ailen[row];
561     low  = 0;
562     for (l=0; l<n; l++) { /* loop over added columns */
563       col = in[l];
564       value = v + k*(stepval+4)*4 + l*4;
565       low = 0; high = nrow;
566       while (high-low > 7) {
567         t = (low+high)/2;
568         if (rp[t] > col) high = t;
569         else             low  = t;
570       }
571       for (i=low; i<high; i++) {
572         if (rp[i] > col) break;
573         if (rp[i] == col) {
574           bap  = ap +  16*i;
575           for (ii=0; ii<4; ii++,value+=stepval) {
576             for (jj=ii; jj<16; jj+=4) {
577               bap[jj] += *value++;
578             }
579           }
580           goto noinsert2;
581         }
582       }
583       N = nrow++ - 1;
584       /* shift up all the later entries in this row */
585       for (ii=N; ii>=i; ii--) {
586         rp[ii+1] = rp[ii];
587         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
588       }
589       if (N >= i) {
590         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
591       }
592       rp[i] = col;
593       bap   = ap +  16*i;
594       for (ii=0; ii<4; ii++,value+=stepval) {
595         for (jj=ii; jj<16; jj+=4) {
596           bap[jj] = *value++;
597         }
598       }
599       noinsert2:;
600       low = i;
601     }
602     ailen[row] = nrow;
603   }
604 }
605 EXTERN_C_END
606 
607 #if defined(PETSC_HAVE_FORTRAN_CAPS)
608 #define matsetvalues4_ MATSETVALUES4
609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
610 #define matsetvalues4_ matsetvalues4
611 #endif
612 
613 EXTERN_C_BEGIN
614 #undef __FUNCT__
615 #define __FUNCT__ "MatSetValues4_"
616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
617 {
618   Mat         A = *AA;
619   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
621   PetscInt    *ai=a->i,*ailen=a->ilen;
622   PetscInt    *aj=a->j,brow,bcol;
623   PetscInt    ridx,cidx;
624   MatScalar   *ap,value,*aa=a->a,*bap;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over added rows */
628     row  = im[k]; brow = row/4;
629     rp   = aj + ai[brow];
630     ap   = aa + 16*ai[brow];
631     nrow = ailen[brow];
632     low  = 0;
633     for (l=0; l<n; l++) { /* loop over added columns */
634       col = in[l]; bcol = col/4;
635       ridx = row % 4; cidx = col % 4;
636       value = v[l + k*n];
637       low = 0; high = nrow;
638       while (high-low > 7) {
639         t = (low+high)/2;
640         if (rp[t] > bcol) high = t;
641         else              low  = t;
642       }
643       for (i=low; i<high; i++) {
644         if (rp[i] > bcol) break;
645         if (rp[i] == bcol) {
646           bap  = ap +  16*i + 4*cidx + ridx;
647           *bap += value;
648           goto noinsert1;
649         }
650       }
651       N = nrow++ - 1;
652       /* shift up all the later entries in this row */
653       for (ii=N; ii>=i; ii--) {
654         rp[ii+1] = rp[ii];
655         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
656       }
657       if (N>=i) {
658         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
659       }
660       rp[i]                    = bcol;
661       ap[16*i + 4*cidx + ridx] = value;
662       noinsert1:;
663       low = i;
664     }
665     ailen[brow] = nrow;
666   }
667 }
668 EXTERN_C_END
669 
670 /*  UGLY, ugly, ugly
671    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
672    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
673    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
674    converts the entries into single precision and then calls ..._MatScalar() to put them
675    into the single precision data structures.
676 */
677 #if defined(PETSC_USE_MAT_SINGLE)
678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
679 #else
680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
681 #endif
682 
683 #define CHUNKSIZE  10
684 
685 /*
686      Checks for missing diagonals
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
691 {
692   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
693   PetscErrorCode ierr;
694   PetscInt       *diag,*jj = a->j,i;
695 
696   PetscFunctionBegin;
697   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
698   diag = a->diag;
699   for (i=0; i<a->mbs; i++) {
700     if (jj[diag[i]] != i) {
701       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
702     }
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
710 {
711   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
712   PetscErrorCode ierr;
713   PetscInt       i,j,*diag,m = a->mbs;
714 
715   PetscFunctionBegin;
716   if (a->diag) PetscFunctionReturn(0);
717 
718   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
719   PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));
720   for (i=0; i<m; i++) {
721     diag[i] = a->i[i+1];
722     for (j=a->i[i]; j<a->i[i+1]; j++) {
723       if (a->j[j] == i) {
724         diag[i] = j;
725         break;
726       }
727     }
728   }
729   a->diag = diag;
730   PetscFunctionReturn(0);
731 }
732 
733 
734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
739 {
740   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
741   PetscErrorCode ierr;
742   PetscInt       n = a->mbs,i;
743 
744   PetscFunctionBegin;
745   *nn = n;
746   if (!ia) PetscFunctionReturn(0);
747   if (symmetric) {
748     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
749   } else if (oshift == 1) {
750     /* temporarily add 1 to i and j indices */
751     PetscInt nz = a->i[n];
752     for (i=0; i<nz; i++) a->j[i]++;
753     for (i=0; i<n+1; i++) a->i[i]++;
754     *ia = a->i; *ja = a->j;
755   } else {
756     *ia = a->i; *ja = a->j;
757   }
758 
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
765 {
766   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
767   PetscErrorCode ierr;
768   PetscInt       i,n = a->mbs;
769 
770   PetscFunctionBegin;
771   if (!ia) PetscFunctionReturn(0);
772   if (symmetric) {
773     ierr = PetscFree(*ia);CHKERRQ(ierr);
774     ierr = PetscFree(*ja);CHKERRQ(ierr);
775   } else if (oshift == 1) {
776     PetscInt nz = a->i[n]-1;
777     for (i=0; i<nz; i++) a->j[i]--;
778     for (i=0; i<n+1; i++) a->i[i]--;
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
785 PetscErrorCode MatGetBlockSize_SeqBAIJ(Mat mat,PetscInt *bs)
786 {
787   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
788 
789   PetscFunctionBegin;
790   *bs = baij->bs;
791   PetscFunctionReturn(0);
792 }
793 
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatDestroy_SeqBAIJ"
797 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
798 {
799   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
800   PetscErrorCode ierr;
801 
802   PetscFunctionBegin;
803 #if defined(PETSC_USE_LOG)
804   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
805 #endif
806   ierr = PetscFree(a->a);CHKERRQ(ierr);
807   if (!a->singlemalloc) {
808     ierr = PetscFree(a->i);CHKERRQ(ierr);
809     ierr = PetscFree(a->j);CHKERRQ(ierr);
810   }
811   if (a->row) {
812     ierr = ISDestroy(a->row);CHKERRQ(ierr);
813   }
814   if (a->col) {
815     ierr = ISDestroy(a->col);CHKERRQ(ierr);
816   }
817   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
818   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
819   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
820   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
821   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
822   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
823   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
824   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
825 #if defined(PETSC_USE_MAT_SINGLE)
826   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
827 #endif
828   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
829 
830   ierr = PetscFree(a);CHKERRQ(ierr);
831 
832   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
833   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
834   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
835   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
836   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
837   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatSetOption_SeqBAIJ"
843 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
844 {
845   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
846 
847   PetscFunctionBegin;
848   switch (op) {
849   case MAT_ROW_ORIENTED:
850     a->roworiented    = PETSC_TRUE;
851     break;
852   case MAT_COLUMN_ORIENTED:
853     a->roworiented    = PETSC_FALSE;
854     break;
855   case MAT_COLUMNS_SORTED:
856     a->sorted         = PETSC_TRUE;
857     break;
858   case MAT_COLUMNS_UNSORTED:
859     a->sorted         = PETSC_FALSE;
860     break;
861   case MAT_KEEP_ZEROED_ROWS:
862     a->keepzeroedrows = PETSC_TRUE;
863     break;
864   case MAT_NO_NEW_NONZERO_LOCATIONS:
865     a->nonew          = 1;
866     break;
867   case MAT_NEW_NONZERO_LOCATION_ERR:
868     a->nonew          = -1;
869     break;
870   case MAT_NEW_NONZERO_ALLOCATION_ERR:
871     a->nonew          = -2;
872     break;
873   case MAT_YES_NEW_NONZERO_LOCATIONS:
874     a->nonew          = 0;
875     break;
876   case MAT_ROWS_SORTED:
877   case MAT_ROWS_UNSORTED:
878   case MAT_YES_NEW_DIAGONALS:
879   case MAT_IGNORE_OFF_PROC_ENTRIES:
880   case MAT_USE_HASH_TABLE:
881     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
882     break;
883   case MAT_NO_NEW_DIAGONALS:
884     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
885   case MAT_SYMMETRIC:
886   case MAT_STRUCTURALLY_SYMMETRIC:
887   case MAT_NOT_SYMMETRIC:
888   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
889   case MAT_HERMITIAN:
890   case MAT_NOT_HERMITIAN:
891   case MAT_SYMMETRY_ETERNAL:
892   case MAT_NOT_SYMMETRY_ETERNAL:
893     break;
894   default:
895     SETERRQ(PETSC_ERR_SUP,"unknown option");
896   }
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatGetRow_SeqBAIJ"
902 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
903 {
904   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
905   PetscErrorCode ierr;
906   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
907   MatScalar      *aa,*aa_i;
908   PetscScalar    *v_i;
909 
910   PetscFunctionBegin;
911   bs  = a->bs;
912   ai  = a->i;
913   aj  = a->j;
914   aa  = a->a;
915   bs2 = a->bs2;
916 
917   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
918 
919   bn  = row/bs;   /* Block number */
920   bp  = row % bs; /* Block Position */
921   M   = ai[bn+1] - ai[bn];
922   *nz = bs*M;
923 
924   if (v) {
925     *v = 0;
926     if (*nz) {
927       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
928       for (i=0; i<M; i++) { /* for each block in the block row */
929         v_i  = *v + i*bs;
930         aa_i = aa + bs2*(ai[bn] + i);
931         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
932       }
933     }
934   }
935 
936   if (idx) {
937     *idx = 0;
938     if (*nz) {
939       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
940       for (i=0; i<M; i++) { /* for each block in the block row */
941         idx_i = *idx + i*bs;
942         itmp  = bs*aj[ai[bn] + i];
943         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
944       }
945     }
946   }
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
952 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
953 {
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
958   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatTranspose_SeqBAIJ"
964 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
965 {
966   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
967   Mat            C;
968   PetscErrorCode ierr;
969   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
970   PetscInt       *rows,*cols,bs2=a->bs2;
971   PetscScalar    *array;
972 
973   PetscFunctionBegin;
974   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
975   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
976   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
977 
978 #if defined(PETSC_USE_MAT_SINGLE)
979   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
980   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
981 #else
982   array = a->a;
983 #endif
984 
985   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
986   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
987   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
988   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
989   ierr = PetscFree(col);CHKERRQ(ierr);
990   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
991   cols = rows + bs;
992   for (i=0; i<mbs; i++) {
993     cols[0] = i*bs;
994     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
995     len = ai[i+1] - ai[i];
996     for (j=0; j<len; j++) {
997       rows[0] = (*aj++)*bs;
998       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
999       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1000       array += bs2;
1001     }
1002   }
1003   ierr = PetscFree(rows);CHKERRQ(ierr);
1004 #if defined(PETSC_USE_MAT_SINGLE)
1005   ierr = PetscFree(array);
1006 #endif
1007 
1008   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1009   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1010 
1011   if (B) {
1012     *B = C;
1013   } else {
1014     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1015   }
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1021 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1022 {
1023   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1024   PetscErrorCode ierr;
1025   PetscInt       i,*col_lens,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1026   int            fd;
1027   PetscScalar    *aa;
1028   FILE           *file;
1029 
1030   PetscFunctionBegin;
1031   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1032   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1033   col_lens[0] = MAT_FILE_COOKIE;
1034 
1035   col_lens[1] = A->m;
1036   col_lens[2] = A->n;
1037   col_lens[3] = a->nz*bs2;
1038 
1039   /* store lengths of each row and write (including header) to file */
1040   count = 0;
1041   for (i=0; i<a->mbs; i++) {
1042     for (j=0; j<bs; j++) {
1043       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1044     }
1045   }
1046   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1047   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1048 
1049   /* store column indices (zero start index) */
1050   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1051   count = 0;
1052   for (i=0; i<a->mbs; i++) {
1053     for (j=0; j<bs; j++) {
1054       for (k=a->i[i]; k<a->i[i+1]; k++) {
1055         for (l=0; l<bs; l++) {
1056           jj[count++] = bs*a->j[k] + l;
1057         }
1058       }
1059     }
1060   }
1061   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1062   ierr = PetscFree(jj);CHKERRQ(ierr);
1063 
1064   /* store nonzero values */
1065   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1066   count = 0;
1067   for (i=0; i<a->mbs; i++) {
1068     for (j=0; j<bs; j++) {
1069       for (k=a->i[i]; k<a->i[i+1]; k++) {
1070         for (l=0; l<bs; l++) {
1071           aa[count++] = a->a[bs2*k + l*bs + j];
1072         }
1073       }
1074     }
1075   }
1076   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1077   ierr = PetscFree(aa);CHKERRQ(ierr);
1078 
1079   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1080   if (file) {
1081     fprintf(file,"-matload_block_size %d\n",(int)a->bs);
1082   }
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1088 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1089 {
1090   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1091   PetscErrorCode    ierr;
1092   PetscInt          i,j,bs = a->bs,k,l,bs2=a->bs2;
1093   PetscViewerFormat format;
1094 
1095   PetscFunctionBegin;
1096   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1097   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1098     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1099   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1100     Mat aij;
1101     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1102     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1103     ierr = MatDestroy(aij);CHKERRQ(ierr);
1104   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1105      PetscFunctionReturn(0);
1106   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1107     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1108     for (i=0; i<a->mbs; i++) {
1109       for (j=0; j<bs; j++) {
1110         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1111         for (k=a->i[i]; k<a->i[i+1]; k++) {
1112           for (l=0; l<bs; l++) {
1113 #if defined(PETSC_USE_COMPLEX)
1114             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1115               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1116                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1117             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1118               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1119                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1120             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1121               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1122             }
1123 #else
1124             if (a->a[bs2*k + l*bs + j] != 0.0) {
1125               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1126             }
1127 #endif
1128           }
1129         }
1130         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1131       }
1132     }
1133     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1134   } else {
1135     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1136     for (i=0; i<a->mbs; i++) {
1137       for (j=0; j<bs; j++) {
1138         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1139         for (k=a->i[i]; k<a->i[i+1]; k++) {
1140           for (l=0; l<bs; l++) {
1141 #if defined(PETSC_USE_COMPLEX)
1142             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1143               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1144                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1146               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1147                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1148             } else {
1149               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1150             }
1151 #else
1152             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1153 #endif
1154           }
1155         }
1156         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1157       }
1158     }
1159     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1160   }
1161   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1167 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1168 {
1169   Mat            A = (Mat) Aa;
1170   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1171   PetscErrorCode ierr;
1172   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1173   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1174   MatScalar      *aa;
1175   PetscViewer    viewer;
1176 
1177   PetscFunctionBegin;
1178 
1179   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1180   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1181 
1182   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1183 
1184   /* loop over matrix elements drawing boxes */
1185   color = PETSC_DRAW_BLUE;
1186   for (i=0,row=0; i<mbs; i++,row+=bs) {
1187     for (j=a->i[i]; j<a->i[i+1]; j++) {
1188       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1189       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1190       aa = a->a + j*bs2;
1191       for (k=0; k<bs; k++) {
1192         for (l=0; l<bs; l++) {
1193           if (PetscRealPart(*aa++) >=  0.) continue;
1194           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1195         }
1196       }
1197     }
1198   }
1199   color = PETSC_DRAW_CYAN;
1200   for (i=0,row=0; i<mbs; i++,row+=bs) {
1201     for (j=a->i[i]; j<a->i[i+1]; j++) {
1202       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1203       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1204       aa = a->a + j*bs2;
1205       for (k=0; k<bs; k++) {
1206         for (l=0; l<bs; l++) {
1207           if (PetscRealPart(*aa++) != 0.) continue;
1208           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1209         }
1210       }
1211     }
1212   }
1213 
1214   color = PETSC_DRAW_RED;
1215   for (i=0,row=0; i<mbs; i++,row+=bs) {
1216     for (j=a->i[i]; j<a->i[i+1]; j++) {
1217       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1218       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1219       aa = a->a + j*bs2;
1220       for (k=0; k<bs; k++) {
1221         for (l=0; l<bs; l++) {
1222           if (PetscRealPart(*aa++) <= 0.) continue;
1223           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1224         }
1225       }
1226     }
1227   }
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1233 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1234 {
1235   PetscErrorCode ierr;
1236   PetscReal      xl,yl,xr,yr,w,h;
1237   PetscDraw      draw;
1238   PetscTruth     isnull;
1239 
1240   PetscFunctionBegin;
1241 
1242   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1243   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1244 
1245   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1246   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1247   xr += w;    yr += h;  xl = -w;     yl = -h;
1248   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1249   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1250   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatView_SeqBAIJ"
1256 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1257 {
1258   PetscErrorCode ierr;
1259   PetscTruth     iascii,isbinary,isdraw;
1260 
1261   PetscFunctionBegin;
1262   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1263   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1264   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1265   if (iascii){
1266     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1267   } else if (isbinary) {
1268     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1269   } else if (isdraw) {
1270     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1271   } else {
1272     Mat B;
1273     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1274     ierr = MatView(B,viewer);CHKERRQ(ierr);
1275     ierr = MatDestroy(B);CHKERRQ(ierr);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1283 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1284 {
1285   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1286   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1287   PetscInt    *ai = a->i,*ailen = a->ilen;
1288   PetscInt    brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1289   MatScalar   *ap,*aa = a->a,zero = 0.0;
1290 
1291   PetscFunctionBegin;
1292   for (k=0; k<m; k++) { /* loop over rows */
1293     row  = im[k]; brow = row/bs;
1294     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1295     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1296     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1297     nrow = ailen[brow];
1298     for (l=0; l<n; l++) { /* loop over columns */
1299       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1300       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1301       col  = in[l] ;
1302       bcol = col/bs;
1303       cidx = col%bs;
1304       ridx = row%bs;
1305       high = nrow;
1306       low  = 0; /* assume unsorted */
1307       while (high-low > 5) {
1308         t = (low+high)/2;
1309         if (rp[t] > bcol) high = t;
1310         else             low  = t;
1311       }
1312       for (i=low; i<high; i++) {
1313         if (rp[i] > bcol) break;
1314         if (rp[i] == bcol) {
1315           *v++ = ap[bs2*i+bs*cidx+ridx];
1316           goto finished;
1317         }
1318       }
1319       *v++ = zero;
1320       finished:;
1321     }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #if defined(PETSC_USE_MAT_SINGLE)
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1329 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1330 {
1331   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1332   PetscErrorCode ierr;
1333   PetscInt       i,N = m*n*b->bs2;
1334   MatScalar      *vsingle;
1335 
1336   PetscFunctionBegin;
1337   if (N > b->setvalueslen) {
1338     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1339     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1340     b->setvalueslen  = N;
1341   }
1342   vsingle = b->setvaluescopy;
1343   for (i=0; i<N; i++) {
1344     vsingle[i] = v[i];
1345   }
1346   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 #endif
1350 
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1354 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1355 {
1356   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1357   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1358   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1359   PetscErrorCode  ierr;
1360   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
1361   PetscTruth      roworiented=a->roworiented;
1362   const MatScalar *value = v;
1363   MatScalar       *ap,*aa = a->a,*bap;
1364 
1365   PetscFunctionBegin;
1366   if (roworiented) {
1367     stepval = (n-1)*bs;
1368   } else {
1369     stepval = (m-1)*bs;
1370   }
1371   for (k=0; k<m; k++) { /* loop over added rows */
1372     row  = im[k];
1373     if (row < 0) continue;
1374 #if defined(PETSC_USE_BOPT_g)
1375     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1376 #endif
1377     rp   = aj + ai[row];
1378     ap   = aa + bs2*ai[row];
1379     rmax = imax[row];
1380     nrow = ailen[row];
1381     low  = 0;
1382     for (l=0; l<n; l++) { /* loop over added columns */
1383       if (in[l] < 0) continue;
1384 #if defined(PETSC_USE_BOPT_g)
1385       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1386 #endif
1387       col = in[l];
1388       if (roworiented) {
1389         value = v + k*(stepval+bs)*bs + l*bs;
1390       } else {
1391         value = v + l*(stepval+bs)*bs + k*bs;
1392       }
1393       if (!sorted) low = 0; high = nrow;
1394       while (high-low > 7) {
1395         t = (low+high)/2;
1396         if (rp[t] > col) high = t;
1397         else             low  = t;
1398       }
1399       for (i=low; i<high; i++) {
1400         if (rp[i] > col) break;
1401         if (rp[i] == col) {
1402           bap  = ap +  bs2*i;
1403           if (roworiented) {
1404             if (is == ADD_VALUES) {
1405               for (ii=0; ii<bs; ii++,value+=stepval) {
1406                 for (jj=ii; jj<bs2; jj+=bs) {
1407                   bap[jj] += *value++;
1408                 }
1409               }
1410             } else {
1411               for (ii=0; ii<bs; ii++,value+=stepval) {
1412                 for (jj=ii; jj<bs2; jj+=bs) {
1413                   bap[jj] = *value++;
1414                 }
1415               }
1416             }
1417           } else {
1418             if (is == ADD_VALUES) {
1419               for (ii=0; ii<bs; ii++,value+=stepval) {
1420                 for (jj=0; jj<bs; jj++) {
1421                   *bap++ += *value++;
1422                 }
1423               }
1424             } else {
1425               for (ii=0; ii<bs; ii++,value+=stepval) {
1426                 for (jj=0; jj<bs; jj++) {
1427                   *bap++  = *value++;
1428                 }
1429               }
1430             }
1431           }
1432           goto noinsert2;
1433         }
1434       }
1435       if (nonew == 1) goto noinsert2;
1436       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1437       if (nrow >= rmax) {
1438         /* there is no extra room in row, therefore enlarge */
1439         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1440         MatScalar *new_a;
1441 
1442         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1443 
1444         /* malloc new storage space */
1445         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1446 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1447         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1448         new_i   = new_j + new_nz;
1449 
1450         /* copy over old data into new slots */
1451         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1452         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1453         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1454         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1455         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1456         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1457         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1458         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1459         /* free up old matrix storage */
1460         ierr = PetscFree(a->a);CHKERRQ(ierr);
1461         if (!a->singlemalloc) {
1462           ierr = PetscFree(a->i);CHKERRQ(ierr);
1463           ierr = PetscFree(a->j);CHKERRQ(ierr);
1464         }
1465         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1466         a->singlemalloc = PETSC_TRUE;
1467 
1468         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1469         rmax = imax[row] = imax[row] + CHUNKSIZE;
1470         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1471         a->maxnz += bs2*CHUNKSIZE;
1472         a->reallocs++;
1473         a->nz++;
1474       }
1475       N = nrow++ - 1;
1476       /* shift up all the later entries in this row */
1477       for (ii=N; ii>=i; ii--) {
1478         rp[ii+1] = rp[ii];
1479         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1480       }
1481       if (N >= i) {
1482         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1483       }
1484       rp[i] = col;
1485       bap   = ap +  bs2*i;
1486       if (roworiented) {
1487         for (ii=0; ii<bs; ii++,value+=stepval) {
1488           for (jj=ii; jj<bs2; jj+=bs) {
1489             bap[jj] = *value++;
1490           }
1491         }
1492       } else {
1493         for (ii=0; ii<bs; ii++,value+=stepval) {
1494           for (jj=0; jj<bs; jj++) {
1495             *bap++  = *value++;
1496           }
1497         }
1498       }
1499       noinsert2:;
1500       low = i;
1501     }
1502     ailen[row] = nrow;
1503   }
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1509 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1510 {
1511   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1512   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1513   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1514   PetscErrorCode ierr;
1515   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1516   MatScalar      *aa = a->a,*ap;
1517 
1518   PetscFunctionBegin;
1519   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1520 
1521   if (m) rmax = ailen[0];
1522   for (i=1; i<mbs; i++) {
1523     /* move each row back by the amount of empty slots (fshift) before it*/
1524     fshift += imax[i-1] - ailen[i-1];
1525     rmax   = PetscMax(rmax,ailen[i]);
1526     if (fshift) {
1527       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1528       N = ailen[i];
1529       for (j=0; j<N; j++) {
1530         ip[j-fshift] = ip[j];
1531         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1532       }
1533     }
1534     ai[i] = ai[i-1] + ailen[i-1];
1535   }
1536   if (mbs) {
1537     fshift += imax[mbs-1] - ailen[mbs-1];
1538     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1539   }
1540   /* reset ilen and imax for each row */
1541   for (i=0; i<mbs; i++) {
1542     ailen[i] = imax[i] = ai[i+1] - ai[i];
1543   }
1544   a->nz = ai[mbs];
1545 
1546   /* diagonals may have moved, so kill the diagonal pointers */
1547   a->idiagvalid = PETSC_FALSE;
1548   if (fshift && a->diag) {
1549     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1550     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1551     a->diag = 0;
1552   }
1553   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1554   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs);
1555   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
1556   a->reallocs          = 0;
1557   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1558 
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 
1563 
1564 /*
1565    This function returns an array of flags which indicate the locations of contiguous
1566    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1567    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1568    Assume: sizes should be long enough to hold all the values.
1569 */
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1572 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1573 {
1574   PetscInt   i,j,k,row;
1575   PetscTruth flg;
1576 
1577   PetscFunctionBegin;
1578   for (i=0,j=0; i<n; j++) {
1579     row = idx[i];
1580     if (row%bs!=0) { /* Not the begining of a block */
1581       sizes[j] = 1;
1582       i++;
1583     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1584       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1585       i++;
1586     } else { /* Begining of the block, so check if the complete block exists */
1587       flg = PETSC_TRUE;
1588       for (k=1; k<bs; k++) {
1589         if (row+k != idx[i+k]) { /* break in the block */
1590           flg = PETSC_FALSE;
1591           break;
1592         }
1593       }
1594       if (flg == PETSC_TRUE) { /* No break in the bs */
1595         sizes[j] = bs;
1596         i+= bs;
1597       } else {
1598         sizes[j] = 1;
1599         i++;
1600       }
1601     }
1602   }
1603   *bs_max = j;
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1609 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1610 {
1611   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1612   PetscErrorCode ierr;
1613   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1614   PetscInt       bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1615   PetscScalar    zero = 0.0;
1616   MatScalar      *aa;
1617 
1618   PetscFunctionBegin;
1619   /* Make a copy of the IS and  sort it */
1620   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1621   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1622 
1623   /* allocate memory for rows,sizes */
1624   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1625   sizes = rows + is_n;
1626 
1627   /* copy IS values to rows, and sort them */
1628   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1629   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1630   if (baij->keepzeroedrows) {
1631     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1632     bs_max = is_n;
1633   } else {
1634     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1635   }
1636   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1637 
1638   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1639     row   = rows[j];
1640     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1641     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1642     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1643     if (sizes[i] == bs && !baij->keepzeroedrows) {
1644       if (diag) {
1645         if (baij->ilen[row/bs] > 0) {
1646           baij->ilen[row/bs]       = 1;
1647           baij->j[baij->i[row/bs]] = row/bs;
1648           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1649         }
1650         /* Now insert all the diagonal values for this bs */
1651         for (k=0; k<bs; k++) {
1652           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1653         }
1654       } else { /* (!diag) */
1655         baij->ilen[row/bs] = 0;
1656       } /* end (!diag) */
1657     } else { /* (sizes[i] != bs) */
1658 #if defined (PETSC_USE_DEBUG)
1659       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1660 #endif
1661       for (k=0; k<count; k++) {
1662         aa[0] =  zero;
1663         aa    += bs;
1664       }
1665       if (diag) {
1666         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1667       }
1668     }
1669   }
1670 
1671   ierr = PetscFree(rows);CHKERRQ(ierr);
1672   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1678 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1679 {
1680   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1681   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1682   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1683   PetscInt       *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1684   PetscErrorCode ierr;
1685   PetscInt       ridx,cidx,bs2=a->bs2;
1686   PetscTruth     roworiented=a->roworiented;
1687   MatScalar      *ap,value,*aa=a->a,*bap;
1688 
1689   PetscFunctionBegin;
1690   for (k=0; k<m; k++) { /* loop over added rows */
1691     row  = im[k]; brow = row/bs;
1692     if (row < 0) continue;
1693 #if defined(PETSC_USE_BOPT_g)
1694     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1695 #endif
1696     rp   = aj + ai[brow];
1697     ap   = aa + bs2*ai[brow];
1698     rmax = imax[brow];
1699     nrow = ailen[brow];
1700     low  = 0;
1701     for (l=0; l<n; l++) { /* loop over added columns */
1702       if (in[l] < 0) continue;
1703 #if defined(PETSC_USE_BOPT_g)
1704       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1705 #endif
1706       col = in[l]; bcol = col/bs;
1707       ridx = row % bs; cidx = col % bs;
1708       if (roworiented) {
1709         value = v[l + k*n];
1710       } else {
1711         value = v[k + l*m];
1712       }
1713       if (!sorted) low = 0; high = nrow;
1714       while (high-low > 7) {
1715         t = (low+high)/2;
1716         if (rp[t] > bcol) high = t;
1717         else              low  = t;
1718       }
1719       for (i=low; i<high; i++) {
1720         if (rp[i] > bcol) break;
1721         if (rp[i] == bcol) {
1722           bap  = ap +  bs2*i + bs*cidx + ridx;
1723           if (is == ADD_VALUES) *bap += value;
1724           else                  *bap  = value;
1725           goto noinsert1;
1726         }
1727       }
1728       if (nonew == 1) goto noinsert1;
1729       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1730       if (nrow >= rmax) {
1731         /* there is no extra room in row, therefore enlarge */
1732         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1733         MatScalar *new_a;
1734 
1735         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1736 
1737         /* Malloc new storage space */
1738         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1739 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1740         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1741         new_i   = new_j + new_nz;
1742 
1743         /* copy over old data into new slots */
1744         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1745         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1746         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1747         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1748         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1749         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1750         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1751         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1752         /* free up old matrix storage */
1753         ierr = PetscFree(a->a);CHKERRQ(ierr);
1754         if (!a->singlemalloc) {
1755           ierr = PetscFree(a->i);CHKERRQ(ierr);
1756           ierr = PetscFree(a->j);CHKERRQ(ierr);
1757         }
1758         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1759         a->singlemalloc = PETSC_TRUE;
1760 
1761         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1762         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1763         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1764         a->maxnz += bs2*CHUNKSIZE;
1765         a->reallocs++;
1766         a->nz++;
1767       }
1768       N = nrow++ - 1;
1769       /* shift up all the later entries in this row */
1770       for (ii=N; ii>=i; ii--) {
1771         rp[ii+1] = rp[ii];
1772         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1773       }
1774       if (N>=i) {
1775         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1776       }
1777       rp[i]                      = bcol;
1778       ap[bs2*i + bs*cidx + ridx] = value;
1779       noinsert1:;
1780       low = i;
1781     }
1782     ailen[brow] = nrow;
1783   }
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 
1788 #undef __FUNCT__
1789 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1790 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1791 {
1792   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1793   Mat            outA;
1794   PetscErrorCode ierr;
1795   PetscTruth     row_identity,col_identity;
1796 
1797   PetscFunctionBegin;
1798   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1799   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1800   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1801   if (!row_identity || !col_identity) {
1802     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1803   }
1804 
1805   outA          = inA;
1806   inA->factor   = FACTOR_LU;
1807 
1808   if (!a->diag) {
1809     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1810   }
1811 
1812   a->row        = row;
1813   a->col        = col;
1814   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1815   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1816 
1817   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1818   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1819   PetscLogObjectParent(inA,a->icol);
1820 
1821   /*
1822       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1823       for ILU(0) factorization with natural ordering
1824   */
1825   if (a->bs < 8) {
1826     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1827   } else {
1828     if (!a->solve_work) {
1829       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1830       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1831     }
1832   }
1833 
1834   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1835 
1836   PetscFunctionReturn(0);
1837 }
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1840 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1841 {
1842   static PetscTruth called = PETSC_FALSE;
1843   MPI_Comm          comm = A->comm;
1844   PetscErrorCode    ierr;
1845 
1846   PetscFunctionBegin;
1847   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1848   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1849   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 EXTERN_C_BEGIN
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1856 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1857 {
1858   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1859   PetscInt    i,nz,nbs;
1860 
1861   PetscFunctionBegin;
1862   nz  = baij->maxnz/baij->bs2;
1863   nbs = baij->nbs;
1864   for (i=0; i<nz; i++) {
1865     baij->j[i] = indices[i];
1866   }
1867   baij->nz = nz;
1868   for (i=0; i<nbs; i++) {
1869     baij->ilen[i] = baij->imax[i];
1870   }
1871 
1872   PetscFunctionReturn(0);
1873 }
1874 EXTERN_C_END
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1878 /*@
1879     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1880        in the matrix.
1881 
1882   Input Parameters:
1883 +  mat - the SeqBAIJ matrix
1884 -  indices - the column indices
1885 
1886   Level: advanced
1887 
1888   Notes:
1889     This can be called if you have precomputed the nonzero structure of the
1890   matrix and want to provide it to the matrix object to improve the performance
1891   of the MatSetValues() operation.
1892 
1893     You MUST have set the correct numbers of nonzeros per row in the call to
1894   MatCreateSeqBAIJ().
1895 
1896     MUST be called before any calls to MatSetValues();
1897 
1898 @*/
1899 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1900 {
1901   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1902 
1903   PetscFunctionBegin;
1904   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1905   PetscValidPointer(indices,2);
1906   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1907   if (f) {
1908     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1909   } else {
1910     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1917 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1918 {
1919   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1920   PetscErrorCode ierr;
1921   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1922   PetscReal      atmp;
1923   PetscScalar    *x,zero = 0.0;
1924   MatScalar      *aa;
1925   PetscInt       ncols,brow,krow,kcol;
1926 
1927   PetscFunctionBegin;
1928   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1929   bs   = a->bs;
1930   aa   = a->a;
1931   ai   = a->i;
1932   aj   = a->j;
1933   mbs = a->mbs;
1934 
1935   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1936   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1937   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1938   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1939   for (i=0; i<mbs; i++) {
1940     ncols = ai[1] - ai[0]; ai++;
1941     brow  = bs*i;
1942     for (j=0; j<ncols; j++){
1943       /* bcol = bs*(*aj); */
1944       for (kcol=0; kcol<bs; kcol++){
1945         for (krow=0; krow<bs; krow++){
1946           atmp = PetscAbsScalar(*aa); aa++;
1947           row = brow + krow;    /* row index */
1948           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1949           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1950         }
1951       }
1952       aj++;
1953     }
1954   }
1955   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1961 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1962 {
1963   PetscErrorCode ierr;
1964 
1965   PetscFunctionBegin;
1966   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1972 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1973 {
1974   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1975   PetscFunctionBegin;
1976   *array = a->a;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1982 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1983 {
1984   PetscFunctionBegin;
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 #include "petscblaslapack.h"
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1991 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1992 {
1993   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1994   PetscErrorCode ierr;
1995   PetscInt       i,bs=y->bs,j,bs2;
1996   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1997 
1998   PetscFunctionBegin;
1999   if (str == SAME_NONZERO_PATTERN) {
2000     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2001   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2002     if (y->xtoy && y->XtoY != X) {
2003       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2004       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2005     }
2006     if (!y->xtoy) { /* get xtoy */
2007       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2008       y->XtoY = X;
2009     }
2010     bs2 = bs*bs;
2011     for (i=0; i<x->nz; i++) {
2012       j = 0;
2013       while (j < bs2){
2014         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2015         j++;
2016       }
2017     }
2018     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2019   } else {
2020     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2021   }
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 /* -------------------------------------------------------------------*/
2026 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2027        MatGetRow_SeqBAIJ,
2028        MatRestoreRow_SeqBAIJ,
2029        MatMult_SeqBAIJ_N,
2030 /* 4*/ MatMultAdd_SeqBAIJ_N,
2031        MatMultTranspose_SeqBAIJ,
2032        MatMultTransposeAdd_SeqBAIJ,
2033        MatSolve_SeqBAIJ_N,
2034        0,
2035        0,
2036 /*10*/ 0,
2037        MatLUFactor_SeqBAIJ,
2038        0,
2039        0,
2040        MatTranspose_SeqBAIJ,
2041 /*15*/ MatGetInfo_SeqBAIJ,
2042        MatEqual_SeqBAIJ,
2043        MatGetDiagonal_SeqBAIJ,
2044        MatDiagonalScale_SeqBAIJ,
2045        MatNorm_SeqBAIJ,
2046 /*20*/ 0,
2047        MatAssemblyEnd_SeqBAIJ,
2048        0,
2049        MatSetOption_SeqBAIJ,
2050        MatZeroEntries_SeqBAIJ,
2051 /*25*/ MatZeroRows_SeqBAIJ,
2052        MatLUFactorSymbolic_SeqBAIJ,
2053        MatLUFactorNumeric_SeqBAIJ_N,
2054        0,
2055        0,
2056 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2057        MatILUFactorSymbolic_SeqBAIJ,
2058        0,
2059        MatGetArray_SeqBAIJ,
2060        MatRestoreArray_SeqBAIJ,
2061 /*35*/ MatDuplicate_SeqBAIJ,
2062        0,
2063        0,
2064        MatILUFactor_SeqBAIJ,
2065        0,
2066 /*40*/ MatAXPY_SeqBAIJ,
2067        MatGetSubMatrices_SeqBAIJ,
2068        MatIncreaseOverlap_SeqBAIJ,
2069        MatGetValues_SeqBAIJ,
2070        0,
2071 /*45*/ MatPrintHelp_SeqBAIJ,
2072        MatScale_SeqBAIJ,
2073        0,
2074        0,
2075        0,
2076 /*50*/ MatGetBlockSize_SeqBAIJ,
2077        MatGetRowIJ_SeqBAIJ,
2078        MatRestoreRowIJ_SeqBAIJ,
2079        0,
2080        0,
2081 /*55*/ 0,
2082        0,
2083        0,
2084        0,
2085        MatSetValuesBlocked_SeqBAIJ,
2086 /*60*/ MatGetSubMatrix_SeqBAIJ,
2087        MatDestroy_SeqBAIJ,
2088        MatView_SeqBAIJ,
2089        MatGetPetscMaps_Petsc,
2090        0,
2091 /*65*/ 0,
2092        0,
2093        0,
2094        0,
2095        0,
2096 /*70*/ MatGetRowMax_SeqBAIJ,
2097        MatConvert_Basic,
2098        0,
2099        0,
2100        0,
2101 /*75*/ 0,
2102        0,
2103        0,
2104        0,
2105        0,
2106 /*80*/ 0,
2107        0,
2108        0,
2109        0,
2110        MatLoad_SeqBAIJ,
2111 /*85*/ 0,
2112        0,
2113        0,
2114        0,
2115        0,
2116 /*90*/ 0,
2117        0,
2118        0,
2119        0,
2120        0,
2121 /*95*/ 0,
2122        0,
2123        0,
2124        0};
2125 
2126 EXTERN_C_BEGIN
2127 #undef __FUNCT__
2128 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2129 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2130 {
2131   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2132   PetscInt       nz = aij->i[mat->m]*aij->bs*aij->bs2;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   if (aij->nonew != 1) {
2137     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2138   }
2139 
2140   /* allocate space for values if not already there */
2141   if (!aij->saved_values) {
2142     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2143   }
2144 
2145   /* copy values over */
2146   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 EXTERN_C_END
2150 
2151 EXTERN_C_BEGIN
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2154 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2155 {
2156   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2157   PetscErrorCode ierr;
2158   PetscInt       nz = aij->i[mat->m]*aij->bs*aij->bs2;
2159 
2160   PetscFunctionBegin;
2161   if (aij->nonew != 1) {
2162     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2163   }
2164   if (!aij->saved_values) {
2165     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2166   }
2167 
2168   /* copy values over */
2169   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2170   PetscFunctionReturn(0);
2171 }
2172 EXTERN_C_END
2173 
2174 EXTERN_C_BEGIN
2175 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2176 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2177 EXTERN_C_END
2178 
2179 EXTERN_C_BEGIN
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2182 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2183 {
2184   Mat_SeqBAIJ    *b;
2185   PetscErrorCode ierr;
2186   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2187   PetscTruth     flg;
2188 
2189   PetscFunctionBegin;
2190 
2191   B->preallocated = PETSC_TRUE;
2192   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2193   if (nnz && newbs != bs) {
2194     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2195   }
2196   bs   = newbs;
2197 
2198   mbs  = B->m/bs;
2199   nbs  = B->n/bs;
2200   bs2  = bs*bs;
2201 
2202   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2203     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2204   }
2205 
2206   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2207   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2208   if (nnz) {
2209     for (i=0; i<mbs; i++) {
2210       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2211       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2212     }
2213   }
2214 
2215   b       = (Mat_SeqBAIJ*)B->data;
2216   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2217   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2218   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2219   if (!flg) {
2220     switch (bs) {
2221     case 1:
2222       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2223       B->ops->mult            = MatMult_SeqBAIJ_1;
2224       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2225       break;
2226     case 2:
2227       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2228       B->ops->mult            = MatMult_SeqBAIJ_2;
2229       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2230       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2231       break;
2232     case 3:
2233       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2234       B->ops->mult            = MatMult_SeqBAIJ_3;
2235       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2236       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2237       break;
2238     case 4:
2239       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2240       B->ops->mult            = MatMult_SeqBAIJ_4;
2241       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2242       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2243       break;
2244     case 5:
2245       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2246       B->ops->mult            = MatMult_SeqBAIJ_5;
2247       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2248       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2249       break;
2250     case 6:
2251       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2252       B->ops->mult            = MatMult_SeqBAIJ_6;
2253       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2254       break;
2255     case 7:
2256       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2257       B->ops->mult            = MatMult_SeqBAIJ_7;
2258       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2259       break;
2260     default:
2261       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2262       B->ops->mult            = MatMult_SeqBAIJ_N;
2263       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2264       break;
2265     }
2266   }
2267   b->bs      = bs;
2268   b->mbs     = mbs;
2269   b->nbs     = nbs;
2270   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2271   if (!nnz) {
2272     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2273     else if (nz <= 0)        nz = 1;
2274     for (i=0; i<mbs; i++) b->imax[i] = nz;
2275     nz = nz*mbs;
2276   } else {
2277     nz = 0;
2278     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2279   }
2280 
2281   /* allocate the matrix space */
2282   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2283   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2284   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2285   b->j  = (PetscInt*)(b->a + nz*bs2);
2286   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2287   b->i  = b->j + nz;
2288   b->singlemalloc = PETSC_TRUE;
2289 
2290   b->i[0] = 0;
2291   for (i=1; i<mbs+1; i++) {
2292     b->i[i] = b->i[i-1] + b->imax[i-1];
2293   }
2294 
2295   /* b->ilen will count nonzeros in each block row so far. */
2296   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2297   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2298   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2299 
2300   b->bs               = bs;
2301   b->bs2              = bs2;
2302   b->mbs              = mbs;
2303   b->nz               = 0;
2304   b->maxnz            = nz*bs2;
2305   B->info.nz_unneeded = (PetscReal)b->maxnz;
2306   PetscFunctionReturn(0);
2307 }
2308 EXTERN_C_END
2309 
2310 /*MC
2311    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2312    block sparse compressed row format.
2313 
2314    Options Database Keys:
2315 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2316 
2317   Level: beginner
2318 
2319 .seealso: MatCreateSeqBAIJ
2320 M*/
2321 
2322 EXTERN_C_BEGIN
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatCreate_SeqBAIJ"
2325 PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2326 {
2327   PetscErrorCode ierr;
2328   PetscMPIInt    size;
2329   Mat_SeqBAIJ    *b;
2330 
2331   PetscFunctionBegin;
2332   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2333   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2334 
2335   B->m = B->M = PetscMax(B->m,B->M);
2336   B->n = B->N = PetscMax(B->n,B->N);
2337   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2338   B->data = (void*)b;
2339   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2340   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2341   B->factor           = 0;
2342   B->lupivotthreshold = 1.0;
2343   B->mapping          = 0;
2344   b->row              = 0;
2345   b->col              = 0;
2346   b->icol             = 0;
2347   b->reallocs         = 0;
2348   b->saved_values     = 0;
2349 #if defined(PETSC_USE_MAT_SINGLE)
2350   b->setvalueslen     = 0;
2351   b->setvaluescopy    = PETSC_NULL;
2352 #endif
2353 
2354   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2355   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2356 
2357   b->sorted           = PETSC_FALSE;
2358   b->roworiented      = PETSC_TRUE;
2359   b->nonew            = 0;
2360   b->diag             = 0;
2361   b->solve_work       = 0;
2362   b->mult_work        = 0;
2363   B->spptr            = 0;
2364   B->info.nz_unneeded = (PetscReal)b->maxnz;
2365   b->keepzeroedrows   = PETSC_FALSE;
2366   b->xtoy              = 0;
2367   b->XtoY              = 0;
2368 
2369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2370                                      "MatStoreValues_SeqBAIJ",
2371                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2373                                      "MatRetrieveValues_SeqBAIJ",
2374                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2376                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2377                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2379                                      "MatConvert_SeqBAIJ_SeqAIJ",
2380                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2381 #if !defined(PETSC_USE_64BIT_INT)
2382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2383                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2384                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2385 #endif
2386   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2387                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2388                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391 EXTERN_C_END
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2395 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2396 {
2397   Mat            C;
2398   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2399   PetscErrorCode ierr;
2400   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2401 
2402   PetscFunctionBegin;
2403   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2404 
2405   *B = 0;
2406   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2407   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2408   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2409   c    = (Mat_SeqBAIJ*)C->data;
2410 
2411   C->M   = A->M;
2412   C->N   = A->N;
2413   c->bs  = a->bs;
2414   c->bs2 = a->bs2;
2415   c->mbs = a->mbs;
2416   c->nbs = a->nbs;
2417 
2418   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2419   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2420   for (i=0; i<mbs; i++) {
2421     c->imax[i] = a->imax[i];
2422     c->ilen[i] = a->ilen[i];
2423   }
2424 
2425   /* allocate the matrix space */
2426   c->singlemalloc = PETSC_TRUE;
2427   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2428   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2429   c->j = (PetscInt*)(c->a + nz*bs2);
2430   c->i = c->j + nz;
2431   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2432   if (mbs > 0) {
2433     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2434     if (cpvalues == MAT_COPY_VALUES) {
2435       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2436     } else {
2437       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2438     }
2439   }
2440 
2441   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2442   c->sorted      = a->sorted;
2443   c->roworiented = a->roworiented;
2444   c->nonew       = a->nonew;
2445 
2446   if (a->diag) {
2447     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2448     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2449     for (i=0; i<mbs; i++) {
2450       c->diag[i] = a->diag[i];
2451     }
2452   } else c->diag        = 0;
2453   c->nz                 = a->nz;
2454   c->maxnz              = a->maxnz;
2455   c->solve_work         = 0;
2456   c->mult_work          = 0;
2457   C->preallocated       = PETSC_TRUE;
2458   C->assembled          = PETSC_TRUE;
2459   *B = C;
2460   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatLoad_SeqBAIJ"
2466 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2467 {
2468   Mat_SeqBAIJ    *a;
2469   Mat            B;
2470   PetscErrorCode ierr;
2471   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2472   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2473   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2474   PetscInt       *masked,nmask,tmp,bs2,ishift;
2475   PetscMPIInt    size;
2476   int            fd;
2477   PetscScalar    *aa;
2478   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2479 
2480   PetscFunctionBegin;
2481   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2482   bs2  = bs*bs;
2483 
2484   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2485   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2486   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2487   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2488   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2489   M = header[1]; N = header[2]; nz = header[3];
2490 
2491   if (header[3] < 0) {
2492     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2493   }
2494 
2495   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2496 
2497   /*
2498      This code adds extra rows to make sure the number of rows is
2499     divisible by the blocksize
2500   */
2501   mbs        = M/bs;
2502   extra_rows = bs - M + bs*(mbs);
2503   if (extra_rows == bs) extra_rows = 0;
2504   else                  mbs++;
2505   if (extra_rows) {
2506     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2507   }
2508 
2509   /* read in row lengths */
2510   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2511   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2512   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2513 
2514   /* read in column indices */
2515   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2516   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2517   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2518 
2519   /* loop over row lengths determining block row lengths */
2520   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2521   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2522   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2523   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2524   masked   = mask + mbs;
2525   rowcount = 0; nzcount = 0;
2526   for (i=0; i<mbs; i++) {
2527     nmask = 0;
2528     for (j=0; j<bs; j++) {
2529       kmax = rowlengths[rowcount];
2530       for (k=0; k<kmax; k++) {
2531         tmp = jj[nzcount++]/bs;
2532         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2533       }
2534       rowcount++;
2535     }
2536     browlengths[i] += nmask;
2537     /* zero out the mask elements we set */
2538     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2539   }
2540 
2541   /* create our matrix */
2542   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2543   ierr = MatSetType(B,type);CHKERRQ(ierr);
2544   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2545   a = (Mat_SeqBAIJ*)B->data;
2546 
2547   /* set matrix "i" values */
2548   a->i[0] = 0;
2549   for (i=1; i<= mbs; i++) {
2550     a->i[i]      = a->i[i-1] + browlengths[i-1];
2551     a->ilen[i-1] = browlengths[i-1];
2552   }
2553   a->nz         = 0;
2554   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2555 
2556   /* read in nonzero values */
2557   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2558   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2559   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2560 
2561   /* set "a" and "j" values into matrix */
2562   nzcount = 0; jcount = 0;
2563   for (i=0; i<mbs; i++) {
2564     nzcountb = nzcount;
2565     nmask    = 0;
2566     for (j=0; j<bs; j++) {
2567       kmax = rowlengths[i*bs+j];
2568       for (k=0; k<kmax; k++) {
2569         tmp = jj[nzcount++]/bs;
2570 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2571       }
2572     }
2573     /* sort the masked values */
2574     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2575 
2576     /* set "j" values into matrix */
2577     maskcount = 1;
2578     for (j=0; j<nmask; j++) {
2579       a->j[jcount++]  = masked[j];
2580       mask[masked[j]] = maskcount++;
2581     }
2582     /* set "a" values into matrix */
2583     ishift = bs2*a->i[i];
2584     for (j=0; j<bs; j++) {
2585       kmax = rowlengths[i*bs+j];
2586       for (k=0; k<kmax; k++) {
2587         tmp       = jj[nzcountb]/bs ;
2588         block     = mask[tmp] - 1;
2589         point     = jj[nzcountb] - bs*tmp;
2590         idx       = ishift + bs2*block + j + bs*point;
2591         a->a[idx] = (MatScalar)aa[nzcountb++];
2592       }
2593     }
2594     /* zero out the mask elements we set */
2595     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2596   }
2597   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2598 
2599   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2600   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2601   ierr = PetscFree(aa);CHKERRQ(ierr);
2602   ierr = PetscFree(jj);CHKERRQ(ierr);
2603   ierr = PetscFree(mask);CHKERRQ(ierr);
2604 
2605   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2606   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2607   ierr = MatView_Private(B);CHKERRQ(ierr);
2608 
2609   *A = B;
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatCreateSeqBAIJ"
2615 /*@C
2616    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2617    compressed row) format.  For good matrix assembly performance the
2618    user should preallocate the matrix storage by setting the parameter nz
2619    (or the array nnz).  By setting these parameters accurately, performance
2620    during matrix assembly can be increased by more than a factor of 50.
2621 
2622    Collective on MPI_Comm
2623 
2624    Input Parameters:
2625 +  comm - MPI communicator, set to PETSC_COMM_SELF
2626 .  bs - size of block
2627 .  m - number of rows
2628 .  n - number of columns
2629 .  nz - number of nonzero blocks  per block row (same for all rows)
2630 -  nnz - array containing the number of nonzero blocks in the various block rows
2631          (possibly different for each block row) or PETSC_NULL
2632 
2633    Output Parameter:
2634 .  A - the matrix
2635 
2636    Options Database Keys:
2637 .   -mat_no_unroll - uses code that does not unroll the loops in the
2638                      block calculations (much slower)
2639 .    -mat_block_size - size of the blocks to use
2640 
2641    Level: intermediate
2642 
2643    Notes:
2644    A nonzero block is any block that as 1 or more nonzeros in it
2645 
2646    The block AIJ format is fully compatible with standard Fortran 77
2647    storage.  That is, the stored row and column indices can begin at
2648    either one (as in Fortran) or zero.  See the users' manual for details.
2649 
2650    Specify the preallocated storage with either nz or nnz (not both).
2651    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2652    allocation.  For additional details, see the users manual chapter on
2653    matrices.
2654 
2655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2656 @*/
2657 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2658 {
2659   PetscErrorCode ierr;
2660 
2661   PetscFunctionBegin;
2662   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2663   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2664   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2670 /*@C
2671    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2672    per row in the matrix. For good matrix assembly performance the
2673    user should preallocate the matrix storage by setting the parameter nz
2674    (or the array nnz).  By setting these parameters accurately, performance
2675    during matrix assembly can be increased by more than a factor of 50.
2676 
2677    Collective on MPI_Comm
2678 
2679    Input Parameters:
2680 +  A - the matrix
2681 .  bs - size of block
2682 .  nz - number of block nonzeros per block row (same for all rows)
2683 -  nnz - array containing the number of block nonzeros in the various block rows
2684          (possibly different for each block row) or PETSC_NULL
2685 
2686    Options Database Keys:
2687 .   -mat_no_unroll - uses code that does not unroll the loops in the
2688                      block calculations (much slower)
2689 .    -mat_block_size - size of the blocks to use
2690 
2691    Level: intermediate
2692 
2693    Notes:
2694    The block AIJ format is fully compatible with standard Fortran 77
2695    storage.  That is, the stored row and column indices can begin at
2696    either one (as in Fortran) or zero.  See the users' manual for details.
2697 
2698    Specify the preallocated storage with either nz or nnz (not both).
2699    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2700    allocation.  For additional details, see the users manual chapter on
2701    matrices.
2702 
2703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2704 @*/
2705 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2706 {
2707   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2708 
2709   PetscFunctionBegin;
2710   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2711   if (f) {
2712     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2713   }
2714   PetscFunctionReturn(0);
2715 }
2716 
2717