xref: /petsc/src/mat/impls/baij/seq/baij.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
16 {
17   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
18   PetscErrorCode ierr;
19   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
20   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
21 
22   PetscFunctionBegin;
23   if (a->idiagvalid) PetscFunctionReturn(0);
24   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
25   diag_offset = a->diag;
26   if (!a->idiag) {
27     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
28   }
29   diag  = a->idiag;
30   mdiag = a->idiag+bs*bs*mbs;
31   /* factor and invert each block */
32   switch (bs){
33     case 2:
34       for (i=0; i<mbs; i++) {
35         odiag   = v + 4*diag_offset[i];
36         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
37 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
38 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
39 	diag    += 4;
40 	mdiag   += 4;
41       }
42       break;
43     case 3:
44       for (i=0; i<mbs; i++) {
45         odiag    = v + 9*diag_offset[i];
46         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
47         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
48         diag[8]  = odiag[8];
49         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
50         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
51         mdiag[8] = odiag[8];
52 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
53 	diag    += 9;
54 	mdiag   += 9;
55       }
56       break;
57     case 4:
58       for (i=0; i<mbs; i++) {
59         odiag  = v + 16*diag_offset[i];
60         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
61         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
62 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
63 	diag  += 16;
64 	mdiag += 16;
65       }
66       break;
67     case 5:
68       for (i=0; i<mbs; i++) {
69         odiag = v + 25*diag_offset[i];
70         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
71         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
72 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
73 	diag  += 25;
74 	mdiag += 25;
75       }
76       break;
77     default:
78       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
79   }
80   a->idiagvalid = PETSC_TRUE;
81   PetscFunctionReturn(0);
82 }
83 
84 #undef __FUNCT__
85 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
86 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
87 {
88   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
89   PetscScalar        *x,x1,x2,s1,s2;
90   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
91   PetscErrorCode     ierr;
92   PetscInt           m = a->mbs,i,i2,nz,idx;
93   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
94 
95   PetscFunctionBegin;
96   its = its*lits;
97   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
98   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
99   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
100   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
101   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
102 
103   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
104 
105   diag  = a->diag;
106   idiag = a->idiag;
107   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
108   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
109 
110   if (flag & SOR_ZERO_INITIAL_GUESS) {
111     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
112       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
113       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
114       i2     = 2;
115       idiag += 4;
116       for (i=1; i<m; i++) {
117 	v     = aa + 4*ai[i];
118 	vi    = aj + ai[i];
119 	nz    = diag[i] - ai[i];
120 	s1    = b[i2]; s2 = b[i2+1];
121 	while (nz--) {
122 	  idx  = 2*(*vi++);
123 	  x1   = x[idx]; x2 = x[1+idx];
124 	  s1  -= v[0]*x1 + v[2]*x2;
125 	  s2  -= v[1]*x1 + v[3]*x2;
126 	  v   += 4;
127 	}
128 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
129 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
130         idiag   += 4;
131         i2      += 2;
132       }
133       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
134       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
135     }
136     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
137         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
138       i2    = 0;
139       mdiag = a->idiag+4*a->mbs;
140       for (i=0; i<m; i++) {
141         x1      = x[i2]; x2 = x[i2+1];
142         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
143         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
144         mdiag  += 4;
145         i2     += 2;
146       }
147       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
148     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
149       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
150     }
151     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
152       idiag   = a->idiag+4*a->mbs - 4;
153       i2      = 2*m - 2;
154       x1      = x[i2]; x2 = x[i2+1];
155       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
156       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
157       idiag -= 4;
158       i2    -= 2;
159       for (i=m-2; i>=0; i--) {
160 	v     = aa + 4*(diag[i]+1);
161 	vi    = aj + diag[i] + 1;
162 	nz    = ai[i+1] - diag[i] - 1;
163 	s1    = x[i2]; s2 = x[i2+1];
164 	while (nz--) {
165 	  idx  = 2*(*vi++);
166 	  x1   = x[idx]; x2 = x[1+idx];
167 	  s1  -= v[0]*x1 + v[2]*x2;
168 	  s2  -= v[1]*x1 + v[3]*x2;
169 	  v   += 4;
170 	}
171 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
172 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
173         idiag   -= 4;
174         i2      -= 2;
175       }
176       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
177     }
178   } else {
179     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
180   }
181   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
182   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
188 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
189 {
190   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
191   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
192   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
193   PetscErrorCode     ierr;
194   PetscInt           m = a->mbs,i,i2,nz,idx;
195   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
196 
197   PetscFunctionBegin;
198   its = its*lits;
199   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
200   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
201   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
202   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
203   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
204 
205   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
206 
207   diag  = a->diag;
208   idiag = a->idiag;
209   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211 
212   if (flag & SOR_ZERO_INITIAL_GUESS) {
213     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
214       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
215       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
216       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
217       i2     = 3;
218       idiag += 9;
219       for (i=1; i<m; i++) {
220 	v     = aa + 9*ai[i];
221 	vi    = aj + ai[i];
222 	nz    = diag[i] - ai[i];
223 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
224 	while (nz--) {
225 	  idx  = 3*(*vi++);
226 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
227 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
228 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
229 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
230 	  v   += 9;
231 	}
232 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
233 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
234 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
235         idiag   += 9;
236         i2      += 3;
237       }
238       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
239       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
240     }
241     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
242         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
243       i2    = 0;
244       mdiag = a->idiag+9*a->mbs;
245       for (i=0; i<m; i++) {
246         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
247         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
248         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
249         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
250         mdiag  += 9;
251         i2     += 3;
252       }
253       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
254     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
255       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
256     }
257     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
258       idiag   = a->idiag+9*a->mbs - 9;
259       i2      = 3*m - 3;
260       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
261       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
262       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
263       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
264       idiag -= 9;
265       i2    -= 3;
266       for (i=m-2; i>=0; i--) {
267 	v     = aa + 9*(diag[i]+1);
268 	vi    = aj + diag[i] + 1;
269 	nz    = ai[i+1] - diag[i] - 1;
270 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
271 	while (nz--) {
272 	  idx  = 3*(*vi++);
273 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
274 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
275 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
276 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
277 	  v   += 9;
278 	}
279 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
280 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
281 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
282         idiag   -= 9;
283         i2      -= 3;
284       }
285       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
286     }
287   } else {
288     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
289   }
290   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
291   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
297 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
298 {
299   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
300   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
301   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
302   PetscErrorCode     ierr;
303   PetscInt           m = a->mbs,i,i2,nz,idx;
304   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
305 
306   PetscFunctionBegin;
307   its = its*lits;
308   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
309   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
310   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
311   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
312   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
313 
314   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
315 
316   diag  = a->diag;
317   idiag = a->idiag;
318   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
319   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
320 
321   if (flag & SOR_ZERO_INITIAL_GUESS) {
322     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
323       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
324       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
325       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
326       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
327       i2     = 4;
328       idiag += 16;
329       for (i=1; i<m; i++) {
330 	v     = aa + 16*ai[i];
331 	vi    = aj + ai[i];
332 	nz    = diag[i] - ai[i];
333 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
334 	while (nz--) {
335 	  idx  = 4*(*vi++);
336 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
337 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
338 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
339 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
340 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
341 	  v   += 16;
342 	}
343 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
344 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
345 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
346 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
347         idiag   += 16;
348         i2      += 4;
349       }
350       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
351       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
352     }
353     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
354         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
355       i2    = 0;
356       mdiag = a->idiag+16*a->mbs;
357       for (i=0; i<m; i++) {
358         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
359         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
360         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
361         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
362         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
363         mdiag  += 16;
364         i2     += 4;
365       }
366       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
367     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
369     }
370     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
371       idiag   = a->idiag+16*a->mbs - 16;
372       i2      = 4*m - 4;
373       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
374       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
375       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
376       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
377       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
378       idiag -= 16;
379       i2    -= 4;
380       for (i=m-2; i>=0; i--) {
381 	v     = aa + 16*(diag[i]+1);
382 	vi    = aj + diag[i] + 1;
383 	nz    = ai[i+1] - diag[i] - 1;
384 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
385 	while (nz--) {
386 	  idx  = 4*(*vi++);
387 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
388 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
389 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
390 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
391 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
392 	  v   += 16;
393 	}
394 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
395 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
396 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
397 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
398         idiag   -= 16;
399         i2      -= 4;
400       }
401       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
402     }
403   } else {
404     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
405   }
406   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
407   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
413 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
414 {
415   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
416   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
417   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
418   PetscErrorCode     ierr;
419   PetscInt           m = a->mbs,i,i2,nz,idx;
420   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
421 
422   PetscFunctionBegin;
423   its = its*lits;
424   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
425   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
426   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
427   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
428   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
429 
430   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
431 
432   diag  = a->diag;
433   idiag = a->idiag;
434   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
435   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
436 
437   if (flag & SOR_ZERO_INITIAL_GUESS) {
438     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
439       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
440       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
441       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
442       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
443       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
444       i2     = 5;
445       idiag += 25;
446       for (i=1; i<m; i++) {
447 	v     = aa + 25*ai[i];
448 	vi    = aj + ai[i];
449 	nz    = diag[i] - ai[i];
450 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
451 	while (nz--) {
452 	  idx  = 5*(*vi++);
453 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
454 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
455 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
456 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
457 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
458 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
459 	  v   += 25;
460 	}
461 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
462 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
463 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
464 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
465 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
466         idiag   += 25;
467         i2      += 5;
468       }
469       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
470       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
471     }
472     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
473         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
474       i2    = 0;
475       mdiag = a->idiag+25*a->mbs;
476       for (i=0; i<m; i++) {
477         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
478         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
479         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
480         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
481         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
482         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
483         mdiag  += 25;
484         i2     += 5;
485       }
486       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
487     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
488       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
489     }
490     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
491       idiag   = a->idiag+25*a->mbs - 25;
492       i2      = 5*m - 5;
493       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
494       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
495       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
496       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
497       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
498       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
499       idiag -= 25;
500       i2    -= 5;
501       for (i=m-2; i>=0; i--) {
502 	v     = aa + 25*(diag[i]+1);
503 	vi    = aj + diag[i] + 1;
504 	nz    = ai[i+1] - diag[i] - 1;
505 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
506 	while (nz--) {
507 	  idx  = 5*(*vi++);
508 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
509 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
510 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
511 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
512 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
513 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
514 	  v   += 25;
515 	}
516 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
517 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
518 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
519 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
520 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
521         idiag   -= 25;
522         i2      -= 5;
523       }
524       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
525     }
526   } else {
527     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
528   }
529   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
530   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
531   PetscFunctionReturn(0);
532 }
533 
534 /*
535     Special version for Fun3d sequential benchmark
536 */
537 #if defined(PETSC_HAVE_FORTRAN_CAPS)
538 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
539 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
540 #define matsetvaluesblocked4_ matsetvaluesblocked4
541 #endif
542 
543 EXTERN_C_BEGIN
544 #undef __FUNCT__
545 #define __FUNCT__ "matsetvaluesblocked4_"
546 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
547 {
548   Mat               A = *AA;
549   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
550   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
551   PetscInt          *ai=a->i,*ailen=a->ilen;
552   PetscInt          *aj=a->j,stepval;
553   const PetscScalar *value = v;
554   MatScalar         *ap,*aa = a->a,*bap;
555 
556   PetscFunctionBegin;
557   stepval = (n-1)*4;
558   for (k=0; k<m; k++) { /* loop over added rows */
559     row  = im[k];
560     rp   = aj + ai[row];
561     ap   = aa + 16*ai[row];
562     nrow = ailen[row];
563     low  = 0;
564     for (l=0; l<n; l++) { /* loop over added columns */
565       col = in[l];
566       value = v + k*(stepval+4)*4 + l*4;
567       low = 0; high = nrow;
568       while (high-low > 7) {
569         t = (low+high)/2;
570         if (rp[t] > col) high = t;
571         else             low  = t;
572       }
573       for (i=low; i<high; i++) {
574         if (rp[i] > col) break;
575         if (rp[i] == col) {
576           bap  = ap +  16*i;
577           for (ii=0; ii<4; ii++,value+=stepval) {
578             for (jj=ii; jj<16; jj+=4) {
579               bap[jj] += *value++;
580             }
581           }
582           goto noinsert2;
583         }
584       }
585       N = nrow++ - 1;
586       /* shift up all the later entries in this row */
587       for (ii=N; ii>=i; ii--) {
588         rp[ii+1] = rp[ii];
589         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
590       }
591       if (N >= i) {
592         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
593       }
594       rp[i] = col;
595       bap   = ap +  16*i;
596       for (ii=0; ii<4; ii++,value+=stepval) {
597         for (jj=ii; jj<16; jj+=4) {
598           bap[jj] = *value++;
599         }
600       }
601       noinsert2:;
602       low = i;
603     }
604     ailen[row] = nrow;
605   }
606   PetscFunctionReturnVoid();
607 }
608 EXTERN_C_END
609 
610 #if defined(PETSC_HAVE_FORTRAN_CAPS)
611 #define matsetvalues4_ MATSETVALUES4
612 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
613 #define matsetvalues4_ matsetvalues4
614 #endif
615 
616 EXTERN_C_BEGIN
617 #undef __FUNCT__
618 #define __FUNCT__ "MatSetValues4_"
619 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
620 {
621   Mat         A = *AA;
622   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
623   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
624   PetscInt    *ai=a->i,*ailen=a->ilen;
625   PetscInt    *aj=a->j,brow,bcol;
626   PetscInt    ridx,cidx;
627   MatScalar   *ap,value,*aa=a->a,*bap;
628 
629   PetscFunctionBegin;
630   for (k=0; k<m; k++) { /* loop over added rows */
631     row  = im[k]; brow = row/4;
632     rp   = aj + ai[brow];
633     ap   = aa + 16*ai[brow];
634     nrow = ailen[brow];
635     low  = 0;
636     for (l=0; l<n; l++) { /* loop over added columns */
637       col = in[l]; bcol = col/4;
638       ridx = row % 4; cidx = col % 4;
639       value = v[l + k*n];
640       low = 0; high = nrow;
641       while (high-low > 7) {
642         t = (low+high)/2;
643         if (rp[t] > bcol) high = t;
644         else              low  = t;
645       }
646       for (i=low; i<high; i++) {
647         if (rp[i] > bcol) break;
648         if (rp[i] == bcol) {
649           bap  = ap +  16*i + 4*cidx + ridx;
650           *bap += value;
651           goto noinsert1;
652         }
653       }
654       N = nrow++ - 1;
655       /* shift up all the later entries in this row */
656       for (ii=N; ii>=i; ii--) {
657         rp[ii+1] = rp[ii];
658         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
659       }
660       if (N>=i) {
661         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
662       }
663       rp[i]                    = bcol;
664       ap[16*i + 4*cidx + ridx] = value;
665       noinsert1:;
666       low = i;
667     }
668     ailen[brow] = nrow;
669   }
670   PetscFunctionReturnVoid();
671 }
672 EXTERN_C_END
673 
674 /*  UGLY, ugly, ugly
675    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
676    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
677    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
678    converts the entries into single precision and then calls ..._MatScalar() to put them
679    into the single precision data structures.
680 */
681 #if defined(PETSC_USE_MAT_SINGLE)
682 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
683 #else
684 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
685 #endif
686 
687 #define CHUNKSIZE  10
688 
689 /*
690      Checks for missing diagonals
691 */
692 #undef __FUNCT__
693 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
694 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
695 {
696   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
697   PetscErrorCode ierr;
698   PetscInt       *diag,*jj = a->j,i;
699 
700   PetscFunctionBegin;
701   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
702   diag = a->diag;
703   for (i=0; i<a->mbs; i++) {
704     if (jj[diag[i]] != i) {
705       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
706     }
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
713 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
714 {
715   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       i,j,*diag,m = a->mbs;
718 
719   PetscFunctionBegin;
720   if (a->diag) PetscFunctionReturn(0);
721 
722   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
723   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
724   for (i=0; i<m; i++) {
725     diag[i] = a->i[i+1];
726     for (j=a->i[i]; j<a->i[i+1]; j++) {
727       if (a->j[j] == i) {
728         diag[i] = j;
729         break;
730       }
731     }
732   }
733   a->diag = diag;
734   PetscFunctionReturn(0);
735 }
736 
737 
738 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
742 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
743 {
744   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
745   PetscErrorCode ierr;
746   PetscInt       n = a->mbs,i;
747 
748   PetscFunctionBegin;
749   *nn = n;
750   if (!ia) PetscFunctionReturn(0);
751   if (symmetric) {
752     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
753   } else if (oshift == 1) {
754     /* temporarily add 1 to i and j indices */
755     PetscInt nz = a->i[n];
756     for (i=0; i<nz; i++) a->j[i]++;
757     for (i=0; i<n+1; i++) a->i[i]++;
758     *ia = a->i; *ja = a->j;
759   } else {
760     *ia = a->i; *ja = a->j;
761   }
762 
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
768 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
769 {
770   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
771   PetscErrorCode ierr;
772   PetscInt       i,n = a->mbs;
773 
774   PetscFunctionBegin;
775   if (!ia) PetscFunctionReturn(0);
776   if (symmetric) {
777     ierr = PetscFree(*ia);CHKERRQ(ierr);
778     ierr = PetscFree(*ja);CHKERRQ(ierr);
779   } else if (oshift == 1) {
780     PetscInt nz = a->i[n]-1;
781     for (i=0; i<nz; i++) a->j[i]--;
782     for (i=0; i<n+1; i++) a->i[i]--;
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatDestroy_SeqBAIJ"
789 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
790 {
791   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795 #if defined(PETSC_USE_LOG)
796   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
797 #endif
798   ierr = PetscFree(a->a);CHKERRQ(ierr);
799   if (!a->singlemalloc) {
800     ierr = PetscFree(a->i);CHKERRQ(ierr);
801     ierr = PetscFree(a->j);CHKERRQ(ierr);
802   }
803   if (a->row) {
804     ierr = ISDestroy(a->row);CHKERRQ(ierr);
805   }
806   if (a->col) {
807     ierr = ISDestroy(a->col);CHKERRQ(ierr);
808   }
809   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
810   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
811   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
812   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
813   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
814   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
815   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
816   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
817 #if defined(PETSC_USE_MAT_SINGLE)
818   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
819 #endif
820   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
821   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
822 
823   ierr = PetscFree(a);CHKERRQ(ierr);
824 
825   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
826   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
827   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
828   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
829   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
830   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatSetOption_SeqBAIJ"
836 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
837 {
838   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   switch (op) {
843   case MAT_ROW_ORIENTED:
844     a->roworiented    = PETSC_TRUE;
845     break;
846   case MAT_COLUMN_ORIENTED:
847     a->roworiented    = PETSC_FALSE;
848     break;
849   case MAT_COLUMNS_SORTED:
850     a->sorted         = PETSC_TRUE;
851     break;
852   case MAT_COLUMNS_UNSORTED:
853     a->sorted         = PETSC_FALSE;
854     break;
855   case MAT_KEEP_ZEROED_ROWS:
856     a->keepzeroedrows = PETSC_TRUE;
857     break;
858   case MAT_NO_NEW_NONZERO_LOCATIONS:
859     a->nonew          = 1;
860     break;
861   case MAT_NEW_NONZERO_LOCATION_ERR:
862     a->nonew          = -1;
863     break;
864   case MAT_NEW_NONZERO_ALLOCATION_ERR:
865     a->nonew          = -2;
866     break;
867   case MAT_YES_NEW_NONZERO_LOCATIONS:
868     a->nonew          = 0;
869     break;
870   case MAT_ROWS_SORTED:
871   case MAT_ROWS_UNSORTED:
872   case MAT_YES_NEW_DIAGONALS:
873   case MAT_IGNORE_OFF_PROC_ENTRIES:
874   case MAT_USE_HASH_TABLE:
875     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
876     break;
877   case MAT_NO_NEW_DIAGONALS:
878     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
879   case MAT_SYMMETRIC:
880   case MAT_STRUCTURALLY_SYMMETRIC:
881   case MAT_NOT_SYMMETRIC:
882   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
883   case MAT_HERMITIAN:
884   case MAT_NOT_HERMITIAN:
885   case MAT_SYMMETRY_ETERNAL:
886   case MAT_NOT_SYMMETRY_ETERNAL:
887     break;
888   default:
889     SETERRQ(PETSC_ERR_SUP,"unknown option");
890   }
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatGetRow_SeqBAIJ"
896 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
897 {
898   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
899   PetscErrorCode ierr;
900   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
901   MatScalar      *aa,*aa_i;
902   PetscScalar    *v_i;
903 
904   PetscFunctionBegin;
905   bs  = A->bs;
906   ai  = a->i;
907   aj  = a->j;
908   aa  = a->a;
909   bs2 = a->bs2;
910 
911   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
912 
913   bn  = row/bs;   /* Block number */
914   bp  = row % bs; /* Block Position */
915   M   = ai[bn+1] - ai[bn];
916   *nz = bs*M;
917 
918   if (v) {
919     *v = 0;
920     if (*nz) {
921       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
922       for (i=0; i<M; i++) { /* for each block in the block row */
923         v_i  = *v + i*bs;
924         aa_i = aa + bs2*(ai[bn] + i);
925         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
926       }
927     }
928   }
929 
930   if (idx) {
931     *idx = 0;
932     if (*nz) {
933       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
934       for (i=0; i<M; i++) { /* for each block in the block row */
935         idx_i = *idx + i*bs;
936         itmp  = bs*aj[ai[bn] + i];
937         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
938       }
939     }
940   }
941   PetscFunctionReturn(0);
942 }
943 
944 #undef __FUNCT__
945 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
946 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
947 {
948   PetscErrorCode ierr;
949 
950   PetscFunctionBegin;
951   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
952   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
953   PetscFunctionReturn(0);
954 }
955 
956 #undef __FUNCT__
957 #define __FUNCT__ "MatTranspose_SeqBAIJ"
958 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
959 {
960   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
961   Mat            C;
962   PetscErrorCode ierr;
963   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
964   PetscInt       *rows,*cols,bs2=a->bs2;
965   PetscScalar    *array;
966 
967   PetscFunctionBegin;
968   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
969   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
970   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
971 
972 #if defined(PETSC_USE_MAT_SINGLE)
973   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
974   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
975 #else
976   array = a->a;
977 #endif
978 
979   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
980   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
981   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
982   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
983   ierr = PetscFree(col);CHKERRQ(ierr);
984   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
985   cols = rows + bs;
986   for (i=0; i<mbs; i++) {
987     cols[0] = i*bs;
988     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
989     len = ai[i+1] - ai[i];
990     for (j=0; j<len; j++) {
991       rows[0] = (*aj++)*bs;
992       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
993       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
994       array += bs2;
995     }
996   }
997   ierr = PetscFree(rows);CHKERRQ(ierr);
998 #if defined(PETSC_USE_MAT_SINGLE)
999   ierr = PetscFree(array);
1000 #endif
1001 
1002   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004 
1005   if (B) {
1006     *B = C;
1007   } else {
1008     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1015 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1016 {
1017   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1018   PetscErrorCode ierr;
1019   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1020   int            fd;
1021   PetscScalar    *aa;
1022   FILE           *file;
1023 
1024   PetscFunctionBegin;
1025   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1026   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1027   col_lens[0] = MAT_FILE_COOKIE;
1028 
1029   col_lens[1] = A->m;
1030   col_lens[2] = A->n;
1031   col_lens[3] = a->nz*bs2;
1032 
1033   /* store lengths of each row and write (including header) to file */
1034   count = 0;
1035   for (i=0; i<a->mbs; i++) {
1036     for (j=0; j<bs; j++) {
1037       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1038     }
1039   }
1040   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1041   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1042 
1043   /* store column indices (zero start index) */
1044   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1045   count = 0;
1046   for (i=0; i<a->mbs; i++) {
1047     for (j=0; j<bs; j++) {
1048       for (k=a->i[i]; k<a->i[i+1]; k++) {
1049         for (l=0; l<bs; l++) {
1050           jj[count++] = bs*a->j[k] + l;
1051         }
1052       }
1053     }
1054   }
1055   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1056   ierr = PetscFree(jj);CHKERRQ(ierr);
1057 
1058   /* store nonzero values */
1059   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1060   count = 0;
1061   for (i=0; i<a->mbs; i++) {
1062     for (j=0; j<bs; j++) {
1063       for (k=a->i[i]; k<a->i[i+1]; k++) {
1064         for (l=0; l<bs; l++) {
1065           aa[count++] = a->a[bs2*k + l*bs + j];
1066         }
1067       }
1068     }
1069   }
1070   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1071   ierr = PetscFree(aa);CHKERRQ(ierr);
1072 
1073   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1074   if (file) {
1075     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1076   }
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1082 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1083 {
1084   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1085   PetscErrorCode    ierr;
1086   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1087   PetscViewerFormat format;
1088 
1089   PetscFunctionBegin;
1090   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1091   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1092     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1093   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1094     Mat aij;
1095     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1096     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1097     ierr = MatDestroy(aij);CHKERRQ(ierr);
1098   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1099      PetscFunctionReturn(0);
1100   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1101     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1102     for (i=0; i<a->mbs; i++) {
1103       for (j=0; j<bs; j++) {
1104         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1105         for (k=a->i[i]; k<a->i[i+1]; k++) {
1106           for (l=0; l<bs; l++) {
1107 #if defined(PETSC_USE_COMPLEX)
1108             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1109               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1110                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1111             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1112               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1113                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1114             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1115               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1116             }
1117 #else
1118             if (a->a[bs2*k + l*bs + j] != 0.0) {
1119               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1120             }
1121 #endif
1122           }
1123         }
1124         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1125       }
1126     }
1127     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1128   } else {
1129     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1130     for (i=0; i<a->mbs; i++) {
1131       for (j=0; j<bs; j++) {
1132         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1133         for (k=a->i[i]; k<a->i[i+1]; k++) {
1134           for (l=0; l<bs; l++) {
1135 #if defined(PETSC_USE_COMPLEX)
1136             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1137               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1138                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1139             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1140               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1141                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1142             } else {
1143               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1144             }
1145 #else
1146             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1147 #endif
1148           }
1149         }
1150         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1151       }
1152     }
1153     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1154   }
1155   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1161 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1162 {
1163   Mat            A = (Mat) Aa;
1164   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1165   PetscErrorCode ierr;
1166   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1167   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1168   MatScalar      *aa;
1169   PetscViewer    viewer;
1170 
1171   PetscFunctionBegin;
1172 
1173   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1174   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1175 
1176   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1177 
1178   /* loop over matrix elements drawing boxes */
1179   color = PETSC_DRAW_BLUE;
1180   for (i=0,row=0; i<mbs; i++,row+=bs) {
1181     for (j=a->i[i]; j<a->i[i+1]; j++) {
1182       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1183       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1184       aa = a->a + j*bs2;
1185       for (k=0; k<bs; k++) {
1186         for (l=0; l<bs; l++) {
1187           if (PetscRealPart(*aa++) >=  0.) continue;
1188           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1189         }
1190       }
1191     }
1192   }
1193   color = PETSC_DRAW_CYAN;
1194   for (i=0,row=0; i<mbs; i++,row+=bs) {
1195     for (j=a->i[i]; j<a->i[i+1]; j++) {
1196       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1197       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1198       aa = a->a + j*bs2;
1199       for (k=0; k<bs; k++) {
1200         for (l=0; l<bs; l++) {
1201           if (PetscRealPart(*aa++) != 0.) continue;
1202           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1203         }
1204       }
1205     }
1206   }
1207 
1208   color = PETSC_DRAW_RED;
1209   for (i=0,row=0; i<mbs; i++,row+=bs) {
1210     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1212       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1213       aa = a->a + j*bs2;
1214       for (k=0; k<bs; k++) {
1215         for (l=0; l<bs; l++) {
1216           if (PetscRealPart(*aa++) <= 0.) continue;
1217           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1218         }
1219       }
1220     }
1221   }
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1227 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1228 {
1229   PetscErrorCode ierr;
1230   PetscReal      xl,yl,xr,yr,w,h;
1231   PetscDraw      draw;
1232   PetscTruth     isnull;
1233 
1234   PetscFunctionBegin;
1235 
1236   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1237   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1238 
1239   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1240   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1241   xr += w;    yr += h;  xl = -w;     yl = -h;
1242   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1243   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1244   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatView_SeqBAIJ"
1250 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1251 {
1252   PetscErrorCode ierr;
1253   PetscTruth     iascii,isbinary,isdraw;
1254 
1255   PetscFunctionBegin;
1256   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1257   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1258   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1259   if (iascii){
1260     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1261   } else if (isbinary) {
1262     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1263   } else if (isdraw) {
1264     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1265   } else {
1266     Mat B;
1267     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1268     ierr = MatView(B,viewer);CHKERRQ(ierr);
1269     ierr = MatDestroy(B);CHKERRQ(ierr);
1270   }
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1277 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1278 {
1279   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1280   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1281   PetscInt    *ai = a->i,*ailen = a->ilen;
1282   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1283   MatScalar   *ap,*aa = a->a,zero = 0.0;
1284 
1285   PetscFunctionBegin;
1286   for (k=0; k<m; k++) { /* loop over rows */
1287     row  = im[k]; brow = row/bs;
1288     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1289     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1290     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1291     nrow = ailen[brow];
1292     for (l=0; l<n; l++) { /* loop over columns */
1293       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1294       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1295       col  = in[l] ;
1296       bcol = col/bs;
1297       cidx = col%bs;
1298       ridx = row%bs;
1299       high = nrow;
1300       low  = 0; /* assume unsorted */
1301       while (high-low > 5) {
1302         t = (low+high)/2;
1303         if (rp[t] > bcol) high = t;
1304         else             low  = t;
1305       }
1306       for (i=low; i<high; i++) {
1307         if (rp[i] > bcol) break;
1308         if (rp[i] == bcol) {
1309           *v++ = ap[bs2*i+bs*cidx+ridx];
1310           goto finished;
1311         }
1312       }
1313       *v++ = zero;
1314       finished:;
1315     }
1316   }
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #if defined(PETSC_USE_MAT_SINGLE)
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1323 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1324 {
1325   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1326   PetscErrorCode ierr;
1327   PetscInt       i,N = m*n*b->bs2;
1328   MatScalar      *vsingle;
1329 
1330   PetscFunctionBegin;
1331   if (N > b->setvalueslen) {
1332     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1333     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1334     b->setvalueslen  = N;
1335   }
1336   vsingle = b->setvaluescopy;
1337   for (i=0; i<N; i++) {
1338     vsingle[i] = v[i];
1339   }
1340   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 #endif
1344 
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1348 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1349 {
1350   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1351   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1352   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1353   PetscErrorCode  ierr;
1354   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1355   PetscTruth      roworiented=a->roworiented;
1356   const MatScalar *value = v;
1357   MatScalar       *ap,*aa = a->a,*bap;
1358 
1359   PetscFunctionBegin;
1360   if (roworiented) {
1361     stepval = (n-1)*bs;
1362   } else {
1363     stepval = (m-1)*bs;
1364   }
1365   for (k=0; k<m; k++) { /* loop over added rows */
1366     row  = im[k];
1367     if (row < 0) continue;
1368 #if defined(PETSC_USE_DEBUG)
1369     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1370 #endif
1371     rp   = aj + ai[row];
1372     ap   = aa + bs2*ai[row];
1373     rmax = imax[row];
1374     nrow = ailen[row];
1375     low  = 0;
1376     high = nrow;
1377     for (l=0; l<n; l++) { /* loop over added columns */
1378       if (in[l] < 0) continue;
1379 #if defined(PETSC_USE_DEBUG)
1380       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1381 #endif
1382       col = in[l];
1383       if (roworiented) {
1384         value = v + k*(stepval+bs)*bs + l*bs;
1385       } else {
1386         value = v + l*(stepval+bs)*bs + k*bs;
1387       }
1388       if (col < lastcol) low = 0; else high = nrow;
1389       lastcol = col;
1390       while (high-low > 7) {
1391         t = (low+high)/2;
1392         if (rp[t] > col) high = t;
1393         else             low  = t;
1394       }
1395       for (i=low; i<high; i++) {
1396         if (rp[i] > col) break;
1397         if (rp[i] == col) {
1398           bap  = ap +  bs2*i;
1399           if (roworiented) {
1400             if (is == ADD_VALUES) {
1401               for (ii=0; ii<bs; ii++,value+=stepval) {
1402                 for (jj=ii; jj<bs2; jj+=bs) {
1403                   bap[jj] += *value++;
1404                 }
1405               }
1406             } else {
1407               for (ii=0; ii<bs; ii++,value+=stepval) {
1408                 for (jj=ii; jj<bs2; jj+=bs) {
1409                   bap[jj] = *value++;
1410                 }
1411               }
1412             }
1413           } else {
1414             if (is == ADD_VALUES) {
1415               for (ii=0; ii<bs; ii++,value+=stepval) {
1416                 for (jj=0; jj<bs; jj++) {
1417                   *bap++ += *value++;
1418                 }
1419               }
1420             } else {
1421               for (ii=0; ii<bs; ii++,value+=stepval) {
1422                 for (jj=0; jj<bs; jj++) {
1423                   *bap++  = *value++;
1424                 }
1425               }
1426             }
1427           }
1428           goto noinsert2;
1429         }
1430       }
1431       if (nonew == 1) goto noinsert2;
1432       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1433       if (nrow >= rmax) {
1434         /* there is no extra room in row, therefore enlarge */
1435         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1436         MatScalar *new_a;
1437 
1438         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1439 
1440         /* malloc new storage space */
1441         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1442 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1443         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1444         new_i   = new_j + new_nz;
1445 
1446         /* copy over old data into new slots */
1447         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1448         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1449         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1450         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1451         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1452         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1453         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1454         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1455         /* free up old matrix storage */
1456         ierr = PetscFree(a->a);CHKERRQ(ierr);
1457         if (!a->singlemalloc) {
1458           ierr = PetscFree(a->i);CHKERRQ(ierr);
1459           ierr = PetscFree(a->j);CHKERRQ(ierr);
1460         }
1461         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1462         a->singlemalloc = PETSC_TRUE;
1463 
1464         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1465         rmax = imax[row] = imax[row] + CHUNKSIZE;
1466         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1467         a->maxnz += bs2*CHUNKSIZE;
1468         a->reallocs++;
1469         a->nz++;
1470       }
1471       N = nrow++ - 1;
1472       /* shift up all the later entries in this row */
1473       for (ii=N; ii>=i; ii--) {
1474         rp[ii+1] = rp[ii];
1475         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1476       }
1477       if (N >= i) {
1478         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1479       }
1480       rp[i] = col;
1481       bap   = ap +  bs2*i;
1482       if (roworiented) {
1483         for (ii=0; ii<bs; ii++,value+=stepval) {
1484           for (jj=ii; jj<bs2; jj+=bs) {
1485             bap[jj] = *value++;
1486           }
1487         }
1488       } else {
1489         for (ii=0; ii<bs; ii++,value+=stepval) {
1490           for (jj=0; jj<bs; jj++) {
1491             *bap++  = *value++;
1492           }
1493         }
1494       }
1495       noinsert2:;
1496       low = i;
1497     }
1498     ailen[row] = nrow;
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1505 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1506 {
1507   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1508   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1509   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1510   PetscErrorCode ierr;
1511   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1512   MatScalar      *aa = a->a,*ap;
1513   PetscReal      ratio=0.6;
1514 
1515   PetscFunctionBegin;
1516   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1517 
1518   if (m) rmax = ailen[0];
1519   for (i=1; i<mbs; i++) {
1520     /* move each row back by the amount of empty slots (fshift) before it*/
1521     fshift += imax[i-1] - ailen[i-1];
1522     rmax   = PetscMax(rmax,ailen[i]);
1523     if (fshift) {
1524       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1525       N = ailen[i];
1526       for (j=0; j<N; j++) {
1527         ip[j-fshift] = ip[j];
1528         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1529       }
1530     }
1531     ai[i] = ai[i-1] + ailen[i-1];
1532   }
1533   if (mbs) {
1534     fshift += imax[mbs-1] - ailen[mbs-1];
1535     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1536   }
1537   /* reset ilen and imax for each row */
1538   for (i=0; i<mbs; i++) {
1539     ailen[i] = imax[i] = ai[i+1] - ai[i];
1540   }
1541   a->nz = ai[mbs];
1542 
1543   /* diagonals may have moved, so kill the diagonal pointers */
1544   a->idiagvalid = PETSC_FALSE;
1545   if (fshift && a->diag) {
1546     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1547     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1548     a->diag = 0;
1549   }
1550   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1551   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1552   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1553   a->reallocs          = 0;
1554   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1555 
1556   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1557   if (a->compressedrow.use){
1558     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1559   }
1560 
1561   A->same_nonzero = PETSC_TRUE;
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 /*
1566    This function returns an array of flags which indicate the locations of contiguous
1567    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1568    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1569    Assume: sizes should be long enough to hold all the values.
1570 */
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1573 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1574 {
1575   PetscInt   i,j,k,row;
1576   PetscTruth flg;
1577 
1578   PetscFunctionBegin;
1579   for (i=0,j=0; i<n; j++) {
1580     row = idx[i];
1581     if (row%bs!=0) { /* Not the begining of a block */
1582       sizes[j] = 1;
1583       i++;
1584     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1585       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1586       i++;
1587     } else { /* Begining of the block, so check if the complete block exists */
1588       flg = PETSC_TRUE;
1589       for (k=1; k<bs; k++) {
1590         if (row+k != idx[i+k]) { /* break in the block */
1591           flg = PETSC_FALSE;
1592           break;
1593         }
1594       }
1595       if (flg) { /* No break in the bs */
1596         sizes[j] = bs;
1597         i+= bs;
1598       } else {
1599         sizes[j] = 1;
1600         i++;
1601       }
1602     }
1603   }
1604   *bs_max = j;
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1610 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1611 {
1612   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1615   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1616   PetscScalar    zero = 0.0;
1617   MatScalar      *aa;
1618 
1619   PetscFunctionBegin;
1620   /* Make a copy of the IS and  sort it */
1621   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1622   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1623 
1624   /* allocate memory for rows,sizes */
1625   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1626   sizes = rows + is_n;
1627 
1628   /* copy IS values to rows, and sort them */
1629   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1630   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1631   if (baij->keepzeroedrows) {
1632     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1633     bs_max = is_n;
1634     A->same_nonzero = PETSC_TRUE;
1635   } else {
1636     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1637     A->same_nonzero = PETSC_FALSE;
1638   }
1639   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1640 
1641   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1642     row   = rows[j];
1643     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1644     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1645     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1646     if (sizes[i] == bs && !baij->keepzeroedrows) {
1647       if (diag) {
1648         if (baij->ilen[row/bs] > 0) {
1649           baij->ilen[row/bs]       = 1;
1650           baij->j[baij->i[row/bs]] = row/bs;
1651           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1652         }
1653         /* Now insert all the diagonal values for this bs */
1654         for (k=0; k<bs; k++) {
1655           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1656         }
1657       } else { /* (!diag) */
1658         baij->ilen[row/bs] = 0;
1659       } /* end (!diag) */
1660     } else { /* (sizes[i] != bs) */
1661 #if defined (PETSC_USE_DEBUG)
1662       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1663 #endif
1664       for (k=0; k<count; k++) {
1665         aa[0] =  zero;
1666         aa    += bs;
1667       }
1668       if (diag) {
1669         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1670       }
1671     }
1672   }
1673 
1674   ierr = PetscFree(rows);CHKERRQ(ierr);
1675   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1681 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1682 {
1683   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1684   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1685   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1686   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1687   PetscErrorCode ierr;
1688   PetscInt       ridx,cidx,bs2=a->bs2;
1689   PetscTruth     roworiented=a->roworiented;
1690   MatScalar      *ap,value,*aa=a->a,*bap;
1691 
1692   PetscFunctionBegin;
1693   for (k=0; k<m; k++) { /* loop over added rows */
1694     row  = im[k]; brow = row/bs;
1695     if (row < 0) continue;
1696 #if defined(PETSC_USE_DEBUG)
1697     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1698 #endif
1699     rp   = aj + ai[brow];
1700     ap   = aa + bs2*ai[brow];
1701     rmax = imax[brow];
1702     nrow = ailen[brow];
1703     low  = 0;
1704     high = nrow;
1705     for (l=0; l<n; l++) { /* loop over added columns */
1706       if (in[l] < 0) continue;
1707 #if defined(PETSC_USE_DEBUG)
1708       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1709 #endif
1710       col = in[l]; bcol = col/bs;
1711       ridx = row % bs; cidx = col % bs;
1712       if (roworiented) {
1713         value = v[l + k*n];
1714       } else {
1715         value = v[k + l*m];
1716       }
1717       if (col < lastcol) low = 0; else high = nrow;
1718       lastcol = col;
1719       while (high-low > 7) {
1720         t = (low+high)/2;
1721         if (rp[t] > bcol) high = t;
1722         else              low  = t;
1723       }
1724       for (i=low; i<high; i++) {
1725         if (rp[i] > bcol) break;
1726         if (rp[i] == bcol) {
1727           bap  = ap +  bs2*i + bs*cidx + ridx;
1728           if (is == ADD_VALUES) *bap += value;
1729           else                  *bap  = value;
1730           goto noinsert1;
1731         }
1732       }
1733       if (nonew == 1) goto noinsert1;
1734       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1735       if (nrow >= rmax) {
1736         /* there is no extra room in row, therefore enlarge */
1737         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1738         MatScalar *new_a;
1739 
1740         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1741 
1742         /* Malloc new storage space */
1743         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1744 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1745         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1746         new_i   = new_j + new_nz;
1747 
1748         /* copy over old data into new slots */
1749         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1750         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1751         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1752         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1753         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1754         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1755         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1756         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1757         /* free up old matrix storage */
1758         ierr = PetscFree(a->a);CHKERRQ(ierr);
1759         if (!a->singlemalloc) {
1760           ierr = PetscFree(a->i);CHKERRQ(ierr);
1761           ierr = PetscFree(a->j);CHKERRQ(ierr);
1762         }
1763         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1764         a->singlemalloc = PETSC_TRUE;
1765 
1766         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1767         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1768         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1769         a->maxnz += bs2*CHUNKSIZE;
1770         a->reallocs++;
1771         a->nz++;
1772       }
1773       N = nrow++ - 1;
1774       /* shift up all the later entries in this row */
1775       for (ii=N; ii>=i; ii--) {
1776         rp[ii+1] = rp[ii];
1777         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1778       }
1779       if (N>=i) {
1780         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1781       }
1782       rp[i]                      = bcol;
1783       ap[bs2*i + bs*cidx + ridx] = value;
1784       noinsert1:;
1785       low = i;
1786     }
1787     ailen[brow] = nrow;
1788   }
1789   A->same_nonzero = PETSC_FALSE;
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1796 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1797 {
1798   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1799   Mat            outA;
1800   PetscErrorCode ierr;
1801   PetscTruth     row_identity,col_identity;
1802 
1803   PetscFunctionBegin;
1804   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1805   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1806   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1807   if (!row_identity || !col_identity) {
1808     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1809   }
1810 
1811   outA          = inA;
1812   inA->factor   = FACTOR_LU;
1813 
1814   if (!a->diag) {
1815     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1816   }
1817 
1818   a->row        = row;
1819   a->col        = col;
1820   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1821   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1822 
1823   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1824   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1825   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1826 
1827   /*
1828       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1829       for ILU(0) factorization with natural ordering
1830   */
1831   if (inA->bs < 8) {
1832     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1833   } else {
1834     if (!a->solve_work) {
1835       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1836       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1837     }
1838   }
1839 
1840   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1841 
1842   PetscFunctionReturn(0);
1843 }
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1846 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1847 {
1848   static PetscTruth called = PETSC_FALSE;
1849   MPI_Comm          comm = A->comm;
1850   PetscErrorCode    ierr;
1851 
1852   PetscFunctionBegin;
1853   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1854   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1855   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 EXTERN_C_BEGIN
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1862 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1863 {
1864   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1865   PetscInt    i,nz,nbs;
1866 
1867   PetscFunctionBegin;
1868   nz  = baij->maxnz/baij->bs2;
1869   nbs = baij->nbs;
1870   for (i=0; i<nz; i++) {
1871     baij->j[i] = indices[i];
1872   }
1873   baij->nz = nz;
1874   for (i=0; i<nbs; i++) {
1875     baij->ilen[i] = baij->imax[i];
1876   }
1877 
1878   PetscFunctionReturn(0);
1879 }
1880 EXTERN_C_END
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1884 /*@
1885     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1886        in the matrix.
1887 
1888   Input Parameters:
1889 +  mat - the SeqBAIJ matrix
1890 -  indices - the column indices
1891 
1892   Level: advanced
1893 
1894   Notes:
1895     This can be called if you have precomputed the nonzero structure of the
1896   matrix and want to provide it to the matrix object to improve the performance
1897   of the MatSetValues() operation.
1898 
1899     You MUST have set the correct numbers of nonzeros per row in the call to
1900   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1901 
1902     MUST be called before any calls to MatSetValues();
1903 
1904 @*/
1905 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1906 {
1907   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1908 
1909   PetscFunctionBegin;
1910   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1911   PetscValidPointer(indices,2);
1912   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1913   if (f) {
1914     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1915   } else {
1916     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1917   }
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1923 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1924 {
1925   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1926   PetscErrorCode ierr;
1927   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1928   PetscReal      atmp;
1929   PetscScalar    *x,zero = 0.0;
1930   MatScalar      *aa;
1931   PetscInt       ncols,brow,krow,kcol;
1932 
1933   PetscFunctionBegin;
1934   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1935   bs   = A->bs;
1936   aa   = a->a;
1937   ai   = a->i;
1938   aj   = a->j;
1939   mbs = a->mbs;
1940 
1941   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1942   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1943   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1944   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1945   for (i=0; i<mbs; i++) {
1946     ncols = ai[1] - ai[0]; ai++;
1947     brow  = bs*i;
1948     for (j=0; j<ncols; j++){
1949       /* bcol = bs*(*aj); */
1950       for (kcol=0; kcol<bs; kcol++){
1951         for (krow=0; krow<bs; krow++){
1952           atmp = PetscAbsScalar(*aa); aa++;
1953           row = brow + krow;    /* row index */
1954           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1955           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1956         }
1957       }
1958       aj++;
1959     }
1960   }
1961   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1967 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1968 {
1969   PetscErrorCode ierr;
1970 
1971   PetscFunctionBegin;
1972   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 #undef __FUNCT__
1977 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1978 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1979 {
1980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1981   PetscFunctionBegin;
1982   *array = a->a;
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 #undef __FUNCT__
1987 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1988 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1989 {
1990   PetscFunctionBegin;
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #include "petscblaslapack.h"
1995 #undef __FUNCT__
1996 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1997 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1998 {
1999   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2000   PetscErrorCode ierr;
2001   PetscInt       i,bs=Y->bs,j,bs2;
2002   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
2003 
2004   PetscFunctionBegin;
2005   if (str == SAME_NONZERO_PATTERN) {
2006     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2007   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2008     if (y->xtoy && y->XtoY != X) {
2009       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2010       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2011     }
2012     if (!y->xtoy) { /* get xtoy */
2013       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2014       y->XtoY = X;
2015     }
2016     bs2 = bs*bs;
2017     for (i=0; i<x->nz; i++) {
2018       j = 0;
2019       while (j < bs2){
2020         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2021         j++;
2022       }
2023     }
2024     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2025   } else {
2026     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2027   }
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 /* -------------------------------------------------------------------*/
2032 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2033        MatGetRow_SeqBAIJ,
2034        MatRestoreRow_SeqBAIJ,
2035        MatMult_SeqBAIJ_N,
2036 /* 4*/ MatMultAdd_SeqBAIJ_N,
2037        MatMultTranspose_SeqBAIJ,
2038        MatMultTransposeAdd_SeqBAIJ,
2039        MatSolve_SeqBAIJ_N,
2040        0,
2041        0,
2042 /*10*/ 0,
2043        MatLUFactor_SeqBAIJ,
2044        0,
2045        0,
2046        MatTranspose_SeqBAIJ,
2047 /*15*/ MatGetInfo_SeqBAIJ,
2048        MatEqual_SeqBAIJ,
2049        MatGetDiagonal_SeqBAIJ,
2050        MatDiagonalScale_SeqBAIJ,
2051        MatNorm_SeqBAIJ,
2052 /*20*/ 0,
2053        MatAssemblyEnd_SeqBAIJ,
2054        0,
2055        MatSetOption_SeqBAIJ,
2056        MatZeroEntries_SeqBAIJ,
2057 /*25*/ MatZeroRows_SeqBAIJ,
2058        MatLUFactorSymbolic_SeqBAIJ,
2059        MatLUFactorNumeric_SeqBAIJ_N,
2060        MatCholeskyFactorSymbolic_SeqBAIJ,
2061        MatCholeskyFactorNumeric_SeqBAIJ_N,
2062 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2063        MatILUFactorSymbolic_SeqBAIJ,
2064        MatICCFactorSymbolic_SeqBAIJ,
2065        MatGetArray_SeqBAIJ,
2066        MatRestoreArray_SeqBAIJ,
2067 /*35*/ MatDuplicate_SeqBAIJ,
2068        0,
2069        0,
2070        MatILUFactor_SeqBAIJ,
2071        0,
2072 /*40*/ MatAXPY_SeqBAIJ,
2073        MatGetSubMatrices_SeqBAIJ,
2074        MatIncreaseOverlap_SeqBAIJ,
2075        MatGetValues_SeqBAIJ,
2076        0,
2077 /*45*/ MatPrintHelp_SeqBAIJ,
2078        MatScale_SeqBAIJ,
2079        0,
2080        0,
2081        0,
2082 /*50*/ 0,
2083        MatGetRowIJ_SeqBAIJ,
2084        MatRestoreRowIJ_SeqBAIJ,
2085        0,
2086        0,
2087 /*55*/ 0,
2088        0,
2089        0,
2090        0,
2091        MatSetValuesBlocked_SeqBAIJ,
2092 /*60*/ MatGetSubMatrix_SeqBAIJ,
2093        MatDestroy_SeqBAIJ,
2094        MatView_SeqBAIJ,
2095        MatGetPetscMaps_Petsc,
2096        0,
2097 /*65*/ 0,
2098        0,
2099        0,
2100        0,
2101        0,
2102 /*70*/ MatGetRowMax_SeqBAIJ,
2103        MatConvert_Basic,
2104        0,
2105        0,
2106        0,
2107 /*75*/ 0,
2108        0,
2109        0,
2110        0,
2111        0,
2112 /*80*/ 0,
2113        0,
2114        0,
2115        0,
2116        MatLoad_SeqBAIJ,
2117 /*85*/ 0,
2118        0,
2119        0,
2120        0,
2121        0,
2122 /*90*/ 0,
2123        0,
2124        0,
2125        0,
2126        0,
2127 /*95*/ 0,
2128        0,
2129        0,
2130        0};
2131 
2132 EXTERN_C_BEGIN
2133 #undef __FUNCT__
2134 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2135 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2136 {
2137   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2138   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2139   PetscErrorCode ierr;
2140 
2141   PetscFunctionBegin;
2142   if (aij->nonew != 1) {
2143     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2144   }
2145 
2146   /* allocate space for values if not already there */
2147   if (!aij->saved_values) {
2148     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2149   }
2150 
2151   /* copy values over */
2152   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 EXTERN_C_END
2156 
2157 EXTERN_C_BEGIN
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2160 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2161 {
2162   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2163   PetscErrorCode ierr;
2164   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2165 
2166   PetscFunctionBegin;
2167   if (aij->nonew != 1) {
2168     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2169   }
2170   if (!aij->saved_values) {
2171     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2172   }
2173 
2174   /* copy values over */
2175   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 EXTERN_C_END
2179 
2180 EXTERN_C_BEGIN
2181 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2182 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2183 EXTERN_C_END
2184 
2185 EXTERN_C_BEGIN
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2188 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2189 {
2190   Mat_SeqBAIJ    *b;
2191   PetscErrorCode ierr;
2192   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2193   PetscTruth     flg;
2194 
2195   PetscFunctionBegin;
2196 
2197   B->preallocated = PETSC_TRUE;
2198   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2199   if (nnz && newbs != bs) {
2200     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2201   }
2202   bs   = newbs;
2203 
2204   mbs  = B->m/bs;
2205   nbs  = B->n/bs;
2206   bs2  = bs*bs;
2207 
2208   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2209     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2210   }
2211 
2212   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2213   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2214   if (nnz) {
2215     for (i=0; i<mbs; i++) {
2216       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2217       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2218     }
2219   }
2220 
2221   b       = (Mat_SeqBAIJ*)B->data;
2222   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2223   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2224   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2225   if (!flg) {
2226     switch (bs) {
2227     case 1:
2228       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2229       B->ops->mult            = MatMult_SeqBAIJ_1;
2230       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2231       break;
2232     case 2:
2233       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2234       B->ops->mult            = MatMult_SeqBAIJ_2;
2235       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2236       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2237       break;
2238     case 3:
2239       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2240       B->ops->mult            = MatMult_SeqBAIJ_3;
2241       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2242       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2243       break;
2244     case 4:
2245       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2246       B->ops->mult            = MatMult_SeqBAIJ_4;
2247       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2248       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2249       break;
2250     case 5:
2251       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2252       B->ops->mult            = MatMult_SeqBAIJ_5;
2253       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2254       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2255       break;
2256     case 6:
2257       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2258       B->ops->mult            = MatMult_SeqBAIJ_6;
2259       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2260       break;
2261     case 7:
2262       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2263       B->ops->mult            = MatMult_SeqBAIJ_7;
2264       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2265       break;
2266     default:
2267       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2268       B->ops->mult            = MatMult_SeqBAIJ_N;
2269       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2270       break;
2271     }
2272   }
2273   B->bs      = bs;
2274   b->mbs     = mbs;
2275   b->nbs     = nbs;
2276   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2277   if (!nnz) {
2278     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2279     else if (nz <= 0)        nz = 1;
2280     for (i=0; i<mbs; i++) b->imax[i] = nz;
2281     nz = nz*mbs;
2282   } else {
2283     nz = 0;
2284     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2285   }
2286 
2287   /* allocate the matrix space */
2288   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2289   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2290   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2291   b->j  = (PetscInt*)(b->a + nz*bs2);
2292   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2293   b->i  = b->j + nz;
2294   b->singlemalloc = PETSC_TRUE;
2295 
2296   b->i[0] = 0;
2297   for (i=1; i<mbs+1; i++) {
2298     b->i[i] = b->i[i-1] + b->imax[i-1];
2299   }
2300 
2301   /* b->ilen will count nonzeros in each block row so far. */
2302   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2303   ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2304   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2305 
2306   B->bs               = bs;
2307   b->bs2              = bs2;
2308   b->mbs              = mbs;
2309   b->nz               = 0;
2310   b->maxnz            = nz*bs2;
2311   B->info.nz_unneeded = (PetscReal)b->maxnz;
2312   PetscFunctionReturn(0);
2313 }
2314 EXTERN_C_END
2315 
2316 /*MC
2317    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2318    block sparse compressed row format.
2319 
2320    Options Database Keys:
2321 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2322 
2323   Level: beginner
2324 
2325 .seealso: MatCreateSeqBAIJ
2326 M*/
2327 
2328 EXTERN_C_BEGIN
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatCreate_SeqBAIJ"
2331 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2332 {
2333   PetscErrorCode ierr;
2334   PetscMPIInt    size;
2335   Mat_SeqBAIJ    *b;
2336 
2337   PetscFunctionBegin;
2338   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2339   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2340 
2341   B->m = B->M = PetscMax(B->m,B->M);
2342   B->n = B->N = PetscMax(B->n,B->N);
2343   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2344   B->data = (void*)b;
2345   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2346   B->factor           = 0;
2347   B->mapping          = 0;
2348   b->row              = 0;
2349   b->col              = 0;
2350   b->icol             = 0;
2351   b->reallocs         = 0;
2352   b->saved_values     = 0;
2353 #if defined(PETSC_USE_MAT_SINGLE)
2354   b->setvalueslen     = 0;
2355   b->setvaluescopy    = PETSC_NULL;
2356 #endif
2357 
2358   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2359   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2360 
2361   b->sorted           = PETSC_FALSE;
2362   b->roworiented      = PETSC_TRUE;
2363   b->nonew            = 0;
2364   b->diag             = 0;
2365   b->solve_work       = 0;
2366   b->mult_work        = 0;
2367   B->spptr            = 0;
2368   B->info.nz_unneeded = (PetscReal)b->maxnz;
2369   b->keepzeroedrows   = PETSC_FALSE;
2370   b->xtoy              = 0;
2371   b->XtoY              = 0;
2372   b->compressedrow.use     = PETSC_FALSE;
2373   b->compressedrow.nrows   = 0;
2374   b->compressedrow.i       = PETSC_NULL;
2375   b->compressedrow.rindex  = PETSC_NULL;
2376   b->compressedrow.checked = PETSC_FALSE;
2377   B->same_nonzero          = PETSC_FALSE;
2378 
2379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2380                                      "MatStoreValues_SeqBAIJ",
2381                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2383                                      "MatRetrieveValues_SeqBAIJ",
2384                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2386                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2387                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2389                                      "MatConvert_SeqBAIJ_SeqAIJ",
2390                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2391 #if !defined(PETSC_USE_64BIT_INT)
2392   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2393                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2394                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2395 #endif
2396   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2397                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2398                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2399   PetscFunctionReturn(0);
2400 }
2401 EXTERN_C_END
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2405 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2406 {
2407   Mat            C;
2408   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2409   PetscErrorCode ierr;
2410   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2411 
2412   PetscFunctionBegin;
2413   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2414 
2415   *B = 0;
2416   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2417   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2418   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2419   c    = (Mat_SeqBAIJ*)C->data;
2420 
2421   C->M   = A->M;
2422   C->N   = A->N;
2423   C->bs  = A->bs;
2424   c->bs2 = a->bs2;
2425   c->mbs = a->mbs;
2426   c->nbs = a->nbs;
2427 
2428   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2429   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2430   for (i=0; i<mbs; i++) {
2431     c->imax[i] = a->imax[i];
2432     c->ilen[i] = a->ilen[i];
2433   }
2434 
2435   /* allocate the matrix space */
2436   c->singlemalloc = PETSC_TRUE;
2437   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2438   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2439   c->j = (PetscInt*)(c->a + nz*bs2);
2440   c->i = c->j + nz;
2441   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2442   if (mbs > 0) {
2443     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2444     if (cpvalues == MAT_COPY_VALUES) {
2445       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2446     } else {
2447       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2448     }
2449   }
2450 
2451   ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2452   c->sorted      = a->sorted;
2453   c->roworiented = a->roworiented;
2454   c->nonew       = a->nonew;
2455 
2456   if (a->diag) {
2457     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2458     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2459     for (i=0; i<mbs; i++) {
2460       c->diag[i] = a->diag[i];
2461     }
2462   } else c->diag        = 0;
2463   c->nz                 = a->nz;
2464   c->maxnz              = a->maxnz;
2465   c->solve_work         = 0;
2466   c->mult_work          = 0;
2467   C->preallocated       = PETSC_TRUE;
2468   C->assembled          = PETSC_TRUE;
2469 
2470   c->compressedrow.use     = a->compressedrow.use;
2471   c->compressedrow.nrows   = a->compressedrow.nrows;
2472   c->compressedrow.checked = a->compressedrow.checked;
2473   if ( a->compressedrow.checked && a->compressedrow.use){
2474     i = a->compressedrow.nrows;
2475     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2476     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2477     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2478     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2479   } else {
2480     c->compressedrow.use    = PETSC_FALSE;
2481     c->compressedrow.i      = PETSC_NULL;
2482     c->compressedrow.rindex = PETSC_NULL;
2483   }
2484   C->same_nonzero = A->same_nonzero;
2485   *B = C;
2486   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "MatLoad_SeqBAIJ"
2492 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2493 {
2494   Mat_SeqBAIJ    *a;
2495   Mat            B;
2496   PetscErrorCode ierr;
2497   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2498   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2499   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2500   PetscInt       *masked,nmask,tmp,bs2,ishift;
2501   PetscMPIInt    size;
2502   int            fd;
2503   PetscScalar    *aa;
2504   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2505 
2506   PetscFunctionBegin;
2507   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2508   bs2  = bs*bs;
2509 
2510   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2511   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2512   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2513   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2514   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2515   M = header[1]; N = header[2]; nz = header[3];
2516 
2517   if (header[3] < 0) {
2518     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2519   }
2520 
2521   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2522 
2523   /*
2524      This code adds extra rows to make sure the number of rows is
2525     divisible by the blocksize
2526   */
2527   mbs        = M/bs;
2528   extra_rows = bs - M + bs*(mbs);
2529   if (extra_rows == bs) extra_rows = 0;
2530   else                  mbs++;
2531   if (extra_rows) {
2532     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2533   }
2534 
2535   /* read in row lengths */
2536   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2537   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2538   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2539 
2540   /* read in column indices */
2541   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2542   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2543   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2544 
2545   /* loop over row lengths determining block row lengths */
2546   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2547   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2548   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2549   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2550   masked   = mask + mbs;
2551   rowcount = 0; nzcount = 0;
2552   for (i=0; i<mbs; i++) {
2553     nmask = 0;
2554     for (j=0; j<bs; j++) {
2555       kmax = rowlengths[rowcount];
2556       for (k=0; k<kmax; k++) {
2557         tmp = jj[nzcount++]/bs;
2558         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2559       }
2560       rowcount++;
2561     }
2562     browlengths[i] += nmask;
2563     /* zero out the mask elements we set */
2564     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2565   }
2566 
2567   /* create our matrix */
2568   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2569   ierr = MatSetType(B,type);CHKERRQ(ierr);
2570   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2571   a = (Mat_SeqBAIJ*)B->data;
2572 
2573   /* set matrix "i" values */
2574   a->i[0] = 0;
2575   for (i=1; i<= mbs; i++) {
2576     a->i[i]      = a->i[i-1] + browlengths[i-1];
2577     a->ilen[i-1] = browlengths[i-1];
2578   }
2579   a->nz         = 0;
2580   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2581 
2582   /* read in nonzero values */
2583   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2584   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2585   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2586 
2587   /* set "a" and "j" values into matrix */
2588   nzcount = 0; jcount = 0;
2589   for (i=0; i<mbs; i++) {
2590     nzcountb = nzcount;
2591     nmask    = 0;
2592     for (j=0; j<bs; j++) {
2593       kmax = rowlengths[i*bs+j];
2594       for (k=0; k<kmax; k++) {
2595         tmp = jj[nzcount++]/bs;
2596 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2597       }
2598     }
2599     /* sort the masked values */
2600     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2601 
2602     /* set "j" values into matrix */
2603     maskcount = 1;
2604     for (j=0; j<nmask; j++) {
2605       a->j[jcount++]  = masked[j];
2606       mask[masked[j]] = maskcount++;
2607     }
2608     /* set "a" values into matrix */
2609     ishift = bs2*a->i[i];
2610     for (j=0; j<bs; j++) {
2611       kmax = rowlengths[i*bs+j];
2612       for (k=0; k<kmax; k++) {
2613         tmp       = jj[nzcountb]/bs ;
2614         block     = mask[tmp] - 1;
2615         point     = jj[nzcountb] - bs*tmp;
2616         idx       = ishift + bs2*block + j + bs*point;
2617         a->a[idx] = (MatScalar)aa[nzcountb++];
2618       }
2619     }
2620     /* zero out the mask elements we set */
2621     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2622   }
2623   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2624 
2625   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2626   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2627   ierr = PetscFree(aa);CHKERRQ(ierr);
2628   ierr = PetscFree(jj);CHKERRQ(ierr);
2629   ierr = PetscFree(mask);CHKERRQ(ierr);
2630 
2631   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2632   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2633   ierr = MatView_Private(B);CHKERRQ(ierr);
2634 
2635   *A = B;
2636   PetscFunctionReturn(0);
2637 }
2638 
2639 #undef __FUNCT__
2640 #define __FUNCT__ "MatCreateSeqBAIJ"
2641 /*@C
2642    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2643    compressed row) format.  For good matrix assembly performance the
2644    user should preallocate the matrix storage by setting the parameter nz
2645    (or the array nnz).  By setting these parameters accurately, performance
2646    during matrix assembly can be increased by more than a factor of 50.
2647 
2648    Collective on MPI_Comm
2649 
2650    Input Parameters:
2651 +  comm - MPI communicator, set to PETSC_COMM_SELF
2652 .  bs - size of block
2653 .  m - number of rows
2654 .  n - number of columns
2655 .  nz - number of nonzero blocks  per block row (same for all rows)
2656 -  nnz - array containing the number of nonzero blocks in the various block rows
2657          (possibly different for each block row) or PETSC_NULL
2658 
2659    Output Parameter:
2660 .  A - the matrix
2661 
2662    Options Database Keys:
2663 .   -mat_no_unroll - uses code that does not unroll the loops in the
2664                      block calculations (much slower)
2665 .    -mat_block_size - size of the blocks to use
2666 
2667    Level: intermediate
2668 
2669    Notes:
2670    The number of rows and columns must be divisible by blocksize.
2671 
2672    If the nnz parameter is given then the nz parameter is ignored
2673 
2674    A nonzero block is any block that as 1 or more nonzeros in it
2675 
2676    The block AIJ format is fully compatible with standard Fortran 77
2677    storage.  That is, the stored row and column indices can begin at
2678    either one (as in Fortran) or zero.  See the users' manual for details.
2679 
2680    Specify the preallocated storage with either nz or nnz (not both).
2681    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2682    allocation.  For additional details, see the users manual chapter on
2683    matrices.
2684 
2685 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2686 @*/
2687 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2688 {
2689   PetscErrorCode ierr;
2690 
2691   PetscFunctionBegin;
2692   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2693   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2694   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 #undef __FUNCT__
2699 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2700 /*@C
2701    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2702    per row in the matrix. For good matrix assembly performance the
2703    user should preallocate the matrix storage by setting the parameter nz
2704    (or the array nnz).  By setting these parameters accurately, performance
2705    during matrix assembly can be increased by more than a factor of 50.
2706 
2707    Collective on MPI_Comm
2708 
2709    Input Parameters:
2710 +  A - the matrix
2711 .  bs - size of block
2712 .  nz - number of block nonzeros per block row (same for all rows)
2713 -  nnz - array containing the number of block nonzeros in the various block rows
2714          (possibly different for each block row) or PETSC_NULL
2715 
2716    Options Database Keys:
2717 .   -mat_no_unroll - uses code that does not unroll the loops in the
2718                      block calculations (much slower)
2719 .    -mat_block_size - size of the blocks to use
2720 
2721    Level: intermediate
2722 
2723    Notes:
2724    If the nnz parameter is given then the nz parameter is ignored
2725 
2726    The block AIJ format is fully compatible with standard Fortran 77
2727    storage.  That is, the stored row and column indices can begin at
2728    either one (as in Fortran) or zero.  See the users' manual for details.
2729 
2730    Specify the preallocated storage with either nz or nnz (not both).
2731    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2732    allocation.  For additional details, see the users manual chapter on
2733    matrices.
2734 
2735 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2736 @*/
2737 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2738 {
2739   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2740 
2741   PetscFunctionBegin;
2742   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2743   if (f) {
2744     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2745   }
2746   PetscFunctionReturn(0);
2747 }
2748 
2749