xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 38baddfdda9fcd85a080f141519fb97fb01bc0a4)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = a->bs,mbs = a->mbs;
18   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
19 
20   PetscFunctionBegin;
21   if (a->idiagvalid) PetscFunctionReturn(0);
22   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
23   diag_offset = a->diag;
24   if (!a->idiag) {
25     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
26   }
27   diag  = a->idiag;
28   mdiag = a->idiag+bs*bs*mbs;
29   /* factor and invert each block */
30   switch (a->bs){
31     case 2:
32       for (i=0; i<mbs; i++) {
33         odiag   = v + 4*diag_offset[i];
34         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
35 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
36 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
37 	diag    += 4;
38 	mdiag   += 4;
39       }
40       break;
41     case 3:
42       for (i=0; i<mbs; i++) {
43         odiag    = v + 9*diag_offset[i];
44         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
45         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
46         diag[8]  = odiag[8];
47         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
48         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
49         mdiag[8] = odiag[8];
50 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
51 	diag    += 9;
52 	mdiag   += 9;
53       }
54       break;
55     case 4:
56       for (i=0; i<mbs; i++) {
57         odiag  = v + 16*diag_offset[i];
58         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
59         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
60 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
61 	diag  += 16;
62 	mdiag += 16;
63       }
64       break;
65     case 5:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 25*diag_offset[i];
68         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
69         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
70 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
71 	diag  += 25;
72 	mdiag += 25;
73       }
74       break;
75     default:
76       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %d",a->bs);
77   }
78   a->idiagvalid = PETSC_TRUE;
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
85 {
86   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
87   PetscScalar        *x,x1,x2,s1,s2;
88   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
89   PetscErrorCode     ierr;
90   PetscInt           m = a->mbs,i,i2,nz,idx;
91   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
92 
93   PetscFunctionBegin;
94   its = its*lits;
95   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
96   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
100 
101   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
102 
103   diag  = a->diag;
104   idiag = a->idiag;
105   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
106   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
107 
108   if (flag & SOR_ZERO_INITIAL_GUESS) {
109     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112       i2     = 2;
113       idiag += 4;
114       for (i=1; i<m; i++) {
115 	v     = aa + 4*ai[i];
116 	vi    = aj + ai[i];
117 	nz    = diag[i] - ai[i];
118 	s1    = b[i2]; s2 = b[i2+1];
119 	while (nz--) {
120 	  idx  = 2*(*vi++);
121 	  x1   = x[idx]; x2 = x[1+idx];
122 	  s1  -= v[0]*x1 + v[2]*x2;
123 	  s2  -= v[1]*x1 + v[3]*x2;
124 	  v   += 4;
125 	}
126 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128         idiag   += 4;
129         i2      += 2;
130       }
131       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132       PetscLogFlops(4*(a->nz));
133     }
134     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136       i2    = 0;
137       mdiag = a->idiag+4*a->mbs;
138       for (i=0; i<m; i++) {
139         x1      = x[i2]; x2 = x[i2+1];
140         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142         mdiag  += 4;
143         i2     += 2;
144       }
145       PetscLogFlops(6*m);
146     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
148     }
149     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150       idiag   = a->idiag+4*a->mbs - 4;
151       i2      = 2*m - 2;
152       x1      = x[i2]; x2 = x[i2+1];
153       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155       idiag -= 4;
156       i2    -= 2;
157       for (i=m-2; i>=0; i--) {
158 	v     = aa + 4*(diag[i]+1);
159 	vi    = aj + diag[i] + 1;
160 	nz    = ai[i+1] - diag[i] - 1;
161 	s1    = x[i2]; s2 = x[i2+1];
162 	while (nz--) {
163 	  idx  = 2*(*vi++);
164 	  x1   = x[idx]; x2 = x[1+idx];
165 	  s1  -= v[0]*x1 + v[2]*x2;
166 	  s2  -= v[1]*x1 + v[3]*x2;
167 	  v   += 4;
168 	}
169 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171         idiag   -= 4;
172         i2      -= 2;
173       }
174       PetscLogFlops(4*(a->nz));
175     }
176   } else {
177     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
178   }
179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
180   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191   PetscErrorCode ierr;
192   PetscInt                m = a->mbs,i,i2,nz,idx;
193   const PetscInt          *diag,*ai = a->i,*aj = a->j,*vi;
194 
195   PetscFunctionBegin;
196   its = its*lits;
197   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
198   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
199   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
200   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
201   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
202 
203   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
204 
205   diag  = a->diag;
206   idiag = a->idiag;
207   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
209 
210   if (flag & SOR_ZERO_INITIAL_GUESS) {
211     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
212       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
213       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
214       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
215       i2     = 3;
216       idiag += 9;
217       for (i=1; i<m; i++) {
218 	v     = aa + 9*ai[i];
219 	vi    = aj + ai[i];
220 	nz    = diag[i] - ai[i];
221 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
222 	while (nz--) {
223 	  idx  = 3*(*vi++);
224 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
225 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
226 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
227 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
228 	  v   += 9;
229 	}
230 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
231 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
232 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
233         idiag   += 9;
234         i2      += 3;
235       }
236       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237       PetscLogFlops(9*(a->nz));
238     }
239     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241       i2    = 0;
242       mdiag = a->idiag+9*a->mbs;
243       for (i=0; i<m; i++) {
244         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
245         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
246         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
247         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
248         mdiag  += 9;
249         i2     += 3;
250       }
251       PetscLogFlops(15*m);
252     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
253       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
254     }
255     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
256       idiag   = a->idiag+9*a->mbs - 9;
257       i2      = 3*m - 3;
258       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
259       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
260       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
261       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
262       idiag -= 9;
263       i2    -= 3;
264       for (i=m-2; i>=0; i--) {
265 	v     = aa + 9*(diag[i]+1);
266 	vi    = aj + diag[i] + 1;
267 	nz    = ai[i+1] - diag[i] - 1;
268 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
269 	while (nz--) {
270 	  idx  = 3*(*vi++);
271 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
272 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
273 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
274 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
275 	  v   += 9;
276 	}
277 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
278 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
279 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
280         idiag   -= 9;
281         i2      -= 3;
282       }
283       PetscLogFlops(9*(a->nz));
284     }
285   } else {
286     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
287   }
288   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
289   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
296 {
297   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
298   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
299   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
300   PetscErrorCode ierr;
301   PetscInt                m = a->mbs,i,i2,nz,idx;
302   const PetscInt          *diag,*ai = a->i,*aj = a->j,*vi;
303 
304   PetscFunctionBegin;
305   its = its*lits;
306   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
307   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
308   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
309   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
310   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
311 
312   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
313 
314   diag  = a->diag;
315   idiag = a->idiag;
316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
318 
319   if (flag & SOR_ZERO_INITIAL_GUESS) {
320     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
321       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
322       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
323       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
324       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
325       i2     = 4;
326       idiag += 16;
327       for (i=1; i<m; i++) {
328 	v     = aa + 16*ai[i];
329 	vi    = aj + ai[i];
330 	nz    = diag[i] - ai[i];
331 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
332 	while (nz--) {
333 	  idx  = 4*(*vi++);
334 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
335 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
336 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
337 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
338 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
339 	  v   += 16;
340 	}
341 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
342 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
343 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
344 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
345         idiag   += 16;
346         i2      += 4;
347       }
348       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
349       PetscLogFlops(16*(a->nz));
350     }
351     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
352         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
353       i2    = 0;
354       mdiag = a->idiag+16*a->mbs;
355       for (i=0; i<m; i++) {
356         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
357         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
358         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
359         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
360         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
361         mdiag  += 16;
362         i2     += 4;
363       }
364       PetscLogFlops(28*m);
365     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
366       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
367     }
368     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
369       idiag   = a->idiag+16*a->mbs - 16;
370       i2      = 4*m - 4;
371       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
372       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
373       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
374       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
375       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
376       idiag -= 16;
377       i2    -= 4;
378       for (i=m-2; i>=0; i--) {
379 	v     = aa + 16*(diag[i]+1);
380 	vi    = aj + diag[i] + 1;
381 	nz    = ai[i+1] - diag[i] - 1;
382 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
383 	while (nz--) {
384 	  idx  = 4*(*vi++);
385 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
386 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
387 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
388 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
389 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
390 	  v   += 16;
391 	}
392 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
393 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
394 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
395 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
396         idiag   -= 16;
397         i2      -= 4;
398       }
399       PetscLogFlops(16*(a->nz));
400     }
401   } else {
402     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
403   }
404   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
412 {
413   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
414   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
415   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
416   PetscErrorCode ierr;
417   PetscInt                m = a->mbs,i,i2,nz,idx;
418   const PetscInt          *diag,*ai = a->i,*aj = a->j,*vi;
419 
420   PetscFunctionBegin;
421   its = its*lits;
422   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
423   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
424   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
425   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
426   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
427 
428   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
429 
430   diag  = a->diag;
431   idiag = a->idiag;
432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
434 
435   if (flag & SOR_ZERO_INITIAL_GUESS) {
436     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
437       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
438       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
439       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
440       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
441       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
442       i2     = 5;
443       idiag += 25;
444       for (i=1; i<m; i++) {
445 	v     = aa + 25*ai[i];
446 	vi    = aj + ai[i];
447 	nz    = diag[i] - ai[i];
448 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
449 	while (nz--) {
450 	  idx  = 5*(*vi++);
451 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
452 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
453 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
454 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
455 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
456 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
457 	  v   += 25;
458 	}
459 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
460 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
461 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
462 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
463 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
464         idiag   += 25;
465         i2      += 5;
466       }
467       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
468       PetscLogFlops(25*(a->nz));
469     }
470     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
471         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
472       i2    = 0;
473       mdiag = a->idiag+25*a->mbs;
474       for (i=0; i<m; i++) {
475         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
476         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
477         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
478         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
479         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
480         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
481         mdiag  += 25;
482         i2     += 5;
483       }
484       PetscLogFlops(45*m);
485     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
486       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
487     }
488     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489       idiag   = a->idiag+25*a->mbs - 25;
490       i2      = 5*m - 5;
491       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
492       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
493       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
494       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
495       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
496       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
497       idiag -= 25;
498       i2    -= 5;
499       for (i=m-2; i>=0; i--) {
500 	v     = aa + 25*(diag[i]+1);
501 	vi    = aj + diag[i] + 1;
502 	nz    = ai[i+1] - diag[i] - 1;
503 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
504 	while (nz--) {
505 	  idx  = 5*(*vi++);
506 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
507 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
508 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
509 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
510 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
511 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
512 	  v   += 25;
513 	}
514 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
515 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
516 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
517 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
518 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
519         idiag   -= 25;
520         i2      -= 5;
521       }
522       PetscLogFlops(25*(a->nz));
523     }
524   } else {
525     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
526   }
527   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
528   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
529   PetscFunctionReturn(0);
530 }
531 
532 /*
533     Special version for Fun3d sequential benchmark
534 */
535 #if defined(PETSC_HAVE_FORTRAN_CAPS)
536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
538 #define matsetvaluesblocked4_ matsetvaluesblocked4
539 #endif
540 
541 EXTERN_C_BEGIN
542 #undef __FUNCT__
543 #define __FUNCT__ "matsetvaluesblocked4_"
544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
545 {
546   Mat               A = *AA;
547   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
548   PetscInt               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
549   PetscInt               *ai=a->i,*ailen=a->ilen;
550   PetscInt               *aj=a->j,stepval;
551   const PetscScalar *value = v;
552   MatScalar         *ap,*aa = a->a,*bap;
553 
554   PetscFunctionBegin;
555   stepval = (n-1)*4;
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     rp   = aj + ai[row];
559     ap   = aa + 16*ai[row];
560     nrow = ailen[row];
561     low  = 0;
562     for (l=0; l<n; l++) { /* loop over added columns */
563       col = in[l];
564       value = v + k*(stepval+4)*4 + l*4;
565       low = 0; high = nrow;
566       while (high-low > 7) {
567         t = (low+high)/2;
568         if (rp[t] > col) high = t;
569         else             low  = t;
570       }
571       for (i=low; i<high; i++) {
572         if (rp[i] > col) break;
573         if (rp[i] == col) {
574           bap  = ap +  16*i;
575           for (ii=0; ii<4; ii++,value+=stepval) {
576             for (jj=ii; jj<16; jj+=4) {
577               bap[jj] += *value++;
578             }
579           }
580           goto noinsert2;
581         }
582       }
583       N = nrow++ - 1;
584       /* shift up all the later entries in this row */
585       for (ii=N; ii>=i; ii--) {
586         rp[ii+1] = rp[ii];
587         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
588       }
589       if (N >= i) {
590         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
591       }
592       rp[i] = col;
593       bap   = ap +  16*i;
594       for (ii=0; ii<4; ii++,value+=stepval) {
595         for (jj=ii; jj<16; jj+=4) {
596           bap[jj] = *value++;
597         }
598       }
599       noinsert2:;
600       low = i;
601     }
602     ailen[row] = nrow;
603   }
604 }
605 EXTERN_C_END
606 
607 #if defined(PETSC_HAVE_FORTRAN_CAPS)
608 #define matsetvalues4_ MATSETVALUES4
609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
610 #define matsetvalues4_ matsetvalues4
611 #endif
612 
613 EXTERN_C_BEGIN
614 #undef __FUNCT__
615 #define __FUNCT__ "MatSetValues4_"
616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
617 {
618   Mat         A = *AA;
619   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620   PetscInt         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
621   PetscInt         *ai=a->i,*ailen=a->ilen;
622   PetscInt         *aj=a->j,brow,bcol;
623   PetscInt         ridx,cidx;
624   MatScalar   *ap,value,*aa=a->a,*bap;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over added rows */
628     row  = im[k]; brow = row/4;
629     rp   = aj + ai[brow];
630     ap   = aa + 16*ai[brow];
631     nrow = ailen[brow];
632     low  = 0;
633     for (l=0; l<n; l++) { /* loop over added columns */
634       col = in[l]; bcol = col/4;
635       ridx = row % 4; cidx = col % 4;
636       value = v[l + k*n];
637       low = 0; high = nrow;
638       while (high-low > 7) {
639         t = (low+high)/2;
640         if (rp[t] > bcol) high = t;
641         else              low  = t;
642       }
643       for (i=low; i<high; i++) {
644         if (rp[i] > bcol) break;
645         if (rp[i] == bcol) {
646           bap  = ap +  16*i + 4*cidx + ridx;
647           *bap += value;
648           goto noinsert1;
649         }
650       }
651       N = nrow++ - 1;
652       /* shift up all the later entries in this row */
653       for (ii=N; ii>=i; ii--) {
654         rp[ii+1] = rp[ii];
655         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
656       }
657       if (N>=i) {
658         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
659       }
660       rp[i]                    = bcol;
661       ap[16*i + 4*cidx + ridx] = value;
662       noinsert1:;
663       low = i;
664     }
665     ailen[brow] = nrow;
666   }
667 }
668 EXTERN_C_END
669 
670 /*  UGLY, ugly, ugly
671    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
672    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
673    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
674    converts the entries into single precision and then calls ..._MatScalar() to put them
675    into the single precision data structures.
676 */
677 #if defined(PETSC_USE_MAT_SINGLE)
678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
679 #else
680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
681 #endif
682 
683 #define CHUNKSIZE  10
684 
685 /*
686      Checks for missing diagonals
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
691 {
692   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
693   PetscErrorCode ierr;
694   PetscInt         *diag,*jj = a->j,i;
695 
696   PetscFunctionBegin;
697   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
698   diag = a->diag;
699   for (i=0; i<a->mbs; i++) {
700     if (jj[diag[i]] != i) {
701       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %d",i);
702     }
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
710 {
711   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
712   PetscErrorCode ierr;
713   PetscInt         i,j,*diag,m = a->mbs;
714 
715   PetscFunctionBegin;
716   if (a->diag) PetscFunctionReturn(0);
717 
718   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
719   PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));
720   for (i=0; i<m; i++) {
721     diag[i] = a->i[i+1];
722     for (j=a->i[i]; j<a->i[i+1]; j++) {
723       if (a->j[j] == i) {
724         diag[i] = j;
725         break;
726       }
727     }
728   }
729   a->diag = diag;
730   PetscFunctionReturn(0);
731 }
732 
733 
734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
739 {
740   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
741   PetscErrorCode ierr;
742   PetscInt         n = a->mbs,i;
743 
744   PetscFunctionBegin;
745   *nn = n;
746   if (!ia) PetscFunctionReturn(0);
747   if (symmetric) {
748     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
749   } else if (oshift == 1) {
750     /* temporarily add 1 to i and j indices */
751     PetscInt nz = a->i[n];
752     for (i=0; i<nz; i++) a->j[i]++;
753     for (i=0; i<n+1; i++) a->i[i]++;
754     *ia = a->i; *ja = a->j;
755   } else {
756     *ia = a->i; *ja = a->j;
757   }
758 
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
765 {
766   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
767   PetscErrorCode ierr;
768   PetscInt         i,n = a->mbs;
769 
770   PetscFunctionBegin;
771   if (!ia) PetscFunctionReturn(0);
772   if (symmetric) {
773     ierr = PetscFree(*ia);CHKERRQ(ierr);
774     ierr = PetscFree(*ja);CHKERRQ(ierr);
775   } else if (oshift == 1) {
776     PetscInt nz = a->i[n]-1;
777     for (i=0; i<nz; i++) a->j[i]--;
778     for (i=0; i<n+1; i++) a->i[i]--;
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
785 PetscErrorCode MatGetBlockSize_SeqBAIJ(Mat mat,PetscInt *bs)
786 {
787   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
788 
789   PetscFunctionBegin;
790   *bs = baij->bs;
791   PetscFunctionReturn(0);
792 }
793 
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatDestroy_SeqBAIJ"
797 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
798 {
799   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
800   PetscErrorCode ierr;
801 
802   PetscFunctionBegin;
803 #if defined(PETSC_USE_LOG)
804   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
805 #endif
806   ierr = PetscFree(a->a);CHKERRQ(ierr);
807   if (!a->singlemalloc) {
808     ierr = PetscFree(a->i);CHKERRQ(ierr);
809     ierr = PetscFree(a->j);CHKERRQ(ierr);
810   }
811   if (a->row) {
812     ierr = ISDestroy(a->row);CHKERRQ(ierr);
813   }
814   if (a->col) {
815     ierr = ISDestroy(a->col);CHKERRQ(ierr);
816   }
817   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
818   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
819   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
820   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
821   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
822   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
823   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
824   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
825 #if defined(PETSC_USE_MAT_SINGLE)
826   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
827 #endif
828   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
829 
830   ierr = PetscFree(a);CHKERRQ(ierr);
831 
832   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
833   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
834   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
835   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
836   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
837   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatSetOption_SeqBAIJ"
843 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
844 {
845   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
846 
847   PetscFunctionBegin;
848   switch (op) {
849   case MAT_ROW_ORIENTED:
850     a->roworiented    = PETSC_TRUE;
851     break;
852   case MAT_COLUMN_ORIENTED:
853     a->roworiented    = PETSC_FALSE;
854     break;
855   case MAT_COLUMNS_SORTED:
856     a->sorted         = PETSC_TRUE;
857     break;
858   case MAT_COLUMNS_UNSORTED:
859     a->sorted         = PETSC_FALSE;
860     break;
861   case MAT_KEEP_ZEROED_ROWS:
862     a->keepzeroedrows = PETSC_TRUE;
863     break;
864   case MAT_NO_NEW_NONZERO_LOCATIONS:
865     a->nonew          = 1;
866     break;
867   case MAT_NEW_NONZERO_LOCATION_ERR:
868     a->nonew          = -1;
869     break;
870   case MAT_NEW_NONZERO_ALLOCATION_ERR:
871     a->nonew          = -2;
872     break;
873   case MAT_YES_NEW_NONZERO_LOCATIONS:
874     a->nonew          = 0;
875     break;
876   case MAT_ROWS_SORTED:
877   case MAT_ROWS_UNSORTED:
878   case MAT_YES_NEW_DIAGONALS:
879   case MAT_IGNORE_OFF_PROC_ENTRIES:
880   case MAT_USE_HASH_TABLE:
881     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
882     break;
883   case MAT_NO_NEW_DIAGONALS:
884     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
885   case MAT_SYMMETRIC:
886   case MAT_STRUCTURALLY_SYMMETRIC:
887   case MAT_NOT_SYMMETRIC:
888   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
889   case MAT_HERMITIAN:
890   case MAT_NOT_HERMITIAN:
891   case MAT_SYMMETRY_ETERNAL:
892   case MAT_NOT_SYMMETRY_ETERNAL:
893     break;
894   default:
895     SETERRQ(PETSC_ERR_SUP,"unknown option");
896   }
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatGetRow_SeqBAIJ"
902 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
903 {
904   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
905   PetscErrorCode ierr;
906   PetscInt          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
907   MatScalar    *aa,*aa_i;
908   PetscScalar  *v_i;
909 
910   PetscFunctionBegin;
911   bs  = a->bs;
912   ai  = a->i;
913   aj  = a->j;
914   aa  = a->a;
915   bs2 = a->bs2;
916 
917   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
918 
919   bn  = row/bs;   /* Block number */
920   bp  = row % bs; /* Block Position */
921   M   = ai[bn+1] - ai[bn];
922   *nz = bs*M;
923 
924   if (v) {
925     *v = 0;
926     if (*nz) {
927       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
928       for (i=0; i<M; i++) { /* for each block in the block row */
929         v_i  = *v + i*bs;
930         aa_i = aa + bs2*(ai[bn] + i);
931         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
932       }
933     }
934   }
935 
936   if (idx) {
937     *idx = 0;
938     if (*nz) {
939       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
940       for (i=0; i<M; i++) { /* for each block in the block row */
941         idx_i = *idx + i*bs;
942         itmp  = bs*aj[ai[bn] + i];
943         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
944       }
945     }
946   }
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
952 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
953 {
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
958   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatTranspose_SeqBAIJ"
964 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
965 {
966   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
967   Mat         C;
968   PetscErrorCode ierr;
969   PetscInt         i,j,k,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
970   PetscInt         *rows,*cols,bs2=a->bs2;
971   PetscScalar *array;
972 
973   PetscFunctionBegin;
974   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
975   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
976   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
977 
978 #if defined(PETSC_USE_MAT_SINGLE)
979   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
980   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
981 #else
982   array = a->a;
983 #endif
984 
985   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
986   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
987   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
988   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
989   ierr = PetscFree(col);CHKERRQ(ierr);
990   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
991   cols = rows + bs;
992   for (i=0; i<mbs; i++) {
993     cols[0] = i*bs;
994     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
995     len = ai[i+1] - ai[i];
996     for (j=0; j<len; j++) {
997       rows[0] = (*aj++)*bs;
998       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
999       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1000       array += bs2;
1001     }
1002   }
1003   ierr = PetscFree(rows);CHKERRQ(ierr);
1004 #if defined(PETSC_USE_MAT_SINGLE)
1005   ierr = PetscFree(array);
1006 #endif
1007 
1008   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1009   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1010 
1011   if (B) {
1012     *B = C;
1013   } else {
1014     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1015   }
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1021 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1022 {
1023   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1024   PetscErrorCode ierr;
1025   PetscInt         i,fd,*col_lens,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1026   PetscScalar *aa;
1027   FILE        *file;
1028 
1029   PetscFunctionBegin;
1030   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1031   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1032   col_lens[0] = MAT_FILE_COOKIE;
1033 
1034   col_lens[1] = A->m;
1035   col_lens[2] = A->n;
1036   col_lens[3] = a->nz*bs2;
1037 
1038   /* store lengths of each row and write (including header) to file */
1039   count = 0;
1040   for (i=0; i<a->mbs; i++) {
1041     for (j=0; j<bs; j++) {
1042       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1043     }
1044   }
1045   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1046   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1047 
1048   /* store column indices (zero start index) */
1049   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1050   count = 0;
1051   for (i=0; i<a->mbs; i++) {
1052     for (j=0; j<bs; j++) {
1053       for (k=a->i[i]; k<a->i[i+1]; k++) {
1054         for (l=0; l<bs; l++) {
1055           jj[count++] = bs*a->j[k] + l;
1056         }
1057       }
1058     }
1059   }
1060   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1061   ierr = PetscFree(jj);CHKERRQ(ierr);
1062 
1063   /* store nonzero values */
1064   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1065   count = 0;
1066   for (i=0; i<a->mbs; i++) {
1067     for (j=0; j<bs; j++) {
1068       for (k=a->i[i]; k<a->i[i+1]; k++) {
1069         for (l=0; l<bs; l++) {
1070           aa[count++] = a->a[bs2*k + l*bs + j];
1071         }
1072       }
1073     }
1074   }
1075   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1076   ierr = PetscFree(aa);CHKERRQ(ierr);
1077 
1078   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1079   if (file) {
1080     fprintf(file,"-matload_block_size %d\n",a->bs);
1081   }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1087 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1088 {
1089   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1090   PetscErrorCode ierr;
1091   PetscInt               i,j,bs = a->bs,k,l,bs2=a->bs2;
1092   PetscViewerFormat format;
1093 
1094   PetscFunctionBegin;
1095   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1096   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1097     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1098   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1099     Mat aij;
1100     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1101     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1102     ierr = MatDestroy(aij);CHKERRQ(ierr);
1103   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1104      PetscFunctionReturn(0);
1105   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1106     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1107     for (i=0; i<a->mbs; i++) {
1108       for (j=0; j<bs; j++) {
1109         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1110         for (k=a->i[i]; k<a->i[i+1]; k++) {
1111           for (l=0; l<bs; l++) {
1112 #if defined(PETSC_USE_COMPLEX)
1113             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1114               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
1115                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1116             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1117               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
1118                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1119             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1120               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1121             }
1122 #else
1123             if (a->a[bs2*k + l*bs + j] != 0.0) {
1124               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1125             }
1126 #endif
1127           }
1128         }
1129         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1130       }
1131     }
1132     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1133   } else {
1134     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1135     for (i=0; i<a->mbs; i++) {
1136       for (j=0; j<bs; j++) {
1137         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1138         for (k=a->i[i]; k<a->i[i+1]; k++) {
1139           for (l=0; l<bs; l++) {
1140 #if defined(PETSC_USE_COMPLEX)
1141             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1142               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
1143                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1144             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1145               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
1146                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1147             } else {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1149             }
1150 #else
1151             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1152 #endif
1153           }
1154         }
1155         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1156       }
1157     }
1158     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1159   }
1160   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1166 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1167 {
1168   Mat          A = (Mat) Aa;
1169   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
1170   PetscErrorCode ierr;
1171   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1172   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1173   MatScalar    *aa;
1174   PetscViewer  viewer;
1175 
1176   PetscFunctionBegin;
1177 
1178   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1179   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1180 
1181   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1182 
1183   /* loop over matrix elements drawing boxes */
1184   color = PETSC_DRAW_BLUE;
1185   for (i=0,row=0; i<mbs; i++,row+=bs) {
1186     for (j=a->i[i]; j<a->i[i+1]; j++) {
1187       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1188       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1189       aa = a->a + j*bs2;
1190       for (k=0; k<bs; k++) {
1191         for (l=0; l<bs; l++) {
1192           if (PetscRealPart(*aa++) >=  0.) continue;
1193           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1194         }
1195       }
1196     }
1197   }
1198   color = PETSC_DRAW_CYAN;
1199   for (i=0,row=0; i<mbs; i++,row+=bs) {
1200     for (j=a->i[i]; j<a->i[i+1]; j++) {
1201       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1202       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1203       aa = a->a + j*bs2;
1204       for (k=0; k<bs; k++) {
1205         for (l=0; l<bs; l++) {
1206           if (PetscRealPart(*aa++) != 0.) continue;
1207           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1208         }
1209       }
1210     }
1211   }
1212 
1213   color = PETSC_DRAW_RED;
1214   for (i=0,row=0; i<mbs; i++,row+=bs) {
1215     for (j=a->i[i]; j<a->i[i+1]; j++) {
1216       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1217       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1218       aa = a->a + j*bs2;
1219       for (k=0; k<bs; k++) {
1220         for (l=0; l<bs; l++) {
1221           if (PetscRealPart(*aa++) <= 0.) continue;
1222           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1223         }
1224       }
1225     }
1226   }
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1232 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1233 {
1234   PetscErrorCode ierr;
1235   PetscReal    xl,yl,xr,yr,w,h;
1236   PetscDraw    draw;
1237   PetscTruth   isnull;
1238 
1239   PetscFunctionBegin;
1240 
1241   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1242   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1243 
1244   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1245   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1246   xr += w;    yr += h;  xl = -w;     yl = -h;
1247   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1248   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1249   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatView_SeqBAIJ"
1255 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1256 {
1257   PetscErrorCode ierr;
1258   PetscTruth iascii,isbinary,isdraw;
1259 
1260   PetscFunctionBegin;
1261   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1262   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1263   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1264   if (iascii){
1265     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1266   } else if (isbinary) {
1267     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1268   } else if (isdraw) {
1269     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1270   } else {
1271     Mat B;
1272     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1273     ierr = MatView(B,viewer);CHKERRQ(ierr);
1274     ierr = MatDestroy(B);CHKERRQ(ierr);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1282 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1283 {
1284   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1285   PetscInt        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1286   PetscInt        *ai = a->i,*ailen = a->ilen;
1287   PetscInt        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1288   MatScalar  *ap,*aa = a->a,zero = 0.0;
1289 
1290   PetscFunctionBegin;
1291   for (k=0; k<m; k++) { /* loop over rows */
1292     row  = im[k]; brow = row/bs;
1293     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1294     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
1295     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1296     nrow = ailen[brow];
1297     for (l=0; l<n; l++) { /* loop over columns */
1298       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1299       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
1300       col  = in[l] ;
1301       bcol = col/bs;
1302       cidx = col%bs;
1303       ridx = row%bs;
1304       high = nrow;
1305       low  = 0; /* assume unsorted */
1306       while (high-low > 5) {
1307         t = (low+high)/2;
1308         if (rp[t] > bcol) high = t;
1309         else             low  = t;
1310       }
1311       for (i=low; i<high; i++) {
1312         if (rp[i] > bcol) break;
1313         if (rp[i] == bcol) {
1314           *v++ = ap[bs2*i+bs*cidx+ridx];
1315           goto finished;
1316         }
1317       }
1318       *v++ = zero;
1319       finished:;
1320     }
1321   }
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #if defined(PETSC_USE_MAT_SINGLE)
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1328 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1329 {
1330   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1331   PetscErrorCode ierr;
1332   PetscInt         i,N = m*n*b->bs2;
1333   MatScalar   *vsingle;
1334 
1335   PetscFunctionBegin;
1336   if (N > b->setvalueslen) {
1337     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1338     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1339     b->setvalueslen  = N;
1340   }
1341   vsingle = b->setvaluescopy;
1342   for (i=0; i<N; i++) {
1343     vsingle[i] = v[i];
1344   }
1345   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1346   PetscFunctionReturn(0);
1347 }
1348 #endif
1349 
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1353 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1354 {
1355   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1356   PetscInt               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1357   PetscInt               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1358   PetscErrorCode ierr;
1359   PetscInt               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
1360   PetscTruth        roworiented=a->roworiented;
1361   const MatScalar   *value = v;
1362   MatScalar         *ap,*aa = a->a,*bap;
1363 
1364   PetscFunctionBegin;
1365   if (roworiented) {
1366     stepval = (n-1)*bs;
1367   } else {
1368     stepval = (m-1)*bs;
1369   }
1370   for (k=0; k<m; k++) { /* loop over added rows */
1371     row  = im[k];
1372     if (row < 0) continue;
1373 #if defined(PETSC_USE_BOPT_g)
1374     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
1375 #endif
1376     rp   = aj + ai[row];
1377     ap   = aa + bs2*ai[row];
1378     rmax = imax[row];
1379     nrow = ailen[row];
1380     low  = 0;
1381     for (l=0; l<n; l++) { /* loop over added columns */
1382       if (in[l] < 0) continue;
1383 #if defined(PETSC_USE_BOPT_g)
1384       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
1385 #endif
1386       col = in[l];
1387       if (roworiented) {
1388         value = v + k*(stepval+bs)*bs + l*bs;
1389       } else {
1390         value = v + l*(stepval+bs)*bs + k*bs;
1391       }
1392       if (!sorted) low = 0; high = nrow;
1393       while (high-low > 7) {
1394         t = (low+high)/2;
1395         if (rp[t] > col) high = t;
1396         else             low  = t;
1397       }
1398       for (i=low; i<high; i++) {
1399         if (rp[i] > col) break;
1400         if (rp[i] == col) {
1401           bap  = ap +  bs2*i;
1402           if (roworiented) {
1403             if (is == ADD_VALUES) {
1404               for (ii=0; ii<bs; ii++,value+=stepval) {
1405                 for (jj=ii; jj<bs2; jj+=bs) {
1406                   bap[jj] += *value++;
1407                 }
1408               }
1409             } else {
1410               for (ii=0; ii<bs; ii++,value+=stepval) {
1411                 for (jj=ii; jj<bs2; jj+=bs) {
1412                   bap[jj] = *value++;
1413                 }
1414               }
1415             }
1416           } else {
1417             if (is == ADD_VALUES) {
1418               for (ii=0; ii<bs; ii++,value+=stepval) {
1419                 for (jj=0; jj<bs; jj++) {
1420                   *bap++ += *value++;
1421                 }
1422               }
1423             } else {
1424               for (ii=0; ii<bs; ii++,value+=stepval) {
1425                 for (jj=0; jj<bs; jj++) {
1426                   *bap++  = *value++;
1427                 }
1428               }
1429             }
1430           }
1431           goto noinsert2;
1432         }
1433       }
1434       if (nonew == 1) goto noinsert2;
1435       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1436       if (nrow >= rmax) {
1437         /* there is no extra room in row, therefore enlarge */
1438         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1439         MatScalar *new_a;
1440 
1441         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1442 
1443         /* malloc new storage space */
1444         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1445 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1446         new_j   = (int*)(new_a + bs2*new_nz);
1447         new_i   = new_j + new_nz;
1448 
1449         /* copy over old data into new slots */
1450         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1451         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1452         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1453         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1454         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1455         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1456         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1457         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1458         /* free up old matrix storage */
1459         ierr = PetscFree(a->a);CHKERRQ(ierr);
1460         if (!a->singlemalloc) {
1461           ierr = PetscFree(a->i);CHKERRQ(ierr);
1462           ierr = PetscFree(a->j);CHKERRQ(ierr);
1463         }
1464         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1465         a->singlemalloc = PETSC_TRUE;
1466 
1467         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1468         rmax = imax[row] = imax[row] + CHUNKSIZE;
1469         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1470         a->maxnz += bs2*CHUNKSIZE;
1471         a->reallocs++;
1472         a->nz++;
1473       }
1474       N = nrow++ - 1;
1475       /* shift up all the later entries in this row */
1476       for (ii=N; ii>=i; ii--) {
1477         rp[ii+1] = rp[ii];
1478         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1479       }
1480       if (N >= i) {
1481         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1482       }
1483       rp[i] = col;
1484       bap   = ap +  bs2*i;
1485       if (roworiented) {
1486         for (ii=0; ii<bs; ii++,value+=stepval) {
1487           for (jj=ii; jj<bs2; jj+=bs) {
1488             bap[jj] = *value++;
1489           }
1490         }
1491       } else {
1492         for (ii=0; ii<bs; ii++,value+=stepval) {
1493           for (jj=0; jj<bs; jj++) {
1494             *bap++  = *value++;
1495           }
1496         }
1497       }
1498       noinsert2:;
1499       low = i;
1500     }
1501     ailen[row] = nrow;
1502   }
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1508 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1509 {
1510   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1511   PetscInt        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1512   PetscInt        m = A->m,*ip,N,*ailen = a->ilen;
1513   PetscErrorCode ierr;
1514   PetscInt        mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1515   MatScalar  *aa = a->a,*ap;
1516 
1517   PetscFunctionBegin;
1518   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1519 
1520   if (m) rmax = ailen[0];
1521   for (i=1; i<mbs; i++) {
1522     /* move each row back by the amount of empty slots (fshift) before it*/
1523     fshift += imax[i-1] - ailen[i-1];
1524     rmax   = PetscMax(rmax,ailen[i]);
1525     if (fshift) {
1526       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1527       N = ailen[i];
1528       for (j=0; j<N; j++) {
1529         ip[j-fshift] = ip[j];
1530         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1531       }
1532     }
1533     ai[i] = ai[i-1] + ailen[i-1];
1534   }
1535   if (mbs) {
1536     fshift += imax[mbs-1] - ailen[mbs-1];
1537     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1538   }
1539   /* reset ilen and imax for each row */
1540   for (i=0; i<mbs; i++) {
1541     ailen[i] = imax[i] = ai[i+1] - ai[i];
1542   }
1543   a->nz = ai[mbs];
1544 
1545   /* diagonals may have moved, so kill the diagonal pointers */
1546   a->idiagvalid = PETSC_FALSE;
1547   if (fshift && a->diag) {
1548     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1549     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1550     a->diag = 0;
1551   }
1552   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1553   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1554   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1555   a->reallocs          = 0;
1556   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1557 
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 
1562 
1563 /*
1564    This function returns an array of flags which indicate the locations of contiguous
1565    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1566    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1567    Assume: sizes should be long enough to hold all the values.
1568 */
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1571 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1572 {
1573   PetscInt        i,j,k,row;
1574   PetscTruth flg;
1575 
1576   PetscFunctionBegin;
1577   for (i=0,j=0; i<n; j++) {
1578     row = idx[i];
1579     if (row%bs!=0) { /* Not the begining of a block */
1580       sizes[j] = 1;
1581       i++;
1582     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1583       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1584       i++;
1585     } else { /* Begining of the block, so check if the complete block exists */
1586       flg = PETSC_TRUE;
1587       for (k=1; k<bs; k++) {
1588         if (row+k != idx[i+k]) { /* break in the block */
1589           flg = PETSC_FALSE;
1590           break;
1591         }
1592       }
1593       if (flg == PETSC_TRUE) { /* No break in the bs */
1594         sizes[j] = bs;
1595         i+= bs;
1596       } else {
1597         sizes[j] = 1;
1598         i++;
1599       }
1600     }
1601   }
1602   *bs_max = j;
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1608 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1609 {
1610   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1611   PetscErrorCode ierr;
1612   PetscInt         i,j,k,count,is_n,*is_idx,*rows;
1613   PetscInt         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1614   PetscScalar zero = 0.0;
1615   MatScalar   *aa;
1616 
1617   PetscFunctionBegin;
1618   /* Make a copy of the IS and  sort it */
1619   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1620   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1621 
1622   /* allocate memory for rows,sizes */
1623   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1624   sizes = rows + is_n;
1625 
1626   /* copy IS values to rows, and sort them */
1627   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1628   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1629   if (baij->keepzeroedrows) {
1630     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1631     bs_max = is_n;
1632   } else {
1633     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1634   }
1635   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1636 
1637   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1638     row   = rows[j];
1639     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1640     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1641     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1642     if (sizes[i] == bs && !baij->keepzeroedrows) {
1643       if (diag) {
1644         if (baij->ilen[row/bs] > 0) {
1645           baij->ilen[row/bs]       = 1;
1646           baij->j[baij->i[row/bs]] = row/bs;
1647           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1648         }
1649         /* Now insert all the diagonal values for this bs */
1650         for (k=0; k<bs; k++) {
1651           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1652         }
1653       } else { /* (!diag) */
1654         baij->ilen[row/bs] = 0;
1655       } /* end (!diag) */
1656     } else { /* (sizes[i] != bs) */
1657 #if defined (PETSC_USE_DEBUG)
1658       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1659 #endif
1660       for (k=0; k<count; k++) {
1661         aa[0] =  zero;
1662         aa    += bs;
1663       }
1664       if (diag) {
1665         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1666       }
1667     }
1668   }
1669 
1670   ierr = PetscFree(rows);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1678 {
1679   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1680   PetscInt         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1681   PetscInt         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1682   PetscInt         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1683   PetscErrorCode ierr;
1684   PetscInt         ridx,cidx,bs2=a->bs2;
1685   PetscTruth  roworiented=a->roworiented;
1686   MatScalar   *ap,value,*aa=a->a,*bap;
1687 
1688   PetscFunctionBegin;
1689   for (k=0; k<m; k++) { /* loop over added rows */
1690     row  = im[k]; brow = row/bs;
1691     if (row < 0) continue;
1692 #if defined(PETSC_USE_BOPT_g)
1693     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1694 #endif
1695     rp   = aj + ai[brow];
1696     ap   = aa + bs2*ai[brow];
1697     rmax = imax[brow];
1698     nrow = ailen[brow];
1699     low  = 0;
1700     for (l=0; l<n; l++) { /* loop over added columns */
1701       if (in[l] < 0) continue;
1702 #if defined(PETSC_USE_BOPT_g)
1703       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1704 #endif
1705       col = in[l]; bcol = col/bs;
1706       ridx = row % bs; cidx = col % bs;
1707       if (roworiented) {
1708         value = v[l + k*n];
1709       } else {
1710         value = v[k + l*m];
1711       }
1712       if (!sorted) low = 0; high = nrow;
1713       while (high-low > 7) {
1714         t = (low+high)/2;
1715         if (rp[t] > bcol) high = t;
1716         else              low  = t;
1717       }
1718       for (i=low; i<high; i++) {
1719         if (rp[i] > bcol) break;
1720         if (rp[i] == bcol) {
1721           bap  = ap +  bs2*i + bs*cidx + ridx;
1722           if (is == ADD_VALUES) *bap += value;
1723           else                  *bap  = value;
1724           goto noinsert1;
1725         }
1726       }
1727       if (nonew == 1) goto noinsert1;
1728       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1729       if (nrow >= rmax) {
1730         /* there is no extra room in row, therefore enlarge */
1731         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1732         MatScalar *new_a;
1733 
1734         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1735 
1736         /* Malloc new storage space */
1737         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1738 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1739         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1740         new_i   = new_j + new_nz;
1741 
1742         /* copy over old data into new slots */
1743         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1744         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1745         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1746         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1747         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1748         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1749         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1750         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1751         /* free up old matrix storage */
1752         ierr = PetscFree(a->a);CHKERRQ(ierr);
1753         if (!a->singlemalloc) {
1754           ierr = PetscFree(a->i);CHKERRQ(ierr);
1755           ierr = PetscFree(a->j);CHKERRQ(ierr);
1756         }
1757         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1758         a->singlemalloc = PETSC_TRUE;
1759 
1760         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1761         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1762         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1763         a->maxnz += bs2*CHUNKSIZE;
1764         a->reallocs++;
1765         a->nz++;
1766       }
1767       N = nrow++ - 1;
1768       /* shift up all the later entries in this row */
1769       for (ii=N; ii>=i; ii--) {
1770         rp[ii+1] = rp[ii];
1771         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1772       }
1773       if (N>=i) {
1774         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1775       }
1776       rp[i]                      = bcol;
1777       ap[bs2*i + bs*cidx + ridx] = value;
1778       noinsert1:;
1779       low = i;
1780     }
1781     ailen[brow] = nrow;
1782   }
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1789 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1790 {
1791   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1792   Mat         outA;
1793   PetscErrorCode ierr;
1794   PetscTruth  row_identity,col_identity;
1795 
1796   PetscFunctionBegin;
1797   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1798   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1799   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1800   if (!row_identity || !col_identity) {
1801     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1802   }
1803 
1804   outA          = inA;
1805   inA->factor   = FACTOR_LU;
1806 
1807   if (!a->diag) {
1808     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1809   }
1810 
1811   a->row        = row;
1812   a->col        = col;
1813   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1814   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1815 
1816   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1817   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1818   PetscLogObjectParent(inA,a->icol);
1819 
1820   /*
1821       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1822       for ILU(0) factorization with natural ordering
1823   */
1824   if (a->bs < 8) {
1825     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1826   } else {
1827     if (!a->solve_work) {
1828       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1829       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1830     }
1831   }
1832 
1833   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1834 
1835   PetscFunctionReturn(0);
1836 }
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1839 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1840 {
1841   static PetscTruth called = PETSC_FALSE;
1842   MPI_Comm          comm = A->comm;
1843   PetscErrorCode ierr;
1844 
1845   PetscFunctionBegin;
1846   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1847   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1848   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 EXTERN_C_BEGIN
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1855 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1856 {
1857   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1858   PetscInt         i,nz,nbs;
1859 
1860   PetscFunctionBegin;
1861   nz  = baij->maxnz/baij->bs2;
1862   nbs = baij->nbs;
1863   for (i=0; i<nz; i++) {
1864     baij->j[i] = indices[i];
1865   }
1866   baij->nz = nz;
1867   for (i=0; i<nbs; i++) {
1868     baij->ilen[i] = baij->imax[i];
1869   }
1870 
1871   PetscFunctionReturn(0);
1872 }
1873 EXTERN_C_END
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1877 /*@
1878     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1879        in the matrix.
1880 
1881   Input Parameters:
1882 +  mat - the SeqBAIJ matrix
1883 -  indices - the column indices
1884 
1885   Level: advanced
1886 
1887   Notes:
1888     This can be called if you have precomputed the nonzero structure of the
1889   matrix and want to provide it to the matrix object to improve the performance
1890   of the MatSetValues() operation.
1891 
1892     You MUST have set the correct numbers of nonzeros per row in the call to
1893   MatCreateSeqBAIJ().
1894 
1895     MUST be called before any calls to MatSetValues();
1896 
1897 @*/
1898 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1899 {
1900   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1901 
1902   PetscFunctionBegin;
1903   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1904   PetscValidPointer(indices,2);
1905   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1906   if (f) {
1907     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1908   } else {
1909     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1916 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1917 {
1918   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1919   PetscErrorCode ierr;
1920   PetscInt          i,j,n,row,bs,*ai,*aj,mbs;
1921   PetscReal    atmp;
1922   PetscScalar  *x,zero = 0.0;
1923   MatScalar    *aa;
1924   PetscInt          ncols,brow,krow,kcol;
1925 
1926   PetscFunctionBegin;
1927   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1928   bs   = a->bs;
1929   aa   = a->a;
1930   ai   = a->i;
1931   aj   = a->j;
1932   mbs = a->mbs;
1933 
1934   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1935   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1936   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1937   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1938   for (i=0; i<mbs; i++) {
1939     ncols = ai[1] - ai[0]; ai++;
1940     brow  = bs*i;
1941     for (j=0; j<ncols; j++){
1942       /* bcol = bs*(*aj); */
1943       for (kcol=0; kcol<bs; kcol++){
1944         for (krow=0; krow<bs; krow++){
1945           atmp = PetscAbsScalar(*aa); aa++;
1946           row = brow + krow;    /* row index */
1947           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1948           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1949         }
1950       }
1951       aj++;
1952     }
1953   }
1954   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1960 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1961 {
1962   PetscErrorCode ierr;
1963 
1964   PetscFunctionBegin;
1965   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1971 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1972 {
1973   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1974   PetscFunctionBegin;
1975   *array = a->a;
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1981 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1982 {
1983   PetscFunctionBegin;
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #include "petscblaslapack.h"
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1990 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1991 {
1992   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1993   PetscErrorCode ierr;
1994   PetscInt            i,bs=y->bs,j,bs2;
1995   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1996 
1997   PetscFunctionBegin;
1998   if (str == SAME_NONZERO_PATTERN) {
1999     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2000   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2001     if (y->xtoy && y->XtoY != X) {
2002       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2003       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2004     }
2005     if (!y->xtoy) { /* get xtoy */
2006       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2007       y->XtoY = X;
2008     }
2009     bs2 = bs*bs;
2010     for (i=0; i<x->nz; i++) {
2011       j = 0;
2012       while (j < bs2){
2013         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2014         j++;
2015       }
2016     }
2017     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2018   } else {
2019     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2020   }
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 /* -------------------------------------------------------------------*/
2025 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2026        MatGetRow_SeqBAIJ,
2027        MatRestoreRow_SeqBAIJ,
2028        MatMult_SeqBAIJ_N,
2029 /* 4*/ MatMultAdd_SeqBAIJ_N,
2030        MatMultTranspose_SeqBAIJ,
2031        MatMultTransposeAdd_SeqBAIJ,
2032        MatSolve_SeqBAIJ_N,
2033        0,
2034        0,
2035 /*10*/ 0,
2036        MatLUFactor_SeqBAIJ,
2037        0,
2038        0,
2039        MatTranspose_SeqBAIJ,
2040 /*15*/ MatGetInfo_SeqBAIJ,
2041        MatEqual_SeqBAIJ,
2042        MatGetDiagonal_SeqBAIJ,
2043        MatDiagonalScale_SeqBAIJ,
2044        MatNorm_SeqBAIJ,
2045 /*20*/ 0,
2046        MatAssemblyEnd_SeqBAIJ,
2047        0,
2048        MatSetOption_SeqBAIJ,
2049        MatZeroEntries_SeqBAIJ,
2050 /*25*/ MatZeroRows_SeqBAIJ,
2051        MatLUFactorSymbolic_SeqBAIJ,
2052        MatLUFactorNumeric_SeqBAIJ_N,
2053        0,
2054        0,
2055 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2056        MatILUFactorSymbolic_SeqBAIJ,
2057        0,
2058        MatGetArray_SeqBAIJ,
2059        MatRestoreArray_SeqBAIJ,
2060 /*35*/ MatDuplicate_SeqBAIJ,
2061        0,
2062        0,
2063        MatILUFactor_SeqBAIJ,
2064        0,
2065 /*40*/ MatAXPY_SeqBAIJ,
2066        MatGetSubMatrices_SeqBAIJ,
2067        MatIncreaseOverlap_SeqBAIJ,
2068        MatGetValues_SeqBAIJ,
2069        0,
2070 /*45*/ MatPrintHelp_SeqBAIJ,
2071        MatScale_SeqBAIJ,
2072        0,
2073        0,
2074        0,
2075 /*50*/ MatGetBlockSize_SeqBAIJ,
2076        MatGetRowIJ_SeqBAIJ,
2077        MatRestoreRowIJ_SeqBAIJ,
2078        0,
2079        0,
2080 /*55*/ 0,
2081        0,
2082        0,
2083        0,
2084        MatSetValuesBlocked_SeqBAIJ,
2085 /*60*/ MatGetSubMatrix_SeqBAIJ,
2086        MatDestroy_SeqBAIJ,
2087        MatView_SeqBAIJ,
2088        MatGetPetscMaps_Petsc,
2089        0,
2090 /*65*/ 0,
2091        0,
2092        0,
2093        0,
2094        0,
2095 /*70*/ MatGetRowMax_SeqBAIJ,
2096        MatConvert_Basic,
2097        0,
2098        0,
2099        0,
2100 /*75*/ 0,
2101        0,
2102        0,
2103        0,
2104        0,
2105 /*80*/ 0,
2106        0,
2107        0,
2108        0,
2109        MatLoad_SeqBAIJ,
2110 /*85*/ 0,
2111        0,
2112        0,
2113        0,
2114        0,
2115 /*90*/ 0,
2116        0,
2117        0,
2118        0,
2119        0,
2120 /*95*/ 0,
2121        0,
2122        0,
2123        0};
2124 
2125 EXTERN_C_BEGIN
2126 #undef __FUNCT__
2127 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2128 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2129 {
2130   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2131   PetscInt         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2132   PetscErrorCode ierr;
2133 
2134   PetscFunctionBegin;
2135   if (aij->nonew != 1) {
2136     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2137   }
2138 
2139   /* allocate space for values if not already there */
2140   if (!aij->saved_values) {
2141     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2142   }
2143 
2144   /* copy values over */
2145   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
2148 EXTERN_C_END
2149 
2150 EXTERN_C_BEGIN
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2153 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2154 {
2155   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2156   PetscErrorCode ierr;
2157   PetscInt         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2158 
2159   PetscFunctionBegin;
2160   if (aij->nonew != 1) {
2161     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2162   }
2163   if (!aij->saved_values) {
2164     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2165   }
2166 
2167   /* copy values over */
2168   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 EXTERN_C_END
2172 
2173 EXTERN_C_BEGIN
2174 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2175 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2176 EXTERN_C_END
2177 
2178 EXTERN_C_BEGIN
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2181 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2182 {
2183   Mat_SeqBAIJ *b;
2184   PetscErrorCode ierr;
2185   PetscInt         i,len,mbs,nbs,bs2,newbs = bs;
2186   PetscTruth  flg;
2187 
2188   PetscFunctionBegin;
2189 
2190   B->preallocated = PETSC_TRUE;
2191   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2192   if (nnz && newbs != bs) {
2193     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2194   }
2195   bs   = newbs;
2196 
2197   mbs  = B->m/bs;
2198   nbs  = B->n/bs;
2199   bs2  = bs*bs;
2200 
2201   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2202     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
2203   }
2204 
2205   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2206   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2207   if (nnz) {
2208     for (i=0; i<mbs; i++) {
2209       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2210       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2211     }
2212   }
2213 
2214   b       = (Mat_SeqBAIJ*)B->data;
2215   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2216   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2217   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2218   if (!flg) {
2219     switch (bs) {
2220     case 1:
2221       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2222       B->ops->mult            = MatMult_SeqBAIJ_1;
2223       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2224       break;
2225     case 2:
2226       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2227       B->ops->mult            = MatMult_SeqBAIJ_2;
2228       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2229       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2230       break;
2231     case 3:
2232       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2233       B->ops->mult            = MatMult_SeqBAIJ_3;
2234       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2235       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2236       break;
2237     case 4:
2238       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2239       B->ops->mult            = MatMult_SeqBAIJ_4;
2240       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2241       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2242       break;
2243     case 5:
2244       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2245       B->ops->mult            = MatMult_SeqBAIJ_5;
2246       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2247       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2248       break;
2249     case 6:
2250       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2251       B->ops->mult            = MatMult_SeqBAIJ_6;
2252       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2253       break;
2254     case 7:
2255       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2256       B->ops->mult            = MatMult_SeqBAIJ_7;
2257       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2258       break;
2259     default:
2260       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2261       B->ops->mult            = MatMult_SeqBAIJ_N;
2262       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2263       break;
2264     }
2265   }
2266   b->bs      = bs;
2267   b->mbs     = mbs;
2268   b->nbs     = nbs;
2269   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2270   if (!nnz) {
2271     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2272     else if (nz <= 0)        nz = 1;
2273     for (i=0; i<mbs; i++) b->imax[i] = nz;
2274     nz = nz*mbs;
2275   } else {
2276     nz = 0;
2277     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2278   }
2279 
2280   /* allocate the matrix space */
2281   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2282   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2283   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2284   b->j  = (PetscInt*)(b->a + nz*bs2);
2285   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2286   b->i  = b->j + nz;
2287   b->singlemalloc = PETSC_TRUE;
2288 
2289   b->i[0] = 0;
2290   for (i=1; i<mbs+1; i++) {
2291     b->i[i] = b->i[i-1] + b->imax[i-1];
2292   }
2293 
2294   /* b->ilen will count nonzeros in each block row so far. */
2295   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2296   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2297   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2298 
2299   b->bs               = bs;
2300   b->bs2              = bs2;
2301   b->mbs              = mbs;
2302   b->nz               = 0;
2303   b->maxnz            = nz*bs2;
2304   B->info.nz_unneeded = (PetscReal)b->maxnz;
2305   PetscFunctionReturn(0);
2306 }
2307 EXTERN_C_END
2308 
2309 /*MC
2310    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2311    block sparse compressed row format.
2312 
2313    Options Database Keys:
2314 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2315 
2316   Level: beginner
2317 
2318 .seealso: MatCreateSeqBAIJ
2319 M*/
2320 
2321 EXTERN_C_BEGIN
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatCreate_SeqBAIJ"
2324 PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2325 {
2326   PetscErrorCode ierr;
2327   PetscMPIInt    size;
2328   Mat_SeqBAIJ    *b;
2329 
2330   PetscFunctionBegin;
2331   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2332   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2333 
2334   B->m = B->M = PetscMax(B->m,B->M);
2335   B->n = B->N = PetscMax(B->n,B->N);
2336   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2337   B->data = (void*)b;
2338   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2339   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2340   B->factor           = 0;
2341   B->lupivotthreshold = 1.0;
2342   B->mapping          = 0;
2343   b->row              = 0;
2344   b->col              = 0;
2345   b->icol             = 0;
2346   b->reallocs         = 0;
2347   b->saved_values     = 0;
2348 #if defined(PETSC_USE_MAT_SINGLE)
2349   b->setvalueslen     = 0;
2350   b->setvaluescopy    = PETSC_NULL;
2351 #endif
2352 
2353   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2354   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2355 
2356   b->sorted           = PETSC_FALSE;
2357   b->roworiented      = PETSC_TRUE;
2358   b->nonew            = 0;
2359   b->diag             = 0;
2360   b->solve_work       = 0;
2361   b->mult_work        = 0;
2362   B->spptr            = 0;
2363   B->info.nz_unneeded = (PetscReal)b->maxnz;
2364   b->keepzeroedrows   = PETSC_FALSE;
2365   b->xtoy              = 0;
2366   b->XtoY              = 0;
2367 
2368   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2369                                      "MatStoreValues_SeqBAIJ",
2370                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2371   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2372                                      "MatRetrieveValues_SeqBAIJ",
2373                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2374   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2375                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2376                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2377   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2378                                      "MatConvert_SeqBAIJ_SeqAIJ",
2379                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2380   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2381                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2382                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2383   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2384                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2385                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 EXTERN_C_END
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2392 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2393 {
2394   Mat         C;
2395   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2396   PetscErrorCode ierr;
2397   PetscInt         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2398 
2399   PetscFunctionBegin;
2400   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2401 
2402   *B = 0;
2403   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2404   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2405   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2406   c    = (Mat_SeqBAIJ*)C->data;
2407 
2408   C->M   = A->M;
2409   C->N   = A->N;
2410   c->bs  = a->bs;
2411   c->bs2 = a->bs2;
2412   c->mbs = a->mbs;
2413   c->nbs = a->nbs;
2414 
2415   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2416   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
2417   for (i=0; i<mbs; i++) {
2418     c->imax[i] = a->imax[i];
2419     c->ilen[i] = a->ilen[i];
2420   }
2421 
2422   /* allocate the matrix space */
2423   c->singlemalloc = PETSC_TRUE;
2424   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2425   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2426   c->j = (int*)(c->a + nz*bs2);
2427   c->i = c->j + nz;
2428   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2429   if (mbs > 0) {
2430     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2431     if (cpvalues == MAT_COPY_VALUES) {
2432       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2433     } else {
2434       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2435     }
2436   }
2437 
2438   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2439   c->sorted      = a->sorted;
2440   c->roworiented = a->roworiented;
2441   c->nonew       = a->nonew;
2442 
2443   if (a->diag) {
2444     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2445     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2446     for (i=0; i<mbs; i++) {
2447       c->diag[i] = a->diag[i];
2448     }
2449   } else c->diag        = 0;
2450   c->nz                 = a->nz;
2451   c->maxnz              = a->maxnz;
2452   c->solve_work         = 0;
2453   c->mult_work          = 0;
2454   C->preallocated       = PETSC_TRUE;
2455   C->assembled          = PETSC_TRUE;
2456   *B = C;
2457   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatLoad_SeqBAIJ"
2463 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2464 {
2465   Mat_SeqBAIJ  *a;
2466   Mat          B;
2467   PetscErrorCode ierr;
2468   PetscInt          i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1;
2469   PetscInt          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2470   PetscInt          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2471   PetscInt          *masked,nmask,tmp,bs2,ishift;
2472   PetscScalar  *aa;
2473   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2474 
2475   PetscFunctionBegin;
2476   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2477   bs2  = bs*bs;
2478 
2479   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2480   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2481   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2482   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2483   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2484   M = header[1]; N = header[2]; nz = header[3];
2485 
2486   if (header[3] < 0) {
2487     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2488   }
2489 
2490   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2491 
2492   /*
2493      This code adds extra rows to make sure the number of rows is
2494     divisible by the blocksize
2495   */
2496   mbs        = M/bs;
2497   extra_rows = bs - M + bs*(mbs);
2498   if (extra_rows == bs) extra_rows = 0;
2499   else                  mbs++;
2500   if (extra_rows) {
2501     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2502   }
2503 
2504   /* read in row lengths */
2505   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2506   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2507   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2508 
2509   /* read in column indices */
2510   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2511   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2512   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2513 
2514   /* loop over row lengths determining block row lengths */
2515   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2516   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2517   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2518   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2519   masked   = mask + mbs;
2520   rowcount = 0; nzcount = 0;
2521   for (i=0; i<mbs; i++) {
2522     nmask = 0;
2523     for (j=0; j<bs; j++) {
2524       kmax = rowlengths[rowcount];
2525       for (k=0; k<kmax; k++) {
2526         tmp = jj[nzcount++]/bs;
2527         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2528       }
2529       rowcount++;
2530     }
2531     browlengths[i] += nmask;
2532     /* zero out the mask elements we set */
2533     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2534   }
2535 
2536   /* create our matrix */
2537   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2538   ierr = MatSetType(B,type);CHKERRQ(ierr);
2539   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2540   a = (Mat_SeqBAIJ*)B->data;
2541 
2542   /* set matrix "i" values */
2543   a->i[0] = 0;
2544   for (i=1; i<= mbs; i++) {
2545     a->i[i]      = a->i[i-1] + browlengths[i-1];
2546     a->ilen[i-1] = browlengths[i-1];
2547   }
2548   a->nz         = 0;
2549   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2550 
2551   /* read in nonzero values */
2552   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2553   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2554   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2555 
2556   /* set "a" and "j" values into matrix */
2557   nzcount = 0; jcount = 0;
2558   for (i=0; i<mbs; i++) {
2559     nzcountb = nzcount;
2560     nmask    = 0;
2561     for (j=0; j<bs; j++) {
2562       kmax = rowlengths[i*bs+j];
2563       for (k=0; k<kmax; k++) {
2564         tmp = jj[nzcount++]/bs;
2565 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2566       }
2567     }
2568     /* sort the masked values */
2569     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2570 
2571     /* set "j" values into matrix */
2572     maskcount = 1;
2573     for (j=0; j<nmask; j++) {
2574       a->j[jcount++]  = masked[j];
2575       mask[masked[j]] = maskcount++;
2576     }
2577     /* set "a" values into matrix */
2578     ishift = bs2*a->i[i];
2579     for (j=0; j<bs; j++) {
2580       kmax = rowlengths[i*bs+j];
2581       for (k=0; k<kmax; k++) {
2582         tmp       = jj[nzcountb]/bs ;
2583         block     = mask[tmp] - 1;
2584         point     = jj[nzcountb] - bs*tmp;
2585         idx       = ishift + bs2*block + j + bs*point;
2586         a->a[idx] = (MatScalar)aa[nzcountb++];
2587       }
2588     }
2589     /* zero out the mask elements we set */
2590     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2591   }
2592   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2593 
2594   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2595   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2596   ierr = PetscFree(aa);CHKERRQ(ierr);
2597   ierr = PetscFree(jj);CHKERRQ(ierr);
2598   ierr = PetscFree(mask);CHKERRQ(ierr);
2599 
2600   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2601   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2602   ierr = MatView_Private(B);CHKERRQ(ierr);
2603 
2604   *A = B;
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "MatCreateSeqBAIJ"
2610 /*@C
2611    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2612    compressed row) format.  For good matrix assembly performance the
2613    user should preallocate the matrix storage by setting the parameter nz
2614    (or the array nnz).  By setting these parameters accurately, performance
2615    during matrix assembly can be increased by more than a factor of 50.
2616 
2617    Collective on MPI_Comm
2618 
2619    Input Parameters:
2620 +  comm - MPI communicator, set to PETSC_COMM_SELF
2621 .  bs - size of block
2622 .  m - number of rows
2623 .  n - number of columns
2624 .  nz - number of nonzero blocks  per block row (same for all rows)
2625 -  nnz - array containing the number of nonzero blocks in the various block rows
2626          (possibly different for each block row) or PETSC_NULL
2627 
2628    Output Parameter:
2629 .  A - the matrix
2630 
2631    Options Database Keys:
2632 .   -mat_no_unroll - uses code that does not unroll the loops in the
2633                      block calculations (much slower)
2634 .    -mat_block_size - size of the blocks to use
2635 
2636    Level: intermediate
2637 
2638    Notes:
2639    A nonzero block is any block that as 1 or more nonzeros in it
2640 
2641    The block AIJ format is fully compatible with standard Fortran 77
2642    storage.  That is, the stored row and column indices can begin at
2643    either one (as in Fortran) or zero.  See the users' manual for details.
2644 
2645    Specify the preallocated storage with either nz or nnz (not both).
2646    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2647    allocation.  For additional details, see the users manual chapter on
2648    matrices.
2649 
2650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2651 @*/
2652 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2653 {
2654   PetscErrorCode ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2658   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2659   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 #undef __FUNCT__
2664 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2665 /*@C
2666    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2667    per row in the matrix. For good matrix assembly performance the
2668    user should preallocate the matrix storage by setting the parameter nz
2669    (or the array nnz).  By setting these parameters accurately, performance
2670    during matrix assembly can be increased by more than a factor of 50.
2671 
2672    Collective on MPI_Comm
2673 
2674    Input Parameters:
2675 +  A - the matrix
2676 .  bs - size of block
2677 .  nz - number of block nonzeros per block row (same for all rows)
2678 -  nnz - array containing the number of block nonzeros in the various block rows
2679          (possibly different for each block row) or PETSC_NULL
2680 
2681    Options Database Keys:
2682 .   -mat_no_unroll - uses code that does not unroll the loops in the
2683                      block calculations (much slower)
2684 .    -mat_block_size - size of the blocks to use
2685 
2686    Level: intermediate
2687 
2688    Notes:
2689    The block AIJ format is fully compatible with standard Fortran 77
2690    storage.  That is, the stored row and column indices can begin at
2691    either one (as in Fortran) or zero.  See the users' manual for details.
2692 
2693    Specify the preallocated storage with either nz or nnz (not both).
2694    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2695    allocation.  For additional details, see the users manual chapter on
2696    matrices.
2697 
2698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2699 @*/
2700 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2701 {
2702   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2703 
2704   PetscFunctionBegin;
2705   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2706   if (f) {
2707     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2708   }
2709   PetscFunctionReturn(0);
2710 }
2711 
2712