xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9ae221ffe77a9fbfac348b4ed5abed9f5e2e2e41)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
18   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
19 
20   PetscFunctionBegin;
21   if (a->idiagvalid) PetscFunctionReturn(0);
22   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
23   diag_offset = a->diag;
24   if (!a->idiag) {
25     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
26   }
27   diag  = a->idiag;
28   mdiag = a->idiag+bs*bs*mbs;
29   /* factor and invert each block */
30   switch (bs){
31     case 2:
32       for (i=0; i<mbs; i++) {
33         odiag   = v + 4*diag_offset[i];
34         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
35 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
36 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
37 	diag    += 4;
38 	mdiag   += 4;
39       }
40       break;
41     case 3:
42       for (i=0; i<mbs; i++) {
43         odiag    = v + 9*diag_offset[i];
44         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
45         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
46         diag[8]  = odiag[8];
47         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
48         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
49         mdiag[8] = odiag[8];
50 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
51 	diag    += 9;
52 	mdiag   += 9;
53       }
54       break;
55     case 4:
56       for (i=0; i<mbs; i++) {
57         odiag  = v + 16*diag_offset[i];
58         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
59         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
60 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
61 	diag  += 16;
62 	mdiag += 16;
63       }
64       break;
65     case 5:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 25*diag_offset[i];
68         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
69         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
70 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
71 	diag  += 25;
72 	mdiag += 25;
73       }
74       break;
75     default:
76       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
77   }
78   a->idiagvalid = PETSC_TRUE;
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
85 {
86   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
87   PetscScalar        *x,x1,x2,s1,s2;
88   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
89   PetscErrorCode     ierr;
90   PetscInt           m = a->mbs,i,i2,nz,idx;
91   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
92 
93   PetscFunctionBegin;
94   its = its*lits;
95   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
96   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
100 
101   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
102 
103   diag  = a->diag;
104   idiag = a->idiag;
105   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
106   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
107 
108   if (flag & SOR_ZERO_INITIAL_GUESS) {
109     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112       i2     = 2;
113       idiag += 4;
114       for (i=1; i<m; i++) {
115 	v     = aa + 4*ai[i];
116 	vi    = aj + ai[i];
117 	nz    = diag[i] - ai[i];
118 	s1    = b[i2]; s2 = b[i2+1];
119 	while (nz--) {
120 	  idx  = 2*(*vi++);
121 	  x1   = x[idx]; x2 = x[1+idx];
122 	  s1  -= v[0]*x1 + v[2]*x2;
123 	  s2  -= v[1]*x1 + v[3]*x2;
124 	  v   += 4;
125 	}
126 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128         idiag   += 4;
129         i2      += 2;
130       }
131       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132       PetscLogFlops(4*(a->nz));
133     }
134     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136       i2    = 0;
137       mdiag = a->idiag+4*a->mbs;
138       for (i=0; i<m; i++) {
139         x1      = x[i2]; x2 = x[i2+1];
140         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142         mdiag  += 4;
143         i2     += 2;
144       }
145       PetscLogFlops(6*m);
146     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
148     }
149     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150       idiag   = a->idiag+4*a->mbs - 4;
151       i2      = 2*m - 2;
152       x1      = x[i2]; x2 = x[i2+1];
153       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155       idiag -= 4;
156       i2    -= 2;
157       for (i=m-2; i>=0; i--) {
158 	v     = aa + 4*(diag[i]+1);
159 	vi    = aj + diag[i] + 1;
160 	nz    = ai[i+1] - diag[i] - 1;
161 	s1    = x[i2]; s2 = x[i2+1];
162 	while (nz--) {
163 	  idx  = 2*(*vi++);
164 	  x1   = x[idx]; x2 = x[1+idx];
165 	  s1  -= v[0]*x1 + v[2]*x2;
166 	  s2  -= v[1]*x1 + v[3]*x2;
167 	  v   += 4;
168 	}
169 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171         idiag   -= 4;
172         i2      -= 2;
173       }
174       PetscLogFlops(4*(a->nz));
175     }
176   } else {
177     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
178   }
179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
180   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191   PetscErrorCode     ierr;
192   PetscInt           m = a->mbs,i,i2,nz,idx;
193   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
194 
195   PetscFunctionBegin;
196   its = its*lits;
197   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
198   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
199   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
200   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
201   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
202 
203   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
204 
205   diag  = a->diag;
206   idiag = a->idiag;
207   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
209 
210   if (flag & SOR_ZERO_INITIAL_GUESS) {
211     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
212       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
213       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
214       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
215       i2     = 3;
216       idiag += 9;
217       for (i=1; i<m; i++) {
218 	v     = aa + 9*ai[i];
219 	vi    = aj + ai[i];
220 	nz    = diag[i] - ai[i];
221 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
222 	while (nz--) {
223 	  idx  = 3*(*vi++);
224 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
225 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
226 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
227 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
228 	  v   += 9;
229 	}
230 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
231 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
232 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
233         idiag   += 9;
234         i2      += 3;
235       }
236       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237       PetscLogFlops(9*(a->nz));
238     }
239     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241       i2    = 0;
242       mdiag = a->idiag+9*a->mbs;
243       for (i=0; i<m; i++) {
244         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
245         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
246         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
247         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
248         mdiag  += 9;
249         i2     += 3;
250       }
251       PetscLogFlops(15*m);
252     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
253       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
254     }
255     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
256       idiag   = a->idiag+9*a->mbs - 9;
257       i2      = 3*m - 3;
258       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
259       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
260       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
261       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
262       idiag -= 9;
263       i2    -= 3;
264       for (i=m-2; i>=0; i--) {
265 	v     = aa + 9*(diag[i]+1);
266 	vi    = aj + diag[i] + 1;
267 	nz    = ai[i+1] - diag[i] - 1;
268 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
269 	while (nz--) {
270 	  idx  = 3*(*vi++);
271 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
272 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
273 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
274 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
275 	  v   += 9;
276 	}
277 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
278 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
279 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
280         idiag   -= 9;
281         i2      -= 3;
282       }
283       PetscLogFlops(9*(a->nz));
284     }
285   } else {
286     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
287   }
288   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
289   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
296 {
297   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
298   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
299   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
300   PetscErrorCode     ierr;
301   PetscInt           m = a->mbs,i,i2,nz,idx;
302   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
303 
304   PetscFunctionBegin;
305   its = its*lits;
306   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
307   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
308   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
309   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
310   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
311 
312   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
313 
314   diag  = a->diag;
315   idiag = a->idiag;
316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
318 
319   if (flag & SOR_ZERO_INITIAL_GUESS) {
320     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
321       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
322       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
323       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
324       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
325       i2     = 4;
326       idiag += 16;
327       for (i=1; i<m; i++) {
328 	v     = aa + 16*ai[i];
329 	vi    = aj + ai[i];
330 	nz    = diag[i] - ai[i];
331 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
332 	while (nz--) {
333 	  idx  = 4*(*vi++);
334 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
335 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
336 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
337 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
338 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
339 	  v   += 16;
340 	}
341 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
342 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
343 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
344 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
345         idiag   += 16;
346         i2      += 4;
347       }
348       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
349       PetscLogFlops(16*(a->nz));
350     }
351     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
352         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
353       i2    = 0;
354       mdiag = a->idiag+16*a->mbs;
355       for (i=0; i<m; i++) {
356         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
357         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
358         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
359         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
360         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
361         mdiag  += 16;
362         i2     += 4;
363       }
364       PetscLogFlops(28*m);
365     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
366       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
367     }
368     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
369       idiag   = a->idiag+16*a->mbs - 16;
370       i2      = 4*m - 4;
371       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
372       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
373       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
374       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
375       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
376       idiag -= 16;
377       i2    -= 4;
378       for (i=m-2; i>=0; i--) {
379 	v     = aa + 16*(diag[i]+1);
380 	vi    = aj + diag[i] + 1;
381 	nz    = ai[i+1] - diag[i] - 1;
382 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
383 	while (nz--) {
384 	  idx  = 4*(*vi++);
385 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
386 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
387 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
388 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
389 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
390 	  v   += 16;
391 	}
392 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
393 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
394 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
395 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
396         idiag   -= 16;
397         i2      -= 4;
398       }
399       PetscLogFlops(16*(a->nz));
400     }
401   } else {
402     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
403   }
404   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
412 {
413   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
414   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
415   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
416   PetscErrorCode     ierr;
417   PetscInt           m = a->mbs,i,i2,nz,idx;
418   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
419 
420   PetscFunctionBegin;
421   its = its*lits;
422   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
423   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
424   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
425   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
426   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
427 
428   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
429 
430   diag  = a->diag;
431   idiag = a->idiag;
432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
434 
435   if (flag & SOR_ZERO_INITIAL_GUESS) {
436     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
437       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
438       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
439       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
440       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
441       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
442       i2     = 5;
443       idiag += 25;
444       for (i=1; i<m; i++) {
445 	v     = aa + 25*ai[i];
446 	vi    = aj + ai[i];
447 	nz    = diag[i] - ai[i];
448 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
449 	while (nz--) {
450 	  idx  = 5*(*vi++);
451 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
452 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
453 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
454 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
455 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
456 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
457 	  v   += 25;
458 	}
459 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
460 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
461 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
462 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
463 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
464         idiag   += 25;
465         i2      += 5;
466       }
467       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
468       PetscLogFlops(25*(a->nz));
469     }
470     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
471         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
472       i2    = 0;
473       mdiag = a->idiag+25*a->mbs;
474       for (i=0; i<m; i++) {
475         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
476         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
477         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
478         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
479         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
480         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
481         mdiag  += 25;
482         i2     += 5;
483       }
484       PetscLogFlops(45*m);
485     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
486       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
487     }
488     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489       idiag   = a->idiag+25*a->mbs - 25;
490       i2      = 5*m - 5;
491       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
492       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
493       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
494       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
495       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
496       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
497       idiag -= 25;
498       i2    -= 5;
499       for (i=m-2; i>=0; i--) {
500 	v     = aa + 25*(diag[i]+1);
501 	vi    = aj + diag[i] + 1;
502 	nz    = ai[i+1] - diag[i] - 1;
503 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
504 	while (nz--) {
505 	  idx  = 5*(*vi++);
506 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
507 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
508 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
509 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
510 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
511 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
512 	  v   += 25;
513 	}
514 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
515 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
516 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
517 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
518 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
519         idiag   -= 25;
520         i2      -= 5;
521       }
522       PetscLogFlops(25*(a->nz));
523     }
524   } else {
525     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
526   }
527   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
528   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
529   PetscFunctionReturn(0);
530 }
531 
532 /*
533     Special version for Fun3d sequential benchmark
534 */
535 #if defined(PETSC_HAVE_FORTRAN_CAPS)
536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
538 #define matsetvaluesblocked4_ matsetvaluesblocked4
539 #endif
540 
541 EXTERN_C_BEGIN
542 #undef __FUNCT__
543 #define __FUNCT__ "matsetvaluesblocked4_"
544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
545 {
546   Mat               A = *AA;
547   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
548   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
549   PetscInt          *ai=a->i,*ailen=a->ilen;
550   PetscInt          *aj=a->j,stepval;
551   const PetscScalar *value = v;
552   MatScalar         *ap,*aa = a->a,*bap;
553 
554   PetscFunctionBegin;
555   stepval = (n-1)*4;
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     rp   = aj + ai[row];
559     ap   = aa + 16*ai[row];
560     nrow = ailen[row];
561     low  = 0;
562     for (l=0; l<n; l++) { /* loop over added columns */
563       col = in[l];
564       value = v + k*(stepval+4)*4 + l*4;
565       low = 0; high = nrow;
566       while (high-low > 7) {
567         t = (low+high)/2;
568         if (rp[t] > col) high = t;
569         else             low  = t;
570       }
571       for (i=low; i<high; i++) {
572         if (rp[i] > col) break;
573         if (rp[i] == col) {
574           bap  = ap +  16*i;
575           for (ii=0; ii<4; ii++,value+=stepval) {
576             for (jj=ii; jj<16; jj+=4) {
577               bap[jj] += *value++;
578             }
579           }
580           goto noinsert2;
581         }
582       }
583       N = nrow++ - 1;
584       /* shift up all the later entries in this row */
585       for (ii=N; ii>=i; ii--) {
586         rp[ii+1] = rp[ii];
587         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
588       }
589       if (N >= i) {
590         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
591       }
592       rp[i] = col;
593       bap   = ap +  16*i;
594       for (ii=0; ii<4; ii++,value+=stepval) {
595         for (jj=ii; jj<16; jj+=4) {
596           bap[jj] = *value++;
597         }
598       }
599       noinsert2:;
600       low = i;
601     }
602     ailen[row] = nrow;
603   }
604 }
605 EXTERN_C_END
606 
607 #if defined(PETSC_HAVE_FORTRAN_CAPS)
608 #define matsetvalues4_ MATSETVALUES4
609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
610 #define matsetvalues4_ matsetvalues4
611 #endif
612 
613 EXTERN_C_BEGIN
614 #undef __FUNCT__
615 #define __FUNCT__ "MatSetValues4_"
616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
617 {
618   Mat         A = *AA;
619   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
621   PetscInt    *ai=a->i,*ailen=a->ilen;
622   PetscInt    *aj=a->j,brow,bcol;
623   PetscInt    ridx,cidx;
624   MatScalar   *ap,value,*aa=a->a,*bap;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over added rows */
628     row  = im[k]; brow = row/4;
629     rp   = aj + ai[brow];
630     ap   = aa + 16*ai[brow];
631     nrow = ailen[brow];
632     low  = 0;
633     for (l=0; l<n; l++) { /* loop over added columns */
634       col = in[l]; bcol = col/4;
635       ridx = row % 4; cidx = col % 4;
636       value = v[l + k*n];
637       low = 0; high = nrow;
638       while (high-low > 7) {
639         t = (low+high)/2;
640         if (rp[t] > bcol) high = t;
641         else              low  = t;
642       }
643       for (i=low; i<high; i++) {
644         if (rp[i] > bcol) break;
645         if (rp[i] == bcol) {
646           bap  = ap +  16*i + 4*cidx + ridx;
647           *bap += value;
648           goto noinsert1;
649         }
650       }
651       N = nrow++ - 1;
652       /* shift up all the later entries in this row */
653       for (ii=N; ii>=i; ii--) {
654         rp[ii+1] = rp[ii];
655         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
656       }
657       if (N>=i) {
658         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
659       }
660       rp[i]                    = bcol;
661       ap[16*i + 4*cidx + ridx] = value;
662       noinsert1:;
663       low = i;
664     }
665     ailen[brow] = nrow;
666   }
667 }
668 EXTERN_C_END
669 
670 /*  UGLY, ugly, ugly
671    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
672    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
673    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
674    converts the entries into single precision and then calls ..._MatScalar() to put them
675    into the single precision data structures.
676 */
677 #if defined(PETSC_USE_MAT_SINGLE)
678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
679 #else
680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
681 #endif
682 
683 #define CHUNKSIZE  10
684 
685 /*
686      Checks for missing diagonals
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
691 {
692   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
693   PetscErrorCode ierr;
694   PetscInt       *diag,*jj = a->j,i;
695 
696   PetscFunctionBegin;
697   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
698   diag = a->diag;
699   for (i=0; i<a->mbs; i++) {
700     if (jj[diag[i]] != i) {
701       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
702     }
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
710 {
711   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
712   PetscErrorCode ierr;
713   PetscInt       i,j,*diag,m = a->mbs;
714 
715   PetscFunctionBegin;
716   if (a->diag) PetscFunctionReturn(0);
717 
718   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
719   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
720   for (i=0; i<m; i++) {
721     diag[i] = a->i[i+1];
722     for (j=a->i[i]; j<a->i[i+1]; j++) {
723       if (a->j[j] == i) {
724         diag[i] = j;
725         break;
726       }
727     }
728   }
729   a->diag = diag;
730   PetscFunctionReturn(0);
731 }
732 
733 
734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
739 {
740   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
741   PetscErrorCode ierr;
742   PetscInt       n = a->mbs,i;
743 
744   PetscFunctionBegin;
745   *nn = n;
746   if (!ia) PetscFunctionReturn(0);
747   if (symmetric) {
748     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
749   } else if (oshift == 1) {
750     /* temporarily add 1 to i and j indices */
751     PetscInt nz = a->i[n];
752     for (i=0; i<nz; i++) a->j[i]++;
753     for (i=0; i<n+1; i++) a->i[i]++;
754     *ia = a->i; *ja = a->j;
755   } else {
756     *ia = a->i; *ja = a->j;
757   }
758 
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
765 {
766   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
767   PetscErrorCode ierr;
768   PetscInt       i,n = a->mbs;
769 
770   PetscFunctionBegin;
771   if (!ia) PetscFunctionReturn(0);
772   if (symmetric) {
773     ierr = PetscFree(*ia);CHKERRQ(ierr);
774     ierr = PetscFree(*ja);CHKERRQ(ierr);
775   } else if (oshift == 1) {
776     PetscInt nz = a->i[n]-1;
777     for (i=0; i<nz; i++) a->j[i]--;
778     for (i=0; i<n+1; i++) a->i[i]--;
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatDestroy_SeqBAIJ"
785 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
786 {
787   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791 #if defined(PETSC_USE_LOG)
792   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
793 #endif
794   ierr = PetscFree(a->a);CHKERRQ(ierr);
795   if (!a->singlemalloc) {
796     ierr = PetscFree(a->i);CHKERRQ(ierr);
797     ierr = PetscFree(a->j);CHKERRQ(ierr);
798   }
799   if (a->row) {
800     ierr = ISDestroy(a->row);CHKERRQ(ierr);
801   }
802   if (a->col) {
803     ierr = ISDestroy(a->col);CHKERRQ(ierr);
804   }
805   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
806   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
807   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
808   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
809   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
810   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
811   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
812   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
813 #if defined(PETSC_USE_MAT_SINGLE)
814   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
815 #endif
816   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
817   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
818 
819   ierr = PetscFree(a);CHKERRQ(ierr);
820 
821   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
822   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
823   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
824   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
825   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
826   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatSetOption_SeqBAIJ"
832 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
833 {
834   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
835 
836   PetscFunctionBegin;
837   switch (op) {
838   case MAT_ROW_ORIENTED:
839     a->roworiented    = PETSC_TRUE;
840     break;
841   case MAT_COLUMN_ORIENTED:
842     a->roworiented    = PETSC_FALSE;
843     break;
844   case MAT_COLUMNS_SORTED:
845     a->sorted         = PETSC_TRUE;
846     break;
847   case MAT_COLUMNS_UNSORTED:
848     a->sorted         = PETSC_FALSE;
849     break;
850   case MAT_KEEP_ZEROED_ROWS:
851     a->keepzeroedrows = PETSC_TRUE;
852     break;
853   case MAT_NO_NEW_NONZERO_LOCATIONS:
854     a->nonew          = 1;
855     break;
856   case MAT_NEW_NONZERO_LOCATION_ERR:
857     a->nonew          = -1;
858     break;
859   case MAT_NEW_NONZERO_ALLOCATION_ERR:
860     a->nonew          = -2;
861     break;
862   case MAT_YES_NEW_NONZERO_LOCATIONS:
863     a->nonew          = 0;
864     break;
865   case MAT_ROWS_SORTED:
866   case MAT_ROWS_UNSORTED:
867   case MAT_YES_NEW_DIAGONALS:
868   case MAT_IGNORE_OFF_PROC_ENTRIES:
869   case MAT_USE_HASH_TABLE:
870     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
871     break;
872   case MAT_NO_NEW_DIAGONALS:
873     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
874   case MAT_SYMMETRIC:
875   case MAT_STRUCTURALLY_SYMMETRIC:
876   case MAT_NOT_SYMMETRIC:
877   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
878   case MAT_HERMITIAN:
879   case MAT_NOT_HERMITIAN:
880   case MAT_SYMMETRY_ETERNAL:
881   case MAT_NOT_SYMMETRY_ETERNAL:
882     break;
883   default:
884     SETERRQ(PETSC_ERR_SUP,"unknown option");
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatGetRow_SeqBAIJ"
891 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
892 {
893   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
894   PetscErrorCode ierr;
895   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
896   MatScalar      *aa,*aa_i;
897   PetscScalar    *v_i;
898 
899   PetscFunctionBegin;
900   bs  = A->bs;
901   ai  = a->i;
902   aj  = a->j;
903   aa  = a->a;
904   bs2 = a->bs2;
905 
906   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
907 
908   bn  = row/bs;   /* Block number */
909   bp  = row % bs; /* Block Position */
910   M   = ai[bn+1] - ai[bn];
911   *nz = bs*M;
912 
913   if (v) {
914     *v = 0;
915     if (*nz) {
916       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
917       for (i=0; i<M; i++) { /* for each block in the block row */
918         v_i  = *v + i*bs;
919         aa_i = aa + bs2*(ai[bn] + i);
920         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
921       }
922     }
923   }
924 
925   if (idx) {
926     *idx = 0;
927     if (*nz) {
928       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
929       for (i=0; i<M; i++) { /* for each block in the block row */
930         idx_i = *idx + i*bs;
931         itmp  = bs*aj[ai[bn] + i];
932         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
933       }
934     }
935   }
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
941 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
942 {
943   PetscErrorCode ierr;
944 
945   PetscFunctionBegin;
946   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
947   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatTranspose_SeqBAIJ"
953 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
954 {
955   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
956   Mat            C;
957   PetscErrorCode ierr;
958   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
959   PetscInt       *rows,*cols,bs2=a->bs2;
960   PetscScalar    *array;
961 
962   PetscFunctionBegin;
963   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
964   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
965   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
966 
967 #if defined(PETSC_USE_MAT_SINGLE)
968   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
969   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
970 #else
971   array = a->a;
972 #endif
973 
974   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
975   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
976   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
977   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
978   ierr = PetscFree(col);CHKERRQ(ierr);
979   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
980   cols = rows + bs;
981   for (i=0; i<mbs; i++) {
982     cols[0] = i*bs;
983     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
984     len = ai[i+1] - ai[i];
985     for (j=0; j<len; j++) {
986       rows[0] = (*aj++)*bs;
987       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
988       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
989       array += bs2;
990     }
991   }
992   ierr = PetscFree(rows);CHKERRQ(ierr);
993 #if defined(PETSC_USE_MAT_SINGLE)
994   ierr = PetscFree(array);
995 #endif
996 
997   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
998   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
999 
1000   if (B) {
1001     *B = C;
1002   } else {
1003     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1004   }
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1010 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1011 {
1012   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1013   PetscErrorCode ierr;
1014   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1015   int            fd;
1016   PetscScalar    *aa;
1017   FILE           *file;
1018 
1019   PetscFunctionBegin;
1020   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1021   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1022   col_lens[0] = MAT_FILE_COOKIE;
1023 
1024   col_lens[1] = A->m;
1025   col_lens[2] = A->n;
1026   col_lens[3] = a->nz*bs2;
1027 
1028   /* store lengths of each row and write (including header) to file */
1029   count = 0;
1030   for (i=0; i<a->mbs; i++) {
1031     for (j=0; j<bs; j++) {
1032       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1033     }
1034   }
1035   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1036   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1037 
1038   /* store column indices (zero start index) */
1039   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1040   count = 0;
1041   for (i=0; i<a->mbs; i++) {
1042     for (j=0; j<bs; j++) {
1043       for (k=a->i[i]; k<a->i[i+1]; k++) {
1044         for (l=0; l<bs; l++) {
1045           jj[count++] = bs*a->j[k] + l;
1046         }
1047       }
1048     }
1049   }
1050   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1051   ierr = PetscFree(jj);CHKERRQ(ierr);
1052 
1053   /* store nonzero values */
1054   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1055   count = 0;
1056   for (i=0; i<a->mbs; i++) {
1057     for (j=0; j<bs; j++) {
1058       for (k=a->i[i]; k<a->i[i+1]; k++) {
1059         for (l=0; l<bs; l++) {
1060           aa[count++] = a->a[bs2*k + l*bs + j];
1061         }
1062       }
1063     }
1064   }
1065   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1066   ierr = PetscFree(aa);CHKERRQ(ierr);
1067 
1068   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1069   if (file) {
1070     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1071   }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1077 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1078 {
1079   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1080   PetscErrorCode    ierr;
1081   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1082   PetscViewerFormat format;
1083 
1084   PetscFunctionBegin;
1085   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1086   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1087     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1088   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1089     Mat aij;
1090     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1091     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1092     ierr = MatDestroy(aij);CHKERRQ(ierr);
1093   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1094      PetscFunctionReturn(0);
1095   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1096     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1097     for (i=0; i<a->mbs; i++) {
1098       for (j=0; j<bs; j++) {
1099         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1100         for (k=a->i[i]; k<a->i[i+1]; k++) {
1101           for (l=0; l<bs; l++) {
1102 #if defined(PETSC_USE_COMPLEX)
1103             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1104               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1105                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1106             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1107               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1108                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1109             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1110               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1111             }
1112 #else
1113             if (a->a[bs2*k + l*bs + j] != 0.0) {
1114               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1115             }
1116 #endif
1117           }
1118         }
1119         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1120       }
1121     }
1122     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1123   } else {
1124     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1125     for (i=0; i<a->mbs; i++) {
1126       for (j=0; j<bs; j++) {
1127         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1128         for (k=a->i[i]; k<a->i[i+1]; k++) {
1129           for (l=0; l<bs; l++) {
1130 #if defined(PETSC_USE_COMPLEX)
1131             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1132               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1133                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1134             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1135               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1136                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1137             } else {
1138               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1139             }
1140 #else
1141             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1142 #endif
1143           }
1144         }
1145         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1146       }
1147     }
1148     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1149   }
1150   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1156 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1157 {
1158   Mat            A = (Mat) Aa;
1159   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1160   PetscErrorCode ierr;
1161   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1162   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1163   MatScalar      *aa;
1164   PetscViewer    viewer;
1165 
1166   PetscFunctionBegin;
1167 
1168   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1169   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1170 
1171   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1172 
1173   /* loop over matrix elements drawing boxes */
1174   color = PETSC_DRAW_BLUE;
1175   for (i=0,row=0; i<mbs; i++,row+=bs) {
1176     for (j=a->i[i]; j<a->i[i+1]; j++) {
1177       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1178       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1179       aa = a->a + j*bs2;
1180       for (k=0; k<bs; k++) {
1181         for (l=0; l<bs; l++) {
1182           if (PetscRealPart(*aa++) >=  0.) continue;
1183           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1184         }
1185       }
1186     }
1187   }
1188   color = PETSC_DRAW_CYAN;
1189   for (i=0,row=0; i<mbs; i++,row+=bs) {
1190     for (j=a->i[i]; j<a->i[i+1]; j++) {
1191       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1192       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1193       aa = a->a + j*bs2;
1194       for (k=0; k<bs; k++) {
1195         for (l=0; l<bs; l++) {
1196           if (PetscRealPart(*aa++) != 0.) continue;
1197           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1198         }
1199       }
1200     }
1201   }
1202 
1203   color = PETSC_DRAW_RED;
1204   for (i=0,row=0; i<mbs; i++,row+=bs) {
1205     for (j=a->i[i]; j<a->i[i+1]; j++) {
1206       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1207       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1208       aa = a->a + j*bs2;
1209       for (k=0; k<bs; k++) {
1210         for (l=0; l<bs; l++) {
1211           if (PetscRealPart(*aa++) <= 0.) continue;
1212           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1213         }
1214       }
1215     }
1216   }
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1222 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1223 {
1224   PetscErrorCode ierr;
1225   PetscReal      xl,yl,xr,yr,w,h;
1226   PetscDraw      draw;
1227   PetscTruth     isnull;
1228 
1229   PetscFunctionBegin;
1230 
1231   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1232   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1233 
1234   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1235   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1236   xr += w;    yr += h;  xl = -w;     yl = -h;
1237   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1238   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1239   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatView_SeqBAIJ"
1245 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1246 {
1247   PetscErrorCode ierr;
1248   PetscTruth     iascii,isbinary,isdraw;
1249 
1250   PetscFunctionBegin;
1251   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1252   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1253   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1254   if (iascii){
1255     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1256   } else if (isbinary) {
1257     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1258   } else if (isdraw) {
1259     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1260   } else {
1261     Mat B;
1262     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1263     ierr = MatView(B,viewer);CHKERRQ(ierr);
1264     ierr = MatDestroy(B);CHKERRQ(ierr);
1265   }
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1272 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1273 {
1274   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1275   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1276   PetscInt    *ai = a->i,*ailen = a->ilen;
1277   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1278   MatScalar   *ap,*aa = a->a,zero = 0.0;
1279 
1280   PetscFunctionBegin;
1281   for (k=0; k<m; k++) { /* loop over rows */
1282     row  = im[k]; brow = row/bs;
1283     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1284     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1285     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1286     nrow = ailen[brow];
1287     for (l=0; l<n; l++) { /* loop over columns */
1288       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1289       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1290       col  = in[l] ;
1291       bcol = col/bs;
1292       cidx = col%bs;
1293       ridx = row%bs;
1294       high = nrow;
1295       low  = 0; /* assume unsorted */
1296       while (high-low > 5) {
1297         t = (low+high)/2;
1298         if (rp[t] > bcol) high = t;
1299         else             low  = t;
1300       }
1301       for (i=low; i<high; i++) {
1302         if (rp[i] > bcol) break;
1303         if (rp[i] == bcol) {
1304           *v++ = ap[bs2*i+bs*cidx+ridx];
1305           goto finished;
1306         }
1307       }
1308       *v++ = zero;
1309       finished:;
1310     }
1311   }
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #if defined(PETSC_USE_MAT_SINGLE)
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1318 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1319 {
1320   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1321   PetscErrorCode ierr;
1322   PetscInt       i,N = m*n*b->bs2;
1323   MatScalar      *vsingle;
1324 
1325   PetscFunctionBegin;
1326   if (N > b->setvalueslen) {
1327     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1328     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1329     b->setvalueslen  = N;
1330   }
1331   vsingle = b->setvaluescopy;
1332   for (i=0; i<N; i++) {
1333     vsingle[i] = v[i];
1334   }
1335   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 #endif
1339 
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1343 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1344 {
1345   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1346   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1347   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1348   PetscErrorCode  ierr;
1349   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1350   PetscTruth      roworiented=a->roworiented;
1351   const MatScalar *value = v;
1352   MatScalar       *ap,*aa = a->a,*bap;
1353 
1354   PetscFunctionBegin;
1355   if (roworiented) {
1356     stepval = (n-1)*bs;
1357   } else {
1358     stepval = (m-1)*bs;
1359   }
1360   for (k=0; k<m; k++) { /* loop over added rows */
1361     row  = im[k];
1362     if (row < 0) continue;
1363 #if defined(PETSC_USE_DEBUG)
1364     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1365 #endif
1366     rp   = aj + ai[row];
1367     ap   = aa + bs2*ai[row];
1368     rmax = imax[row];
1369     nrow = ailen[row];
1370     low  = 0;
1371     high = nrow;
1372     for (l=0; l<n; l++) { /* loop over added columns */
1373       if (in[l] < 0) continue;
1374 #if defined(PETSC_USE_DEBUG)
1375       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1376 #endif
1377       col = in[l];
1378       if (roworiented) {
1379         value = v + k*(stepval+bs)*bs + l*bs;
1380       } else {
1381         value = v + l*(stepval+bs)*bs + k*bs;
1382       }
1383       if (col < lastcol) low = 0; else high = nrow;
1384       lastcol = col;
1385       while (high-low > 7) {
1386         t = (low+high)/2;
1387         if (rp[t] > col) high = t;
1388         else             low  = t;
1389       }
1390       for (i=low; i<high; i++) {
1391         if (rp[i] > col) break;
1392         if (rp[i] == col) {
1393           bap  = ap +  bs2*i;
1394           if (roworiented) {
1395             if (is == ADD_VALUES) {
1396               for (ii=0; ii<bs; ii++,value+=stepval) {
1397                 for (jj=ii; jj<bs2; jj+=bs) {
1398                   bap[jj] += *value++;
1399                 }
1400               }
1401             } else {
1402               for (ii=0; ii<bs; ii++,value+=stepval) {
1403                 for (jj=ii; jj<bs2; jj+=bs) {
1404                   bap[jj] = *value++;
1405                 }
1406               }
1407             }
1408           } else {
1409             if (is == ADD_VALUES) {
1410               for (ii=0; ii<bs; ii++,value+=stepval) {
1411                 for (jj=0; jj<bs; jj++) {
1412                   *bap++ += *value++;
1413                 }
1414               }
1415             } else {
1416               for (ii=0; ii<bs; ii++,value+=stepval) {
1417                 for (jj=0; jj<bs; jj++) {
1418                   *bap++  = *value++;
1419                 }
1420               }
1421             }
1422           }
1423           goto noinsert2;
1424         }
1425       }
1426       if (nonew == 1) goto noinsert2;
1427       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1428       if (nrow >= rmax) {
1429         /* there is no extra room in row, therefore enlarge */
1430         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1431         MatScalar *new_a;
1432 
1433         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1434 
1435         /* malloc new storage space */
1436         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1437 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1438         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1439         new_i   = new_j + new_nz;
1440 
1441         /* copy over old data into new slots */
1442         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1443         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1444         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1445         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1446         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1447         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1448         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1449         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1450         /* free up old matrix storage */
1451         ierr = PetscFree(a->a);CHKERRQ(ierr);
1452         if (!a->singlemalloc) {
1453           ierr = PetscFree(a->i);CHKERRQ(ierr);
1454           ierr = PetscFree(a->j);CHKERRQ(ierr);
1455         }
1456         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1457         a->singlemalloc = PETSC_TRUE;
1458 
1459         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1460         rmax = imax[row] = imax[row] + CHUNKSIZE;
1461         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1462         a->maxnz += bs2*CHUNKSIZE;
1463         a->reallocs++;
1464         a->nz++;
1465       }
1466       N = nrow++ - 1;
1467       /* shift up all the later entries in this row */
1468       for (ii=N; ii>=i; ii--) {
1469         rp[ii+1] = rp[ii];
1470         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1471       }
1472       if (N >= i) {
1473         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1474       }
1475       rp[i] = col;
1476       bap   = ap +  bs2*i;
1477       if (roworiented) {
1478         for (ii=0; ii<bs; ii++,value+=stepval) {
1479           for (jj=ii; jj<bs2; jj+=bs) {
1480             bap[jj] = *value++;
1481           }
1482         }
1483       } else {
1484         for (ii=0; ii<bs; ii++,value+=stepval) {
1485           for (jj=0; jj<bs; jj++) {
1486             *bap++  = *value++;
1487           }
1488         }
1489       }
1490       noinsert2:;
1491       low = i;
1492     }
1493     ailen[row] = nrow;
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1500 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1501 {
1502   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1503   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1504   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1505   PetscErrorCode ierr;
1506   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1507   MatScalar      *aa = a->a,*ap;
1508   PetscReal      ratio=0.6;
1509 
1510   PetscFunctionBegin;
1511   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1512 
1513   if (m) rmax = ailen[0];
1514   for (i=1; i<mbs; i++) {
1515     /* move each row back by the amount of empty slots (fshift) before it*/
1516     fshift += imax[i-1] - ailen[i-1];
1517     rmax   = PetscMax(rmax,ailen[i]);
1518     if (fshift) {
1519       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1520       N = ailen[i];
1521       for (j=0; j<N; j++) {
1522         ip[j-fshift] = ip[j];
1523         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1524       }
1525     }
1526     ai[i] = ai[i-1] + ailen[i-1];
1527   }
1528   if (mbs) {
1529     fshift += imax[mbs-1] - ailen[mbs-1];
1530     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1531   }
1532   /* reset ilen and imax for each row */
1533   for (i=0; i<mbs; i++) {
1534     ailen[i] = imax[i] = ai[i+1] - ai[i];
1535   }
1536   a->nz = ai[mbs];
1537 
1538   /* diagonals may have moved, so kill the diagonal pointers */
1539   a->idiagvalid = PETSC_FALSE;
1540   if (fshift && a->diag) {
1541     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1542     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1543     a->diag = 0;
1544   }
1545   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2);
1546   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs);
1547   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
1548   a->reallocs          = 0;
1549   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1550 
1551   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1552   if (a->compressedrow.use){
1553     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1554   }
1555 
1556   A->same_nonzero = PETSC_TRUE;
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 /*
1561    This function returns an array of flags which indicate the locations of contiguous
1562    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1563    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1564    Assume: sizes should be long enough to hold all the values.
1565 */
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1568 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1569 {
1570   PetscInt   i,j,k,row;
1571   PetscTruth flg;
1572 
1573   PetscFunctionBegin;
1574   for (i=0,j=0; i<n; j++) {
1575     row = idx[i];
1576     if (row%bs!=0) { /* Not the begining of a block */
1577       sizes[j] = 1;
1578       i++;
1579     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1580       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1581       i++;
1582     } else { /* Begining of the block, so check if the complete block exists */
1583       flg = PETSC_TRUE;
1584       for (k=1; k<bs; k++) {
1585         if (row+k != idx[i+k]) { /* break in the block */
1586           flg = PETSC_FALSE;
1587           break;
1588         }
1589       }
1590       if (flg) { /* No break in the bs */
1591         sizes[j] = bs;
1592         i+= bs;
1593       } else {
1594         sizes[j] = 1;
1595         i++;
1596       }
1597     }
1598   }
1599   *bs_max = j;
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1605 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1606 {
1607   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1608   PetscErrorCode ierr;
1609   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1610   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1611   PetscScalar    zero = 0.0;
1612   MatScalar      *aa;
1613 
1614   PetscFunctionBegin;
1615   /* Make a copy of the IS and  sort it */
1616   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1617   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1618 
1619   /* allocate memory for rows,sizes */
1620   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1621   sizes = rows + is_n;
1622 
1623   /* copy IS values to rows, and sort them */
1624   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1625   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1626   if (baij->keepzeroedrows) {
1627     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1628     bs_max = is_n;
1629     A->same_nonzero = PETSC_TRUE;
1630   } else {
1631     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1632     A->same_nonzero = PETSC_FALSE;
1633   }
1634   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1635 
1636   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1637     row   = rows[j];
1638     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1639     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1640     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1641     if (sizes[i] == bs && !baij->keepzeroedrows) {
1642       if (diag) {
1643         if (baij->ilen[row/bs] > 0) {
1644           baij->ilen[row/bs]       = 1;
1645           baij->j[baij->i[row/bs]] = row/bs;
1646           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1647         }
1648         /* Now insert all the diagonal values for this bs */
1649         for (k=0; k<bs; k++) {
1650           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1651         }
1652       } else { /* (!diag) */
1653         baij->ilen[row/bs] = 0;
1654       } /* end (!diag) */
1655     } else { /* (sizes[i] != bs) */
1656 #if defined (PETSC_USE_DEBUG)
1657       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1658 #endif
1659       for (k=0; k<count; k++) {
1660         aa[0] =  zero;
1661         aa    += bs;
1662       }
1663       if (diag) {
1664         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1665       }
1666     }
1667   }
1668 
1669   ierr = PetscFree(rows);CHKERRQ(ierr);
1670   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1676 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1677 {
1678   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1679   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1680   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1681   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1682   PetscErrorCode ierr;
1683   PetscInt       ridx,cidx,bs2=a->bs2;
1684   PetscTruth     roworiented=a->roworiented;
1685   MatScalar      *ap,value,*aa=a->a,*bap;
1686 
1687   PetscFunctionBegin;
1688   for (k=0; k<m; k++) { /* loop over added rows */
1689     row  = im[k]; brow = row/bs;
1690     if (row < 0) continue;
1691 #if defined(PETSC_USE_DEBUG)
1692     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1693 #endif
1694     rp   = aj + ai[brow];
1695     ap   = aa + bs2*ai[brow];
1696     rmax = imax[brow];
1697     nrow = ailen[brow];
1698     low  = 0;
1699     high = nrow;
1700     for (l=0; l<n; l++) { /* loop over added columns */
1701       if (in[l] < 0) continue;
1702 #if defined(PETSC_USE_DEBUG)
1703       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1704 #endif
1705       col = in[l]; bcol = col/bs;
1706       ridx = row % bs; cidx = col % bs;
1707       if (roworiented) {
1708         value = v[l + k*n];
1709       } else {
1710         value = v[k + l*m];
1711       }
1712       if (col < lastcol) low = 0; else high = nrow;
1713       lastcol = col;
1714       while (high-low > 7) {
1715         t = (low+high)/2;
1716         if (rp[t] > bcol) high = t;
1717         else              low  = t;
1718       }
1719       for (i=low; i<high; i++) {
1720         if (rp[i] > bcol) break;
1721         if (rp[i] == bcol) {
1722           bap  = ap +  bs2*i + bs*cidx + ridx;
1723           if (is == ADD_VALUES) *bap += value;
1724           else                  *bap  = value;
1725           goto noinsert1;
1726         }
1727       }
1728       if (nonew == 1) goto noinsert1;
1729       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1730       if (nrow >= rmax) {
1731         /* there is no extra room in row, therefore enlarge */
1732         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1733         MatScalar *new_a;
1734 
1735         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1736 
1737         /* Malloc new storage space */
1738         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1739 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1740         new_j   = (PetscInt*)(new_a + bs2*new_nz);
1741         new_i   = new_j + new_nz;
1742 
1743         /* copy over old data into new slots */
1744         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1745         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1746         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1747         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1748         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1749         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1750         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1751         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1752         /* free up old matrix storage */
1753         ierr = PetscFree(a->a);CHKERRQ(ierr);
1754         if (!a->singlemalloc) {
1755           ierr = PetscFree(a->i);CHKERRQ(ierr);
1756           ierr = PetscFree(a->j);CHKERRQ(ierr);
1757         }
1758         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1759         a->singlemalloc = PETSC_TRUE;
1760 
1761         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1762         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1763         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1764         a->maxnz += bs2*CHUNKSIZE;
1765         a->reallocs++;
1766         a->nz++;
1767       }
1768       N = nrow++ - 1;
1769       /* shift up all the later entries in this row */
1770       for (ii=N; ii>=i; ii--) {
1771         rp[ii+1] = rp[ii];
1772         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1773       }
1774       if (N>=i) {
1775         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1776       }
1777       rp[i]                      = bcol;
1778       ap[bs2*i + bs*cidx + ridx] = value;
1779       noinsert1:;
1780       low = i;
1781     }
1782     ailen[brow] = nrow;
1783   }
1784   A->same_nonzero = PETSC_FALSE;
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1791 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1792 {
1793   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1794   Mat            outA;
1795   PetscErrorCode ierr;
1796   PetscTruth     row_identity,col_identity;
1797 
1798   PetscFunctionBegin;
1799   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1800   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1801   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1802   if (!row_identity || !col_identity) {
1803     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1804   }
1805 
1806   outA          = inA;
1807   inA->factor   = FACTOR_LU;
1808 
1809   if (!a->diag) {
1810     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1811   }
1812 
1813   a->row        = row;
1814   a->col        = col;
1815   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1816   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1817 
1818   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1819   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1820   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1821 
1822   /*
1823       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1824       for ILU(0) factorization with natural ordering
1825   */
1826   if (inA->bs < 8) {
1827     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1828   } else {
1829     if (!a->solve_work) {
1830       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1831       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1832     }
1833   }
1834 
1835   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1836 
1837   PetscFunctionReturn(0);
1838 }
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1841 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1842 {
1843   static PetscTruth called = PETSC_FALSE;
1844   MPI_Comm          comm = A->comm;
1845   PetscErrorCode    ierr;
1846 
1847   PetscFunctionBegin;
1848   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1849   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1850   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 EXTERN_C_BEGIN
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1857 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1858 {
1859   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1860   PetscInt    i,nz,nbs;
1861 
1862   PetscFunctionBegin;
1863   nz  = baij->maxnz/baij->bs2;
1864   nbs = baij->nbs;
1865   for (i=0; i<nz; i++) {
1866     baij->j[i] = indices[i];
1867   }
1868   baij->nz = nz;
1869   for (i=0; i<nbs; i++) {
1870     baij->ilen[i] = baij->imax[i];
1871   }
1872 
1873   PetscFunctionReturn(0);
1874 }
1875 EXTERN_C_END
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1879 /*@
1880     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1881        in the matrix.
1882 
1883   Input Parameters:
1884 +  mat - the SeqBAIJ matrix
1885 -  indices - the column indices
1886 
1887   Level: advanced
1888 
1889   Notes:
1890     This can be called if you have precomputed the nonzero structure of the
1891   matrix and want to provide it to the matrix object to improve the performance
1892   of the MatSetValues() operation.
1893 
1894     You MUST have set the correct numbers of nonzeros per row in the call to
1895   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1896 
1897     MUST be called before any calls to MatSetValues();
1898 
1899 @*/
1900 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1901 {
1902   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1903 
1904   PetscFunctionBegin;
1905   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1906   PetscValidPointer(indices,2);
1907   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1908   if (f) {
1909     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1910   } else {
1911     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1912   }
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1918 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1919 {
1920   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1921   PetscErrorCode ierr;
1922   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1923   PetscReal      atmp;
1924   PetscScalar    *x,zero = 0.0;
1925   MatScalar      *aa;
1926   PetscInt       ncols,brow,krow,kcol;
1927 
1928   PetscFunctionBegin;
1929   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1930   bs   = A->bs;
1931   aa   = a->a;
1932   ai   = a->i;
1933   aj   = a->j;
1934   mbs = a->mbs;
1935 
1936   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1937   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1938   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1939   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1940   for (i=0; i<mbs; i++) {
1941     ncols = ai[1] - ai[0]; ai++;
1942     brow  = bs*i;
1943     for (j=0; j<ncols; j++){
1944       /* bcol = bs*(*aj); */
1945       for (kcol=0; kcol<bs; kcol++){
1946         for (krow=0; krow<bs; krow++){
1947           atmp = PetscAbsScalar(*aa); aa++;
1948           row = brow + krow;    /* row index */
1949           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1950           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1951         }
1952       }
1953       aj++;
1954     }
1955   }
1956   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1962 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1963 {
1964   PetscErrorCode ierr;
1965 
1966   PetscFunctionBegin;
1967   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1973 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1974 {
1975   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1976   PetscFunctionBegin;
1977   *array = a->a;
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1983 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1984 {
1985   PetscFunctionBegin;
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #include "petscblaslapack.h"
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1992 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1993 {
1994   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1995   PetscErrorCode ierr;
1996   PetscInt       i,bs=Y->bs,j,bs2;
1997   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1998 
1999   PetscFunctionBegin;
2000   if (str == SAME_NONZERO_PATTERN) {
2001     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2002   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2003     if (y->xtoy && y->XtoY != X) {
2004       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2005       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2006     }
2007     if (!y->xtoy) { /* get xtoy */
2008       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2009       y->XtoY = X;
2010     }
2011     bs2 = bs*bs;
2012     for (i=0; i<x->nz; i++) {
2013       j = 0;
2014       while (j < bs2){
2015         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2016         j++;
2017       }
2018     }
2019     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2020   } else {
2021     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 /* -------------------------------------------------------------------*/
2027 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2028        MatGetRow_SeqBAIJ,
2029        MatRestoreRow_SeqBAIJ,
2030        MatMult_SeqBAIJ_N,
2031 /* 4*/ MatMultAdd_SeqBAIJ_N,
2032        MatMultTranspose_SeqBAIJ,
2033        MatMultTransposeAdd_SeqBAIJ,
2034        MatSolve_SeqBAIJ_N,
2035        0,
2036        0,
2037 /*10*/ 0,
2038        MatLUFactor_SeqBAIJ,
2039        0,
2040        0,
2041        MatTranspose_SeqBAIJ,
2042 /*15*/ MatGetInfo_SeqBAIJ,
2043        MatEqual_SeqBAIJ,
2044        MatGetDiagonal_SeqBAIJ,
2045        MatDiagonalScale_SeqBAIJ,
2046        MatNorm_SeqBAIJ,
2047 /*20*/ 0,
2048        MatAssemblyEnd_SeqBAIJ,
2049        0,
2050        MatSetOption_SeqBAIJ,
2051        MatZeroEntries_SeqBAIJ,
2052 /*25*/ MatZeroRows_SeqBAIJ,
2053        MatLUFactorSymbolic_SeqBAIJ,
2054        MatLUFactorNumeric_SeqBAIJ_N,
2055        MatCholeskyFactorSymbolic_SeqBAIJ,
2056        MatCholeskyFactorNumeric_SeqBAIJ_N,
2057 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2058        MatILUFactorSymbolic_SeqBAIJ,
2059        MatICCFactorSymbolic_SeqBAIJ,
2060        MatGetArray_SeqBAIJ,
2061        MatRestoreArray_SeqBAIJ,
2062 /*35*/ MatDuplicate_SeqBAIJ,
2063        0,
2064        0,
2065        MatILUFactor_SeqBAIJ,
2066        0,
2067 /*40*/ MatAXPY_SeqBAIJ,
2068        MatGetSubMatrices_SeqBAIJ,
2069        MatIncreaseOverlap_SeqBAIJ,
2070        MatGetValues_SeqBAIJ,
2071        0,
2072 /*45*/ MatPrintHelp_SeqBAIJ,
2073        MatScale_SeqBAIJ,
2074        0,
2075        0,
2076        0,
2077 /*50*/ 0,
2078        MatGetRowIJ_SeqBAIJ,
2079        MatRestoreRowIJ_SeqBAIJ,
2080        0,
2081        0,
2082 /*55*/ 0,
2083        0,
2084        0,
2085        0,
2086        MatSetValuesBlocked_SeqBAIJ,
2087 /*60*/ MatGetSubMatrix_SeqBAIJ,
2088        MatDestroy_SeqBAIJ,
2089        MatView_SeqBAIJ,
2090        MatGetPetscMaps_Petsc,
2091        0,
2092 /*65*/ 0,
2093        0,
2094        0,
2095        0,
2096        0,
2097 /*70*/ MatGetRowMax_SeqBAIJ,
2098        MatConvert_Basic,
2099        0,
2100        0,
2101        0,
2102 /*75*/ 0,
2103        0,
2104        0,
2105        0,
2106        0,
2107 /*80*/ 0,
2108        0,
2109        0,
2110        0,
2111        MatLoad_SeqBAIJ,
2112 /*85*/ 0,
2113        0,
2114        0,
2115        0,
2116        0,
2117 /*90*/ 0,
2118        0,
2119        0,
2120        0,
2121        0,
2122 /*95*/ 0,
2123        0,
2124        0,
2125        0};
2126 
2127 EXTERN_C_BEGIN
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2130 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2131 {
2132   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2133   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   if (aij->nonew != 1) {
2138     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2139   }
2140 
2141   /* allocate space for values if not already there */
2142   if (!aij->saved_values) {
2143     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2144   }
2145 
2146   /* copy values over */
2147   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 EXTERN_C_END
2151 
2152 EXTERN_C_BEGIN
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2155 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2156 {
2157   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2158   PetscErrorCode ierr;
2159   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2160 
2161   PetscFunctionBegin;
2162   if (aij->nonew != 1) {
2163     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2164   }
2165   if (!aij->saved_values) {
2166     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2167   }
2168 
2169   /* copy values over */
2170   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 EXTERN_C_END
2174 
2175 EXTERN_C_BEGIN
2176 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2177 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2178 EXTERN_C_END
2179 
2180 EXTERN_C_BEGIN
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2183 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2184 {
2185   Mat_SeqBAIJ    *b;
2186   PetscErrorCode ierr;
2187   PetscInt       i,len,mbs,nbs,bs2,newbs = bs;
2188   PetscTruth     flg;
2189 
2190   PetscFunctionBegin;
2191 
2192   B->preallocated = PETSC_TRUE;
2193   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2194   if (nnz && newbs != bs) {
2195     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2196   }
2197   bs   = newbs;
2198 
2199   mbs  = B->m/bs;
2200   nbs  = B->n/bs;
2201   bs2  = bs*bs;
2202 
2203   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2204     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2205   }
2206 
2207   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2208   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2209   if (nnz) {
2210     for (i=0; i<mbs; i++) {
2211       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2212       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2213     }
2214   }
2215 
2216   b       = (Mat_SeqBAIJ*)B->data;
2217   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2218   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2219   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2220   if (!flg) {
2221     switch (bs) {
2222     case 1:
2223       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2224       B->ops->mult            = MatMult_SeqBAIJ_1;
2225       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2226       break;
2227     case 2:
2228       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2229       B->ops->mult            = MatMult_SeqBAIJ_2;
2230       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2231       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2232       break;
2233     case 3:
2234       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2235       B->ops->mult            = MatMult_SeqBAIJ_3;
2236       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2237       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2238       break;
2239     case 4:
2240       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2241       B->ops->mult            = MatMult_SeqBAIJ_4;
2242       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2243       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2244       break;
2245     case 5:
2246       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2247       B->ops->mult            = MatMult_SeqBAIJ_5;
2248       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2249       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2250       break;
2251     case 6:
2252       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2253       B->ops->mult            = MatMult_SeqBAIJ_6;
2254       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2255       break;
2256     case 7:
2257       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2258       B->ops->mult            = MatMult_SeqBAIJ_7;
2259       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2260       break;
2261     default:
2262       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2263       B->ops->mult            = MatMult_SeqBAIJ_N;
2264       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2265       break;
2266     }
2267   }
2268   B->bs      = bs;
2269   b->mbs     = mbs;
2270   b->nbs     = nbs;
2271   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr);
2272   if (!nnz) {
2273     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2274     else if (nz <= 0)        nz = 1;
2275     for (i=0; i<mbs; i++) b->imax[i] = nz;
2276     nz = nz*mbs;
2277   } else {
2278     nz = 0;
2279     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2280   }
2281 
2282   /* allocate the matrix space */
2283   len   = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2284   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2285   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2286   b->j  = (PetscInt*)(b->a + nz*bs2);
2287   ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2288   b->i  = b->j + nz;
2289   b->singlemalloc = PETSC_TRUE;
2290 
2291   b->i[0] = 0;
2292   for (i=1; i<mbs+1; i++) {
2293     b->i[i] = b->i[i-1] + b->imax[i-1];
2294   }
2295 
2296   /* b->ilen will count nonzeros in each block row so far. */
2297   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr);
2298   ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2299   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2300 
2301   B->bs               = bs;
2302   b->bs2              = bs2;
2303   b->mbs              = mbs;
2304   b->nz               = 0;
2305   b->maxnz            = nz*bs2;
2306   B->info.nz_unneeded = (PetscReal)b->maxnz;
2307   PetscFunctionReturn(0);
2308 }
2309 EXTERN_C_END
2310 
2311 /*MC
2312    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2313    block sparse compressed row format.
2314 
2315    Options Database Keys:
2316 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2317 
2318   Level: beginner
2319 
2320 .seealso: MatCreateSeqBAIJ
2321 M*/
2322 
2323 EXTERN_C_BEGIN
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatCreate_SeqBAIJ"
2326 PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2327 {
2328   PetscErrorCode ierr;
2329   PetscMPIInt    size;
2330   Mat_SeqBAIJ    *b;
2331 
2332   PetscFunctionBegin;
2333   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2334   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2335 
2336   B->m = B->M = PetscMax(B->m,B->M);
2337   B->n = B->N = PetscMax(B->n,B->N);
2338   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2339   B->data = (void*)b;
2340   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2341   B->factor           = 0;
2342   B->lupivotthreshold = 1.0;
2343   B->mapping          = 0;
2344   b->row              = 0;
2345   b->col              = 0;
2346   b->icol             = 0;
2347   b->reallocs         = 0;
2348   b->saved_values     = 0;
2349 #if defined(PETSC_USE_MAT_SINGLE)
2350   b->setvalueslen     = 0;
2351   b->setvaluescopy    = PETSC_NULL;
2352 #endif
2353 
2354   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2355   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2356 
2357   b->sorted           = PETSC_FALSE;
2358   b->roworiented      = PETSC_TRUE;
2359   b->nonew            = 0;
2360   b->diag             = 0;
2361   b->solve_work       = 0;
2362   b->mult_work        = 0;
2363   B->spptr            = 0;
2364   B->info.nz_unneeded = (PetscReal)b->maxnz;
2365   b->keepzeroedrows   = PETSC_FALSE;
2366   b->xtoy              = 0;
2367   b->XtoY              = 0;
2368   b->compressedrow.use     = PETSC_FALSE;
2369   b->compressedrow.nrows   = 0;
2370   b->compressedrow.i       = PETSC_NULL;
2371   b->compressedrow.rindex  = PETSC_NULL;
2372   b->compressedrow.checked = PETSC_FALSE;
2373   B->same_nonzero          = PETSC_FALSE;
2374 
2375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2376                                      "MatStoreValues_SeqBAIJ",
2377                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2379                                      "MatRetrieveValues_SeqBAIJ",
2380                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2382                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2383                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2384   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2385                                      "MatConvert_SeqBAIJ_SeqAIJ",
2386                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2387 #if !defined(PETSC_USE_64BIT_INT)
2388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2389                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2390                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2391 #endif
2392   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2393                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2394                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2395   PetscFunctionReturn(0);
2396 }
2397 EXTERN_C_END
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2401 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2402 {
2403   Mat            C;
2404   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2405   PetscErrorCode ierr;
2406   PetscInt       i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2407 
2408   PetscFunctionBegin;
2409   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2410 
2411   *B = 0;
2412   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2413   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2414   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2415   c    = (Mat_SeqBAIJ*)C->data;
2416 
2417   C->M   = A->M;
2418   C->N   = A->N;
2419   C->bs  = A->bs;
2420   c->bs2 = a->bs2;
2421   c->mbs = a->mbs;
2422   c->nbs = a->nbs;
2423 
2424   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2425   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2426   for (i=0; i<mbs; i++) {
2427     c->imax[i] = a->imax[i];
2428     c->ilen[i] = a->ilen[i];
2429   }
2430 
2431   /* allocate the matrix space */
2432   c->singlemalloc = PETSC_TRUE;
2433   len  = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2434   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2435   c->j = (PetscInt*)(c->a + nz*bs2);
2436   c->i = c->j + nz;
2437   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2438   if (mbs > 0) {
2439     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2440     if (cpvalues == MAT_COPY_VALUES) {
2441       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2442     } else {
2443       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2444     }
2445   }
2446 
2447   ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2448   c->sorted      = a->sorted;
2449   c->roworiented = a->roworiented;
2450   c->nonew       = a->nonew;
2451 
2452   if (a->diag) {
2453     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2454     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2455     for (i=0; i<mbs; i++) {
2456       c->diag[i] = a->diag[i];
2457     }
2458   } else c->diag        = 0;
2459   c->nz                 = a->nz;
2460   c->maxnz              = a->maxnz;
2461   c->solve_work         = 0;
2462   c->mult_work          = 0;
2463   C->preallocated       = PETSC_TRUE;
2464   C->assembled          = PETSC_TRUE;
2465 
2466   c->compressedrow.use     = a->compressedrow.use;
2467   c->compressedrow.nrows   = a->compressedrow.nrows;
2468   c->compressedrow.checked = a->compressedrow.checked;
2469   if ( a->compressedrow.checked && a->compressedrow.use){
2470     i = a->compressedrow.nrows;
2471     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2472     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2473     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2474     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2475   } else {
2476     c->compressedrow.use    = PETSC_FALSE;
2477     c->compressedrow.i      = PETSC_NULL;
2478     c->compressedrow.rindex = PETSC_NULL;
2479   }
2480   C->same_nonzero = A->same_nonzero;
2481   *B = C;
2482   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatLoad_SeqBAIJ"
2488 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2489 {
2490   Mat_SeqBAIJ    *a;
2491   Mat            B;
2492   PetscErrorCode ierr;
2493   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2494   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2495   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2496   PetscInt       *masked,nmask,tmp,bs2,ishift;
2497   PetscMPIInt    size;
2498   int            fd;
2499   PetscScalar    *aa;
2500   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2501 
2502   PetscFunctionBegin;
2503   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2504   bs2  = bs*bs;
2505 
2506   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2507   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2508   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2509   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2510   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2511   M = header[1]; N = header[2]; nz = header[3];
2512 
2513   if (header[3] < 0) {
2514     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2515   }
2516 
2517   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2518 
2519   /*
2520      This code adds extra rows to make sure the number of rows is
2521     divisible by the blocksize
2522   */
2523   mbs        = M/bs;
2524   extra_rows = bs - M + bs*(mbs);
2525   if (extra_rows == bs) extra_rows = 0;
2526   else                  mbs++;
2527   if (extra_rows) {
2528     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2529   }
2530 
2531   /* read in row lengths */
2532   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2533   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2534   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2535 
2536   /* read in column indices */
2537   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2538   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2539   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2540 
2541   /* loop over row lengths determining block row lengths */
2542   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2543   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2544   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2545   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2546   masked   = mask + mbs;
2547   rowcount = 0; nzcount = 0;
2548   for (i=0; i<mbs; i++) {
2549     nmask = 0;
2550     for (j=0; j<bs; j++) {
2551       kmax = rowlengths[rowcount];
2552       for (k=0; k<kmax; k++) {
2553         tmp = jj[nzcount++]/bs;
2554         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2555       }
2556       rowcount++;
2557     }
2558     browlengths[i] += nmask;
2559     /* zero out the mask elements we set */
2560     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2561   }
2562 
2563   /* create our matrix */
2564   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2565   ierr = MatSetType(B,type);CHKERRQ(ierr);
2566   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2567   a = (Mat_SeqBAIJ*)B->data;
2568 
2569   /* set matrix "i" values */
2570   a->i[0] = 0;
2571   for (i=1; i<= mbs; i++) {
2572     a->i[i]      = a->i[i-1] + browlengths[i-1];
2573     a->ilen[i-1] = browlengths[i-1];
2574   }
2575   a->nz         = 0;
2576   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2577 
2578   /* read in nonzero values */
2579   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2580   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2581   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2582 
2583   /* set "a" and "j" values into matrix */
2584   nzcount = 0; jcount = 0;
2585   for (i=0; i<mbs; i++) {
2586     nzcountb = nzcount;
2587     nmask    = 0;
2588     for (j=0; j<bs; j++) {
2589       kmax = rowlengths[i*bs+j];
2590       for (k=0; k<kmax; k++) {
2591         tmp = jj[nzcount++]/bs;
2592 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2593       }
2594     }
2595     /* sort the masked values */
2596     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2597 
2598     /* set "j" values into matrix */
2599     maskcount = 1;
2600     for (j=0; j<nmask; j++) {
2601       a->j[jcount++]  = masked[j];
2602       mask[masked[j]] = maskcount++;
2603     }
2604     /* set "a" values into matrix */
2605     ishift = bs2*a->i[i];
2606     for (j=0; j<bs; j++) {
2607       kmax = rowlengths[i*bs+j];
2608       for (k=0; k<kmax; k++) {
2609         tmp       = jj[nzcountb]/bs ;
2610         block     = mask[tmp] - 1;
2611         point     = jj[nzcountb] - bs*tmp;
2612         idx       = ishift + bs2*block + j + bs*point;
2613         a->a[idx] = (MatScalar)aa[nzcountb++];
2614       }
2615     }
2616     /* zero out the mask elements we set */
2617     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2618   }
2619   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2620 
2621   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2622   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2623   ierr = PetscFree(aa);CHKERRQ(ierr);
2624   ierr = PetscFree(jj);CHKERRQ(ierr);
2625   ierr = PetscFree(mask);CHKERRQ(ierr);
2626 
2627   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2628   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2629   ierr = MatView_Private(B);CHKERRQ(ierr);
2630 
2631   *A = B;
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatCreateSeqBAIJ"
2637 /*@C
2638    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2639    compressed row) format.  For good matrix assembly performance the
2640    user should preallocate the matrix storage by setting the parameter nz
2641    (or the array nnz).  By setting these parameters accurately, performance
2642    during matrix assembly can be increased by more than a factor of 50.
2643 
2644    Collective on MPI_Comm
2645 
2646    Input Parameters:
2647 +  comm - MPI communicator, set to PETSC_COMM_SELF
2648 .  bs - size of block
2649 .  m - number of rows
2650 .  n - number of columns
2651 .  nz - number of nonzero blocks  per block row (same for all rows)
2652 -  nnz - array containing the number of nonzero blocks in the various block rows
2653          (possibly different for each block row) or PETSC_NULL
2654 
2655    Output Parameter:
2656 .  A - the matrix
2657 
2658    Options Database Keys:
2659 .   -mat_no_unroll - uses code that does not unroll the loops in the
2660                      block calculations (much slower)
2661 .    -mat_block_size - size of the blocks to use
2662 
2663    Level: intermediate
2664 
2665    Notes:
2666    The number of rows and columns must be divisible by blocksize.
2667 
2668    If the nnz parameter is given then the nz parameter is ignored
2669 
2670    A nonzero block is any block that as 1 or more nonzeros in it
2671 
2672    The block AIJ format is fully compatible with standard Fortran 77
2673    storage.  That is, the stored row and column indices can begin at
2674    either one (as in Fortran) or zero.  See the users' manual for details.
2675 
2676    Specify the preallocated storage with either nz or nnz (not both).
2677    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2678    allocation.  For additional details, see the users manual chapter on
2679    matrices.
2680 
2681 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2682 @*/
2683 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2684 {
2685   PetscErrorCode ierr;
2686 
2687   PetscFunctionBegin;
2688   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2689   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2690   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2696 /*@C
2697    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2698    per row in the matrix. For good matrix assembly performance the
2699    user should preallocate the matrix storage by setting the parameter nz
2700    (or the array nnz).  By setting these parameters accurately, performance
2701    during matrix assembly can be increased by more than a factor of 50.
2702 
2703    Collective on MPI_Comm
2704 
2705    Input Parameters:
2706 +  A - the matrix
2707 .  bs - size of block
2708 .  nz - number of block nonzeros per block row (same for all rows)
2709 -  nnz - array containing the number of block nonzeros in the various block rows
2710          (possibly different for each block row) or PETSC_NULL
2711 
2712    Options Database Keys:
2713 .   -mat_no_unroll - uses code that does not unroll the loops in the
2714                      block calculations (much slower)
2715 .    -mat_block_size - size of the blocks to use
2716 
2717    Level: intermediate
2718 
2719    Notes:
2720    If the nnz parameter is given then the nz parameter is ignored
2721 
2722    The block AIJ format is fully compatible with standard Fortran 77
2723    storage.  That is, the stored row and column indices can begin at
2724    either one (as in Fortran) or zero.  See the users' manual for details.
2725 
2726    Specify the preallocated storage with either nz or nnz (not both).
2727    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2728    allocation.  For additional details, see the users manual chapter on
2729    matrices.
2730 
2731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2732 @*/
2733 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2734 {
2735   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2736 
2737   PetscFunctionBegin;
2738   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2739   if (f) {
2740     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2741   }
2742   PetscFunctionReturn(0);
2743 }
2744 
2745