xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 09f382308e660e6aeafeafa6047bab56e09352c9)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for direct calls from Fortran (Used in PETSc-fun3d)
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval,lastcol = -1;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
590   stepval = (n-1)*4;
591   for (k=0; k<m; k++) { /* loop over added rows */
592     row  = im[k];
593     rp   = aj + ai[row];
594     ap   = aa + 16*ai[row];
595     nrow = ailen[row];
596     low  = 0;
597     high = nrow;
598     for (l=0; l<n; l++) { /* loop over added columns */
599       col = in[l];
600       if (col <= lastcol) low = 0; else high = nrow;
601       lastcol = col;
602       value = v + k*(stepval+4 + l)*4;
603       while (high-low > 7) {
604         t = (low+high)/2;
605         if (rp[t] > col) high = t;
606         else             low  = t;
607       }
608       for (i=low; i<high; i++) {
609         if (rp[i] > col) break;
610         if (rp[i] == col) {
611           bap  = ap +  16*i;
612           for (ii=0; ii<4; ii++,value+=stepval) {
613             for (jj=ii; jj<16; jj+=4) {
614               bap[jj] += *value++;
615             }
616           }
617           goto noinsert2;
618         }
619       }
620       N = nrow++ - 1;
621       high++; /* added new column index thus must search to one higher than before */
622       /* shift up all the later entries in this row */
623       for (ii=N; ii>=i; ii--) {
624         rp[ii+1] = rp[ii];
625         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
626       }
627       if (N >= i) {
628         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
629       }
630       rp[i] = col;
631       bap   = ap +  16*i;
632       for (ii=0; ii<4; ii++,value+=stepval) {
633         for (jj=ii; jj<16; jj+=4) {
634           bap[jj] = *value++;
635         }
636       }
637       noinsert2:;
638       low = i;
639     }
640     ailen[row] = nrow;
641   }
642   PetscFunctionReturnVoid();
643 }
644 EXTERN_C_END
645 
646 #if defined(PETSC_HAVE_FORTRAN_CAPS)
647 #define matsetvalues4_ MATSETVALUES4
648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
649 #define matsetvalues4_ matsetvalues4
650 #endif
651 
652 EXTERN_C_BEGIN
653 #undef __FUNCT__
654 #define __FUNCT__ "MatSetValues4_"
655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
656 {
657   Mat         A = *AA;
658   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
659   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
660   PetscInt    *ai=a->i,*ailen=a->ilen;
661   PetscInt    *aj=a->j,brow,bcol;
662   PetscInt    ridx,cidx,lastcol = -1;
663   MatScalar   *ap,value,*aa=a->a,*bap;
664 
665   PetscFunctionBegin;
666   for (k=0; k<m; k++) { /* loop over added rows */
667     row  = im[k]; brow = row/4;
668     rp   = aj + ai[brow];
669     ap   = aa + 16*ai[brow];
670     nrow = ailen[brow];
671     low  = 0;
672     high = nrow;
673     for (l=0; l<n; l++) { /* loop over added columns */
674       col = in[l]; bcol = col/4;
675       ridx = row % 4; cidx = col % 4;
676       value = v[l + k*n];
677       if (col <= lastcol) low = 0; else high = nrow;
678       lastcol = col;
679       while (high-low > 7) {
680         t = (low+high)/2;
681         if (rp[t] > bcol) high = t;
682         else              low  = t;
683       }
684       for (i=low; i<high; i++) {
685         if (rp[i] > bcol) break;
686         if (rp[i] == bcol) {
687           bap  = ap +  16*i + 4*cidx + ridx;
688           *bap += value;
689           goto noinsert1;
690         }
691       }
692       N = nrow++ - 1;
693       high++; /* added new column thus must search to one higher than before */
694       /* shift up all the later entries in this row */
695       for (ii=N; ii>=i; ii--) {
696         rp[ii+1] = rp[ii];
697         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
698       }
699       if (N>=i) {
700         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
701       }
702       rp[i]                    = bcol;
703       ap[16*i + 4*cidx + ridx] = value;
704       noinsert1:;
705       low = i;
706     }
707     ailen[brow] = nrow;
708   }
709   PetscFunctionReturnVoid();
710 }
711 EXTERN_C_END
712 
713 /*  UGLY, ugly, ugly
714    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
715    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
716    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
717    converts the entries into single precision and then calls ..._MatScalar() to put them
718    into the single precision data structures.
719 */
720 #if defined(PETSC_USE_MAT_SINGLE)
721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
722 #else
723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
724 #endif
725 
726 #define CHUNKSIZE  10
727 
728 /*
729      Checks for missing diagonals
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
734 {
735   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
736   PetscErrorCode ierr;
737   PetscInt       *diag,*jj = a->j,i;
738 
739   PetscFunctionBegin;
740   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
741   diag = a->diag;
742   for (i=0; i<a->mbs; i++) {
743     if (jj[diag[i]] != i) {
744       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
745     }
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
753 {
754   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
755   PetscErrorCode ierr;
756   PetscInt       i,j,m = a->mbs;
757 
758   PetscFunctionBegin;
759   if (!a->diag) {
760     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
761   }
762   for (i=0; i<m; i++) {
763     a->diag[i] = a->i[i+1];
764     for (j=a->i[i]; j<a->i[i+1]; j++) {
765       if (a->j[j] == i) {
766         a->diag[i] = j;
767         break;
768       }
769     }
770   }
771   PetscFunctionReturn(0);
772 }
773 
774 
775 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
776 
777 #undef __FUNCT__
778 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
779 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
780 {
781   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
782   PetscErrorCode ierr;
783   PetscInt       n = a->mbs,i;
784 
785   PetscFunctionBegin;
786   *nn = n;
787   if (!ia) PetscFunctionReturn(0);
788   if (symmetric) {
789     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
790   } else if (oshift == 1) {
791     /* temporarily add 1 to i and j indices */
792     PetscInt nz = a->i[n];
793     for (i=0; i<nz; i++) a->j[i]++;
794     for (i=0; i<n+1; i++) a->i[i]++;
795     *ia = a->i; *ja = a->j;
796   } else {
797     *ia = a->i; *ja = a->j;
798   }
799 
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
805 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
806 {
807   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
808   PetscErrorCode ierr;
809   PetscInt       i,n = a->mbs;
810 
811   PetscFunctionBegin;
812   if (!ia) PetscFunctionReturn(0);
813   if (symmetric) {
814     ierr = PetscFree(*ia);CHKERRQ(ierr);
815     ierr = PetscFree(*ja);CHKERRQ(ierr);
816   } else if (oshift == 1) {
817     PetscInt nz = a->i[n]-1;
818     for (i=0; i<nz; i++) a->j[i]--;
819     for (i=0; i<n+1; i++) a->i[i]--;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatDestroy_SeqBAIJ"
826 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
827 {
828   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
829   PetscErrorCode ierr;
830 
831   PetscFunctionBegin;
832 #if defined(PETSC_USE_LOG)
833   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
834 #endif
835   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
836   if (a->row) {
837     ierr = ISDestroy(a->row);CHKERRQ(ierr);
838   }
839   if (a->col) {
840     ierr = ISDestroy(a->col);CHKERRQ(ierr);
841   }
842   ierr = PetscFree(a->diag);CHKERRQ(ierr);
843   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
844   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
845   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
846   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
847   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
848   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
849 #if defined(PETSC_USE_MAT_SINGLE)
850   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
851 #endif
852   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
853   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
854 
855   ierr = PetscFree(a);CHKERRQ(ierr);
856 
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatSetOption_SeqBAIJ"
869 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
870 {
871   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
872   PetscErrorCode ierr;
873 
874   PetscFunctionBegin;
875   switch (op) {
876   case MAT_ROW_ORIENTED:
877     a->roworiented    = PETSC_TRUE;
878     break;
879   case MAT_COLUMN_ORIENTED:
880     a->roworiented    = PETSC_FALSE;
881     break;
882   case MAT_COLUMNS_SORTED:
883     a->sorted         = PETSC_TRUE;
884     break;
885   case MAT_COLUMNS_UNSORTED:
886     a->sorted         = PETSC_FALSE;
887     break;
888   case MAT_KEEP_ZEROED_ROWS:
889     a->keepzeroedrows = PETSC_TRUE;
890     break;
891   case MAT_NO_NEW_NONZERO_LOCATIONS:
892     a->nonew          = 1;
893     break;
894   case MAT_NEW_NONZERO_LOCATION_ERR:
895     a->nonew          = -1;
896     break;
897   case MAT_NEW_NONZERO_ALLOCATION_ERR:
898     a->nonew          = -2;
899     break;
900   case MAT_YES_NEW_NONZERO_LOCATIONS:
901     a->nonew          = 0;
902     break;
903   case MAT_ROWS_SORTED:
904   case MAT_ROWS_UNSORTED:
905   case MAT_YES_NEW_DIAGONALS:
906   case MAT_IGNORE_OFF_PROC_ENTRIES:
907   case MAT_USE_HASH_TABLE:
908     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
909     break;
910   case MAT_NO_NEW_DIAGONALS:
911     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
912   case MAT_SYMMETRIC:
913   case MAT_STRUCTURALLY_SYMMETRIC:
914   case MAT_NOT_SYMMETRIC:
915   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
916   case MAT_HERMITIAN:
917   case MAT_NOT_HERMITIAN:
918   case MAT_SYMMETRY_ETERNAL:
919   case MAT_NOT_SYMMETRY_ETERNAL:
920     break;
921   default:
922     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
923   }
924   PetscFunctionReturn(0);
925 }
926 
927 #undef __FUNCT__
928 #define __FUNCT__ "MatGetRow_SeqBAIJ"
929 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
930 {
931   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
932   PetscErrorCode ierr;
933   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
934   MatScalar      *aa,*aa_i;
935   PetscScalar    *v_i;
936 
937   PetscFunctionBegin;
938   bs  = A->rmap.bs;
939   ai  = a->i;
940   aj  = a->j;
941   aa  = a->a;
942   bs2 = a->bs2;
943 
944   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
945 
946   bn  = row/bs;   /* Block number */
947   bp  = row % bs; /* Block Position */
948   M   = ai[bn+1] - ai[bn];
949   *nz = bs*M;
950 
951   if (v) {
952     *v = 0;
953     if (*nz) {
954       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
955       for (i=0; i<M; i++) { /* for each block in the block row */
956         v_i  = *v + i*bs;
957         aa_i = aa + bs2*(ai[bn] + i);
958         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
959       }
960     }
961   }
962 
963   if (idx) {
964     *idx = 0;
965     if (*nz) {
966       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
967       for (i=0; i<M; i++) { /* for each block in the block row */
968         idx_i = *idx + i*bs;
969         itmp  = bs*aj[ai[bn] + i];
970         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
971       }
972     }
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
979 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
980 {
981   PetscErrorCode ierr;
982 
983   PetscFunctionBegin;
984   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
985   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatTranspose_SeqBAIJ"
991 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
992 {
993   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
994   Mat            C;
995   PetscErrorCode ierr;
996   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
997   PetscInt       *rows,*cols,bs2=a->bs2;
998   PetscScalar    *array;
999 
1000   PetscFunctionBegin;
1001   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1002   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1003   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1004 
1005 #if defined(PETSC_USE_MAT_SINGLE)
1006   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1007   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1008 #else
1009   array = a->a;
1010 #endif
1011 
1012   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1013   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1014   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1015   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1016   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1017   ierr = PetscFree(col);CHKERRQ(ierr);
1018   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1019   cols = rows + bs;
1020   for (i=0; i<mbs; i++) {
1021     cols[0] = i*bs;
1022     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1023     len = ai[i+1] - ai[i];
1024     for (j=0; j<len; j++) {
1025       rows[0] = (*aj++)*bs;
1026       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1027       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1028       array += bs2;
1029     }
1030   }
1031   ierr = PetscFree(rows);CHKERRQ(ierr);
1032 #if defined(PETSC_USE_MAT_SINGLE)
1033   ierr = PetscFree(array);
1034 #endif
1035 
1036   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038 
1039   if (B) {
1040     *B = C;
1041   } else {
1042     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1043   }
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1049 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1050 {
1051   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1052   PetscErrorCode ierr;
1053   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1054   int            fd;
1055   PetscScalar    *aa;
1056   FILE           *file;
1057 
1058   PetscFunctionBegin;
1059   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1060   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1061   col_lens[0] = MAT_FILE_COOKIE;
1062 
1063   col_lens[1] = A->rmap.N;
1064   col_lens[2] = A->cmap.n;
1065   col_lens[3] = a->nz*bs2;
1066 
1067   /* store lengths of each row and write (including header) to file */
1068   count = 0;
1069   for (i=0; i<a->mbs; i++) {
1070     for (j=0; j<bs; j++) {
1071       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1072     }
1073   }
1074   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1075   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1076 
1077   /* store column indices (zero start index) */
1078   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1079   count = 0;
1080   for (i=0; i<a->mbs; i++) {
1081     for (j=0; j<bs; j++) {
1082       for (k=a->i[i]; k<a->i[i+1]; k++) {
1083         for (l=0; l<bs; l++) {
1084           jj[count++] = bs*a->j[k] + l;
1085         }
1086       }
1087     }
1088   }
1089   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1090   ierr = PetscFree(jj);CHKERRQ(ierr);
1091 
1092   /* store nonzero values */
1093   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1094   count = 0;
1095   for (i=0; i<a->mbs; i++) {
1096     for (j=0; j<bs; j++) {
1097       for (k=a->i[i]; k<a->i[i+1]; k++) {
1098         for (l=0; l<bs; l++) {
1099           aa[count++] = a->a[bs2*k + l*bs + j];
1100         }
1101       }
1102     }
1103   }
1104   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1105   ierr = PetscFree(aa);CHKERRQ(ierr);
1106 
1107   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1108   if (file) {
1109     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1116 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1117 {
1118   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode    ierr;
1120   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1121   PetscViewerFormat format;
1122 
1123   PetscFunctionBegin;
1124   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1125   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1126     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1127   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1128     Mat aij;
1129     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1130     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1131     ierr = MatDestroy(aij);CHKERRQ(ierr);
1132   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1133      PetscFunctionReturn(0);
1134   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1135     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1136     for (i=0; i<a->mbs; i++) {
1137       for (j=0; j<bs; j++) {
1138         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1139         for (k=a->i[i]; k<a->i[i+1]; k++) {
1140           for (l=0; l<bs; l++) {
1141 #if defined(PETSC_USE_COMPLEX)
1142             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1143               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1144                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1146               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1147                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1148             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1150             }
1151 #else
1152             if (a->a[bs2*k + l*bs + j] != 0.0) {
1153               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1154             }
1155 #endif
1156           }
1157         }
1158         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1159       }
1160     }
1161     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1162   } else {
1163     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1164     for (i=0; i<a->mbs; i++) {
1165       for (j=0; j<bs; j++) {
1166         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1167         for (k=a->i[i]; k<a->i[i+1]; k++) {
1168           for (l=0; l<bs; l++) {
1169 #if defined(PETSC_USE_COMPLEX)
1170             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1171               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1172                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1173             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1174               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1175                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1176             } else {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1178             }
1179 #else
1180             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1181 #endif
1182           }
1183         }
1184         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1185       }
1186     }
1187     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1188   }
1189   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1195 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1196 {
1197   Mat            A = (Mat) Aa;
1198   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1199   PetscErrorCode ierr;
1200   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1201   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1202   MatScalar      *aa;
1203   PetscViewer    viewer;
1204 
1205   PetscFunctionBegin;
1206 
1207   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1208   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1209 
1210   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1211 
1212   /* loop over matrix elements drawing boxes */
1213   color = PETSC_DRAW_BLUE;
1214   for (i=0,row=0; i<mbs; i++,row+=bs) {
1215     for (j=a->i[i]; j<a->i[i+1]; j++) {
1216       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1217       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1218       aa = a->a + j*bs2;
1219       for (k=0; k<bs; k++) {
1220         for (l=0; l<bs; l++) {
1221           if (PetscRealPart(*aa++) >=  0.) continue;
1222           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1223         }
1224       }
1225     }
1226   }
1227   color = PETSC_DRAW_CYAN;
1228   for (i=0,row=0; i<mbs; i++,row+=bs) {
1229     for (j=a->i[i]; j<a->i[i+1]; j++) {
1230       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1231       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1232       aa = a->a + j*bs2;
1233       for (k=0; k<bs; k++) {
1234         for (l=0; l<bs; l++) {
1235           if (PetscRealPart(*aa++) != 0.) continue;
1236           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1237         }
1238       }
1239     }
1240   }
1241 
1242   color = PETSC_DRAW_RED;
1243   for (i=0,row=0; i<mbs; i++,row+=bs) {
1244     for (j=a->i[i]; j<a->i[i+1]; j++) {
1245       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1246       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1247       aa = a->a + j*bs2;
1248       for (k=0; k<bs; k++) {
1249         for (l=0; l<bs; l++) {
1250           if (PetscRealPart(*aa++) <= 0.) continue;
1251           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1252         }
1253       }
1254     }
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1261 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1262 {
1263   PetscErrorCode ierr;
1264   PetscReal      xl,yl,xr,yr,w,h;
1265   PetscDraw      draw;
1266   PetscTruth     isnull;
1267 
1268   PetscFunctionBegin;
1269 
1270   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1271   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1272 
1273   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1274   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1275   xr += w;    yr += h;  xl = -w;     yl = -h;
1276   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1277   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1278   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatView_SeqBAIJ"
1284 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1285 {
1286   PetscErrorCode ierr;
1287   PetscTruth     iascii,isbinary,isdraw;
1288 
1289   PetscFunctionBegin;
1290   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1291   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1293   if (iascii){
1294     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1295   } else if (isbinary) {
1296     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1297   } else if (isdraw) {
1298     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1299   } else {
1300     Mat B;
1301     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1302     ierr = MatView(B,viewer);CHKERRQ(ierr);
1303     ierr = MatDestroy(B);CHKERRQ(ierr);
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1311 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1312 {
1313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1314   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1315   PetscInt    *ai = a->i,*ailen = a->ilen;
1316   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1317   MatScalar   *ap,*aa = a->a,zero = 0.0;
1318 
1319   PetscFunctionBegin;
1320   for (k=0; k<m; k++) { /* loop over rows */
1321     row  = im[k]; brow = row/bs;
1322     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1323     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1324     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1325     nrow = ailen[brow];
1326     for (l=0; l<n; l++) { /* loop over columns */
1327       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1328       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1329       col  = in[l] ;
1330       bcol = col/bs;
1331       cidx = col%bs;
1332       ridx = row%bs;
1333       high = nrow;
1334       low  = 0; /* assume unsorted */
1335       while (high-low > 5) {
1336         t = (low+high)/2;
1337         if (rp[t] > bcol) high = t;
1338         else             low  = t;
1339       }
1340       for (i=low; i<high; i++) {
1341         if (rp[i] > bcol) break;
1342         if (rp[i] == bcol) {
1343           *v++ = ap[bs2*i+bs*cidx+ridx];
1344           goto finished;
1345         }
1346       }
1347       *v++ = zero;
1348       finished:;
1349     }
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #if defined(PETSC_USE_MAT_SINGLE)
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1357 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1358 {
1359   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1360   PetscErrorCode ierr;
1361   PetscInt       i,N = m*n*b->bs2;
1362   MatScalar      *vsingle;
1363 
1364   PetscFunctionBegin;
1365   if (N > b->setvalueslen) {
1366     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1367     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1368     b->setvalueslen  = N;
1369   }
1370   vsingle = b->setvaluescopy;
1371   for (i=0; i<N; i++) {
1372     vsingle[i] = v[i];
1373   }
1374   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 #endif
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1382 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1383 {
1384   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1385   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1386   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1387   PetscErrorCode  ierr;
1388   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1389   PetscTruth      roworiented=a->roworiented;
1390   const MatScalar *value = v;
1391   MatScalar       *ap,*aa = a->a,*bap;
1392 
1393   PetscFunctionBegin;
1394   if (roworiented) {
1395     stepval = (n-1)*bs;
1396   } else {
1397     stepval = (m-1)*bs;
1398   }
1399   for (k=0; k<m; k++) { /* loop over added rows */
1400     row  = im[k];
1401     if (row < 0) continue;
1402 #if defined(PETSC_USE_DEBUG)
1403     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1404 #endif
1405     rp   = aj + ai[row];
1406     ap   = aa + bs2*ai[row];
1407     rmax = imax[row];
1408     nrow = ailen[row];
1409     low  = 0;
1410     high = nrow;
1411     for (l=0; l<n; l++) { /* loop over added columns */
1412       if (in[l] < 0) continue;
1413 #if defined(PETSC_USE_DEBUG)
1414       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1415 #endif
1416       col = in[l];
1417       if (roworiented) {
1418         value = v + k*(stepval+bs)*bs + l*bs;
1419       } else {
1420         value = v + l*(stepval+bs)*bs + k*bs;
1421       }
1422       if (col <= lastcol) low = 0; else high = nrow;
1423       lastcol = col;
1424       while (high-low > 7) {
1425         t = (low+high)/2;
1426         if (rp[t] > col) high = t;
1427         else             low  = t;
1428       }
1429       for (i=low; i<high; i++) {
1430         if (rp[i] > col) break;
1431         if (rp[i] == col) {
1432           bap  = ap +  bs2*i;
1433           if (roworiented) {
1434             if (is == ADD_VALUES) {
1435               for (ii=0; ii<bs; ii++,value+=stepval) {
1436                 for (jj=ii; jj<bs2; jj+=bs) {
1437                   bap[jj] += *value++;
1438                 }
1439               }
1440             } else {
1441               for (ii=0; ii<bs; ii++,value+=stepval) {
1442                 for (jj=ii; jj<bs2; jj+=bs) {
1443                   bap[jj] = *value++;
1444                 }
1445               }
1446             }
1447           } else {
1448             if (is == ADD_VALUES) {
1449               for (ii=0; ii<bs; ii++,value+=stepval) {
1450                 for (jj=0; jj<bs; jj++) {
1451                   *bap++ += *value++;
1452                 }
1453               }
1454             } else {
1455               for (ii=0; ii<bs; ii++,value+=stepval) {
1456                 for (jj=0; jj<bs; jj++) {
1457                   *bap++  = *value++;
1458                 }
1459               }
1460             }
1461           }
1462           goto noinsert2;
1463         }
1464       }
1465       if (nonew == 1) goto noinsert2;
1466       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1467       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew);
1468       N = nrow++ - 1; high++;
1469       /* shift up all the later entries in this row */
1470       for (ii=N; ii>=i; ii--) {
1471         rp[ii+1] = rp[ii];
1472         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1473       }
1474       if (N >= i) {
1475         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1476       }
1477       rp[i] = col;
1478       bap   = ap +  bs2*i;
1479       if (roworiented) {
1480         for (ii=0; ii<bs; ii++,value+=stepval) {
1481           for (jj=ii; jj<bs2; jj+=bs) {
1482             bap[jj] = *value++;
1483           }
1484         }
1485       } else {
1486         for (ii=0; ii<bs; ii++,value+=stepval) {
1487           for (jj=0; jj<bs; jj++) {
1488             *bap++  = *value++;
1489           }
1490         }
1491       }
1492       noinsert2:;
1493       low = i;
1494     }
1495     ailen[row] = nrow;
1496   }
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1502 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1503 {
1504   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1505   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1506   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1507   PetscErrorCode ierr;
1508   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1509   MatScalar      *aa = a->a,*ap;
1510   PetscReal      ratio=0.6;
1511 
1512   PetscFunctionBegin;
1513   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1514 
1515   if (m) rmax = ailen[0];
1516   for (i=1; i<mbs; i++) {
1517     /* move each row back by the amount of empty slots (fshift) before it*/
1518     fshift += imax[i-1] - ailen[i-1];
1519     rmax   = PetscMax(rmax,ailen[i]);
1520     if (fshift) {
1521       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1522       N = ailen[i];
1523       for (j=0; j<N; j++) {
1524         ip[j-fshift] = ip[j];
1525         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1526       }
1527     }
1528     ai[i] = ai[i-1] + ailen[i-1];
1529   }
1530   if (mbs) {
1531     fshift += imax[mbs-1] - ailen[mbs-1];
1532     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1533   }
1534   /* reset ilen and imax for each row */
1535   for (i=0; i<mbs; i++) {
1536     ailen[i] = imax[i] = ai[i+1] - ai[i];
1537   }
1538   a->nz = ai[mbs];
1539 
1540   /* diagonals may have moved, so kill the diagonal pointers */
1541   a->idiagvalid = PETSC_FALSE;
1542   if (fshift && a->diag) {
1543     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1544     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1545     a->diag = 0;
1546   }
1547   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1548   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1549   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1550   a->reallocs          = 0;
1551   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1552 
1553   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1554   if (a->compressedrow.use){
1555     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1556   }
1557 
1558   A->same_nonzero = PETSC_TRUE;
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 /*
1563    This function returns an array of flags which indicate the locations of contiguous
1564    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1565    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1566    Assume: sizes should be long enough to hold all the values.
1567 */
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1570 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1571 {
1572   PetscInt   i,j,k,row;
1573   PetscTruth flg;
1574 
1575   PetscFunctionBegin;
1576   for (i=0,j=0; i<n; j++) {
1577     row = idx[i];
1578     if (row%bs!=0) { /* Not the begining of a block */
1579       sizes[j] = 1;
1580       i++;
1581     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1582       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1583       i++;
1584     } else { /* Begining of the block, so check if the complete block exists */
1585       flg = PETSC_TRUE;
1586       for (k=1; k<bs; k++) {
1587         if (row+k != idx[i+k]) { /* break in the block */
1588           flg = PETSC_FALSE;
1589           break;
1590         }
1591       }
1592       if (flg) { /* No break in the bs */
1593         sizes[j] = bs;
1594         i+= bs;
1595       } else {
1596         sizes[j] = 1;
1597         i++;
1598       }
1599     }
1600   }
1601   *bs_max = j;
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1607 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1608 {
1609   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1610   PetscErrorCode ierr;
1611   PetscInt       i,j,k,count,*rows;
1612   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1613   PetscScalar    zero = 0.0;
1614   MatScalar      *aa;
1615 
1616   PetscFunctionBegin;
1617   /* Make a copy of the IS and  sort it */
1618   /* allocate memory for rows,sizes */
1619   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1620   sizes = rows + is_n;
1621 
1622   /* copy IS values to rows, and sort them */
1623   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1624   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1625   if (baij->keepzeroedrows) {
1626     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1627     bs_max = is_n;
1628     A->same_nonzero = PETSC_TRUE;
1629   } else {
1630     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1631     A->same_nonzero = PETSC_FALSE;
1632   }
1633 
1634   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1635     row   = rows[j];
1636     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1637     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1638     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1639     if (sizes[i] == bs && !baij->keepzeroedrows) {
1640       if (diag != 0.0) {
1641         if (baij->ilen[row/bs] > 0) {
1642           baij->ilen[row/bs]       = 1;
1643           baij->j[baij->i[row/bs]] = row/bs;
1644           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1645         }
1646         /* Now insert all the diagonal values for this bs */
1647         for (k=0; k<bs; k++) {
1648           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1649         }
1650       } else { /* (diag == 0.0) */
1651         baij->ilen[row/bs] = 0;
1652       } /* end (diag == 0.0) */
1653     } else { /* (sizes[i] != bs) */
1654 #if defined (PETSC_USE_DEBUG)
1655       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1656 #endif
1657       for (k=0; k<count; k++) {
1658         aa[0] =  zero;
1659         aa    += bs;
1660       }
1661       if (diag != 0.0) {
1662         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1663       }
1664     }
1665   }
1666 
1667   ierr = PetscFree(rows);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #undef __FUNCT__
1673 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1674 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1675 {
1676   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1677   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1678   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1679   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1680   PetscErrorCode ierr;
1681   PetscInt       ridx,cidx,bs2=a->bs2;
1682   PetscTruth     roworiented=a->roworiented;
1683   MatScalar      *ap,value,*aa=a->a,*bap;
1684 
1685   PetscFunctionBegin;
1686   for (k=0; k<m; k++) { /* loop over added rows */
1687     row  = im[k];
1688     brow = row/bs;
1689     if (row < 0) continue;
1690 #if defined(PETSC_USE_DEBUG)
1691     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1692 #endif
1693     rp   = aj + ai[brow];
1694     ap   = aa + bs2*ai[brow];
1695     rmax = imax[brow];
1696     nrow = ailen[brow];
1697     low  = 0;
1698     high = nrow;
1699     for (l=0; l<n; l++) { /* loop over added columns */
1700       if (in[l] < 0) continue;
1701 #if defined(PETSC_USE_DEBUG)
1702       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1703 #endif
1704       col = in[l]; bcol = col/bs;
1705       ridx = row % bs; cidx = col % bs;
1706       if (roworiented) {
1707         value = v[l + k*n];
1708       } else {
1709         value = v[k + l*m];
1710       }
1711       if (col <= lastcol) low = 0; else high = nrow;
1712       lastcol = col;
1713       while (high-low > 7) {
1714         t = (low+high)/2;
1715         if (rp[t] > bcol) high = t;
1716         else              low  = t;
1717       }
1718       for (i=low; i<high; i++) {
1719         if (rp[i] > bcol) break;
1720         if (rp[i] == bcol) {
1721           bap  = ap +  bs2*i + bs*cidx + ridx;
1722           if (is == ADD_VALUES) *bap += value;
1723           else                  *bap  = value;
1724           goto noinsert1;
1725         }
1726       }
1727       if (nonew == 1) goto noinsert1;
1728       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1729       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew);
1730       N = nrow++ - 1; high++;
1731       /* shift up all the later entries in this row */
1732       for (ii=N; ii>=i; ii--) {
1733         rp[ii+1] = rp[ii];
1734         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1735       }
1736       if (N>=i) {
1737         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1738       }
1739       rp[i]                      = bcol;
1740       ap[bs2*i + bs*cidx + ridx] = value;
1741       a->nz++;
1742       noinsert1:;
1743       low = i;
1744     }
1745     ailen[brow] = nrow;
1746   }
1747   A->same_nonzero = PETSC_FALSE;
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1754 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1755 {
1756   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1757   Mat            outA;
1758   PetscErrorCode ierr;
1759   PetscTruth     row_identity,col_identity;
1760 
1761   PetscFunctionBegin;
1762   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1763   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1764   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1765   if (!row_identity || !col_identity) {
1766     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1767   }
1768 
1769   outA          = inA;
1770   inA->factor   = FACTOR_LU;
1771 
1772   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1773 
1774   a->row        = row;
1775   a->col        = col;
1776   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1777   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1778 
1779   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1780   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1781   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1782 
1783   /*
1784       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1785       for ILU(0) factorization with natural ordering
1786   */
1787   if (inA->rmap.bs < 8) {
1788     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1789   } else {
1790     if (!a->solve_work) {
1791       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1792       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1793     }
1794   }
1795 
1796   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1797 
1798   PetscFunctionReturn(0);
1799 }
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1802 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1803 {
1804   static PetscTruth called = PETSC_FALSE;
1805   MPI_Comm          comm = A->comm;
1806   PetscErrorCode    ierr;
1807 
1808   PetscFunctionBegin;
1809   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1810   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1811   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 EXTERN_C_BEGIN
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1819 {
1820   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1821   PetscInt    i,nz,nbs;
1822 
1823   PetscFunctionBegin;
1824   nz  = baij->maxnz/baij->bs2;
1825   nbs = baij->nbs;
1826   for (i=0; i<nz; i++) {
1827     baij->j[i] = indices[i];
1828   }
1829   baij->nz = nz;
1830   for (i=0; i<nbs; i++) {
1831     baij->ilen[i] = baij->imax[i];
1832   }
1833 
1834   PetscFunctionReturn(0);
1835 }
1836 EXTERN_C_END
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1840 /*@
1841     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1842        in the matrix.
1843 
1844   Input Parameters:
1845 +  mat - the SeqBAIJ matrix
1846 -  indices - the column indices
1847 
1848   Level: advanced
1849 
1850   Notes:
1851     This can be called if you have precomputed the nonzero structure of the
1852   matrix and want to provide it to the matrix object to improve the performance
1853   of the MatSetValues() operation.
1854 
1855     You MUST have set the correct numbers of nonzeros per row in the call to
1856   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1857 
1858     MUST be called before any calls to MatSetValues();
1859 
1860 @*/
1861 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1862 {
1863   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1864 
1865   PetscFunctionBegin;
1866   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1867   PetscValidPointer(indices,2);
1868   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1869   if (f) {
1870     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1871   } else {
1872     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1873   }
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1879 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1880 {
1881   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1882   PetscErrorCode ierr;
1883   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1884   PetscReal      atmp;
1885   PetscScalar    *x,zero = 0.0;
1886   MatScalar      *aa;
1887   PetscInt       ncols,brow,krow,kcol;
1888 
1889   PetscFunctionBegin;
1890   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1891   bs   = A->rmap.bs;
1892   aa   = a->a;
1893   ai   = a->i;
1894   aj   = a->j;
1895   mbs = a->mbs;
1896 
1897   ierr = VecSet(v,zero);CHKERRQ(ierr);
1898   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1899   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1900   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1901   for (i=0; i<mbs; i++) {
1902     ncols = ai[1] - ai[0]; ai++;
1903     brow  = bs*i;
1904     for (j=0; j<ncols; j++){
1905       /* bcol = bs*(*aj); */
1906       for (kcol=0; kcol<bs; kcol++){
1907         for (krow=0; krow<bs; krow++){
1908           atmp = PetscAbsScalar(*aa); aa++;
1909           row = brow + krow;    /* row index */
1910           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1911           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1912         }
1913       }
1914       aj++;
1915     }
1916   }
1917   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatCopy_SeqBAIJ"
1923 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1924 {
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   /* If the two matrices have the same copy implementation, use fast copy. */
1929   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1930     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1931     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1932 
1933     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1934       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1935     }
1936     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1937   } else {
1938     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1939   }
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1945 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1946 {
1947   PetscErrorCode ierr;
1948 
1949   PetscFunctionBegin;
1950   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1956 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1957 {
1958   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1959   PetscFunctionBegin;
1960   *array = a->a;
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1966 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1967 {
1968   PetscFunctionBegin;
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #include "petscblaslapack.h"
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1975 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1976 {
1977   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1978   PetscErrorCode ierr;
1979   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1980   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1981 
1982   PetscFunctionBegin;
1983   if (str == SAME_NONZERO_PATTERN) {
1984     PetscScalar alpha = a;
1985     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1986   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1987     if (y->xtoy && y->XtoY != X) {
1988       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1989       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1990     }
1991     if (!y->xtoy) { /* get xtoy */
1992       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1993       y->XtoY = X;
1994     }
1995     bs2 = bs*bs;
1996     for (i=0; i<x->nz; i++) {
1997       j = 0;
1998       while (j < bs2){
1999         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2000         j++;
2001       }
2002     }
2003     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2004   } else {
2005     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2006   }
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2012 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2013 {
2014   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2015   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2016   PetscScalar    *aa = a->a;
2017 
2018   PetscFunctionBegin;
2019   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2025 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2026 {
2027   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2028   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2029   PetscScalar    *aa = a->a;
2030 
2031   PetscFunctionBegin;
2032   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 
2037 /* -------------------------------------------------------------------*/
2038 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2039        MatGetRow_SeqBAIJ,
2040        MatRestoreRow_SeqBAIJ,
2041        MatMult_SeqBAIJ_N,
2042 /* 4*/ MatMultAdd_SeqBAIJ_N,
2043        MatMultTranspose_SeqBAIJ,
2044        MatMultTransposeAdd_SeqBAIJ,
2045        MatSolve_SeqBAIJ_N,
2046        0,
2047        0,
2048 /*10*/ 0,
2049        MatLUFactor_SeqBAIJ,
2050        0,
2051        0,
2052        MatTranspose_SeqBAIJ,
2053 /*15*/ MatGetInfo_SeqBAIJ,
2054        MatEqual_SeqBAIJ,
2055        MatGetDiagonal_SeqBAIJ,
2056        MatDiagonalScale_SeqBAIJ,
2057        MatNorm_SeqBAIJ,
2058 /*20*/ 0,
2059        MatAssemblyEnd_SeqBAIJ,
2060        0,
2061        MatSetOption_SeqBAIJ,
2062        MatZeroEntries_SeqBAIJ,
2063 /*25*/ MatZeroRows_SeqBAIJ,
2064        MatLUFactorSymbolic_SeqBAIJ,
2065        MatLUFactorNumeric_SeqBAIJ_N,
2066        MatCholeskyFactorSymbolic_SeqBAIJ,
2067        MatCholeskyFactorNumeric_SeqBAIJ_N,
2068 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2069        MatILUFactorSymbolic_SeqBAIJ,
2070        MatICCFactorSymbolic_SeqBAIJ,
2071        MatGetArray_SeqBAIJ,
2072        MatRestoreArray_SeqBAIJ,
2073 /*35*/ MatDuplicate_SeqBAIJ,
2074        0,
2075        0,
2076        MatILUFactor_SeqBAIJ,
2077        0,
2078 /*40*/ MatAXPY_SeqBAIJ,
2079        MatGetSubMatrices_SeqBAIJ,
2080        MatIncreaseOverlap_SeqBAIJ,
2081        MatGetValues_SeqBAIJ,
2082        MatCopy_SeqBAIJ,
2083 /*45*/ MatPrintHelp_SeqBAIJ,
2084        MatScale_SeqBAIJ,
2085        0,
2086        0,
2087        0,
2088 /*50*/ 0,
2089        MatGetRowIJ_SeqBAIJ,
2090        MatRestoreRowIJ_SeqBAIJ,
2091        0,
2092        0,
2093 /*55*/ 0,
2094        0,
2095        0,
2096        0,
2097        MatSetValuesBlocked_SeqBAIJ,
2098 /*60*/ MatGetSubMatrix_SeqBAIJ,
2099        MatDestroy_SeqBAIJ,
2100        MatView_SeqBAIJ,
2101        0,
2102        0,
2103 /*65*/ 0,
2104        0,
2105        0,
2106        0,
2107        0,
2108 /*70*/ MatGetRowMax_SeqBAIJ,
2109        MatConvert_Basic,
2110        0,
2111        0,
2112        0,
2113 /*75*/ 0,
2114        0,
2115        0,
2116        0,
2117        0,
2118 /*80*/ 0,
2119        0,
2120        0,
2121        0,
2122        MatLoad_SeqBAIJ,
2123 /*85*/ 0,
2124        0,
2125        0,
2126        0,
2127        0,
2128 /*90*/ 0,
2129        0,
2130        0,
2131        0,
2132        0,
2133 /*95*/ 0,
2134        0,
2135        0,
2136        0,
2137        0,
2138 /*100*/0,
2139        0,
2140        0,
2141        0,
2142        0,
2143 /*105*/0,
2144        MatRealPart_SeqBAIJ,
2145        MatImaginaryPart_SeqBAIJ
2146 };
2147 
2148 EXTERN_C_BEGIN
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2151 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2152 {
2153   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2154   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2155   PetscErrorCode ierr;
2156 
2157   PetscFunctionBegin;
2158   if (aij->nonew != 1) {
2159     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2160   }
2161 
2162   /* allocate space for values if not already there */
2163   if (!aij->saved_values) {
2164     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2165   }
2166 
2167   /* copy values over */
2168   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 EXTERN_C_END
2172 
2173 EXTERN_C_BEGIN
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2176 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2177 {
2178   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2179   PetscErrorCode ierr;
2180   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2181 
2182   PetscFunctionBegin;
2183   if (aij->nonew != 1) {
2184     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2185   }
2186   if (!aij->saved_values) {
2187     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2188   }
2189 
2190   /* copy values over */
2191   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 EXTERN_C_END
2195 
2196 EXTERN_C_BEGIN
2197 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2198 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2199 EXTERN_C_END
2200 
2201 EXTERN_C_BEGIN
2202 #undef __FUNCT__
2203 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2204 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2205 {
2206   Mat_SeqBAIJ    *b;
2207   PetscErrorCode ierr;
2208   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2209   PetscTruth     flg,skipallocation = PETSC_FALSE;
2210 
2211   PetscFunctionBegin;
2212 
2213   if (nz == MAT_SKIP_ALLOCATION) {
2214     skipallocation = PETSC_TRUE;
2215     nz             = 0;
2216   }
2217   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2218   if (nnz && newbs != bs) {
2219     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2220   }
2221   bs   = newbs;
2222 
2223   B->rmap.bs = B->cmap.bs = bs;
2224   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2225   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2226 
2227   B->preallocated = PETSC_TRUE;
2228 
2229   mbs  = B->rmap.n/bs;
2230   nbs  = B->cmap.n/bs;
2231   bs2  = bs*bs;
2232 
2233   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2234     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2235   }
2236 
2237   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2238   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2239   if (nnz) {
2240     for (i=0; i<mbs; i++) {
2241       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2242       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2243     }
2244   }
2245 
2246   b       = (Mat_SeqBAIJ*)B->data;
2247   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2248   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2249   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2250   if (!flg) {
2251     switch (bs) {
2252     case 1:
2253       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2254       B->ops->mult            = MatMult_SeqBAIJ_1;
2255       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2256       break;
2257     case 2:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2259       B->ops->mult            = MatMult_SeqBAIJ_2;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2261       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2262       break;
2263     case 3:
2264       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2265       B->ops->mult            = MatMult_SeqBAIJ_3;
2266       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2267       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2268       break;
2269     case 4:
2270       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2271       B->ops->mult            = MatMult_SeqBAIJ_4;
2272       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2273       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2274       break;
2275     case 5:
2276       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2277       B->ops->mult            = MatMult_SeqBAIJ_5;
2278       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2279       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2280       break;
2281     case 6:
2282       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2283       B->ops->mult            = MatMult_SeqBAIJ_6;
2284       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2285       break;
2286     case 7:
2287       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2288       B->ops->mult            = MatMult_SeqBAIJ_7;
2289       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2290       break;
2291     default:
2292       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2293       B->ops->mult            = MatMult_SeqBAIJ_N;
2294       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2295       break;
2296     }
2297   }
2298   B->rmap.bs      = bs;
2299   b->mbs     = mbs;
2300   b->nbs     = nbs;
2301   if (!skipallocation) {
2302     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2303     /* b->ilen will count nonzeros in each block row so far. */
2304     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2305     if (!nnz) {
2306       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2307       else if (nz <= 0)        nz = 1;
2308       for (i=0; i<mbs; i++) b->imax[i] = nz;
2309       nz = nz*mbs;
2310     } else {
2311       nz = 0;
2312       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2313     }
2314 
2315     /* allocate the matrix space */
2316     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2317     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2318     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2319     b->singlemalloc = PETSC_TRUE;
2320 
2321     b->i[0] = 0;
2322     for (i=1; i<mbs+1; i++) {
2323       b->i[i] = b->i[i-1] + b->imax[i-1];
2324     }
2325     b->free_a     = PETSC_TRUE;
2326     b->free_ij    = PETSC_TRUE;
2327   } else {
2328     b->free_a     = PETSC_FALSE;
2329     b->free_ij    = PETSC_FALSE;
2330   }
2331 
2332   B->rmap.bs          = bs;
2333   b->bs2              = bs2;
2334   b->mbs              = mbs;
2335   b->nz               = 0;
2336   b->maxnz            = nz*bs2;
2337   B->info.nz_unneeded = (PetscReal)b->maxnz;
2338   PetscFunctionReturn(0);
2339 }
2340 EXTERN_C_END
2341 
2342 /*MC
2343    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2344    block sparse compressed row format.
2345 
2346    Options Database Keys:
2347 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2348 
2349   Level: beginner
2350 
2351 .seealso: MatCreateSeqBAIJ()
2352 M*/
2353 
2354 EXTERN_C_BEGIN
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatCreate_SeqBAIJ"
2357 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2358 {
2359   PetscErrorCode ierr;
2360   PetscMPIInt    size;
2361   Mat_SeqBAIJ    *b;
2362 
2363   PetscFunctionBegin;
2364   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2365   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2366 
2367   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2368   B->data = (void*)b;
2369   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2370   B->factor           = 0;
2371   B->mapping          = 0;
2372   b->row              = 0;
2373   b->col              = 0;
2374   b->icol             = 0;
2375   b->reallocs         = 0;
2376   b->saved_values     = 0;
2377 #if defined(PETSC_USE_MAT_SINGLE)
2378   b->setvalueslen     = 0;
2379   b->setvaluescopy    = PETSC_NULL;
2380 #endif
2381 
2382   b->sorted           = PETSC_FALSE;
2383   b->roworiented      = PETSC_TRUE;
2384   b->nonew            = 0;
2385   b->diag             = 0;
2386   b->solve_work       = 0;
2387   b->mult_work        = 0;
2388   B->spptr            = 0;
2389   B->info.nz_unneeded = (PetscReal)b->maxnz;
2390   b->keepzeroedrows   = PETSC_FALSE;
2391   b->xtoy              = 0;
2392   b->XtoY              = 0;
2393   b->compressedrow.use     = PETSC_FALSE;
2394   b->compressedrow.nrows   = 0;
2395   b->compressedrow.i       = PETSC_NULL;
2396   b->compressedrow.rindex  = PETSC_NULL;
2397   b->compressedrow.checked = PETSC_FALSE;
2398   B->same_nonzero          = PETSC_FALSE;
2399 
2400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2401                                      "MatInvertBlockDiagonal_SeqBAIJ",
2402                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2404                                      "MatStoreValues_SeqBAIJ",
2405                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2407                                      "MatRetrieveValues_SeqBAIJ",
2408                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2410                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2411                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2413                                      "MatConvert_SeqBAIJ_SeqAIJ",
2414                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2415   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2416                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2417                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2418   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2419                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2420                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 EXTERN_C_END
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2427 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2428 {
2429   Mat            C;
2430   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2431   PetscErrorCode ierr;
2432   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2433 
2434   PetscFunctionBegin;
2435   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2436 
2437   *B = 0;
2438   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2439   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2440   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2441   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2442   c    = (Mat_SeqBAIJ*)C->data;
2443 
2444   C->rmap.N   = A->rmap.N;
2445   C->cmap.N   = A->cmap.N;
2446   C->rmap.bs  = A->rmap.bs;
2447   c->bs2 = a->bs2;
2448   c->mbs = a->mbs;
2449   c->nbs = a->nbs;
2450 
2451   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2452   for (i=0; i<mbs; i++) {
2453     c->imax[i] = a->imax[i];
2454     c->ilen[i] = a->ilen[i];
2455   }
2456 
2457   /* allocate the matrix space */
2458   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2459   c->singlemalloc = PETSC_TRUE;
2460   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2461   if (mbs > 0) {
2462     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2463     if (cpvalues == MAT_COPY_VALUES) {
2464       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2465     } else {
2466       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2467     }
2468   }
2469   c->sorted      = a->sorted;
2470   c->roworiented = a->roworiented;
2471   c->nonew       = a->nonew;
2472 
2473   if (a->diag) {
2474     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2475     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2476     for (i=0; i<mbs; i++) {
2477       c->diag[i] = a->diag[i];
2478     }
2479   } else c->diag        = 0;
2480   c->nz                 = a->nz;
2481   c->maxnz              = a->maxnz;
2482   c->solve_work         = 0;
2483   c->mult_work          = 0;
2484   c->free_a             = PETSC_TRUE;
2485   c->free_ij            = PETSC_TRUE;
2486   C->preallocated       = PETSC_TRUE;
2487   C->assembled          = PETSC_TRUE;
2488 
2489   c->compressedrow.use     = a->compressedrow.use;
2490   c->compressedrow.nrows   = a->compressedrow.nrows;
2491   c->compressedrow.checked = a->compressedrow.checked;
2492   if ( a->compressedrow.checked && a->compressedrow.use){
2493     i = a->compressedrow.nrows;
2494     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2495     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2496     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2497     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2498   } else {
2499     c->compressedrow.use    = PETSC_FALSE;
2500     c->compressedrow.i      = PETSC_NULL;
2501     c->compressedrow.rindex = PETSC_NULL;
2502   }
2503   C->same_nonzero = A->same_nonzero;
2504   *B = C;
2505   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatLoad_SeqBAIJ"
2511 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2512 {
2513   Mat_SeqBAIJ    *a;
2514   Mat            B;
2515   PetscErrorCode ierr;
2516   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2517   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2518   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2519   PetscInt       *masked,nmask,tmp,bs2,ishift;
2520   PetscMPIInt    size;
2521   int            fd;
2522   PetscScalar    *aa;
2523   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2527   bs2  = bs*bs;
2528 
2529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2530   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2531   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2532   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2533   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2534   M = header[1]; N = header[2]; nz = header[3];
2535 
2536   if (header[3] < 0) {
2537     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2538   }
2539 
2540   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2541 
2542   /*
2543      This code adds extra rows to make sure the number of rows is
2544     divisible by the blocksize
2545   */
2546   mbs        = M/bs;
2547   extra_rows = bs - M + bs*(mbs);
2548   if (extra_rows == bs) extra_rows = 0;
2549   else                  mbs++;
2550   if (extra_rows) {
2551     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2552   }
2553 
2554   /* read in row lengths */
2555   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2556   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2557   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2558 
2559   /* read in column indices */
2560   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2561   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2562   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2563 
2564   /* loop over row lengths determining block row lengths */
2565   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2566   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2567   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2568   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2569   masked   = mask + mbs;
2570   rowcount = 0; nzcount = 0;
2571   for (i=0; i<mbs; i++) {
2572     nmask = 0;
2573     for (j=0; j<bs; j++) {
2574       kmax = rowlengths[rowcount];
2575       for (k=0; k<kmax; k++) {
2576         tmp = jj[nzcount++]/bs;
2577         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2578       }
2579       rowcount++;
2580     }
2581     browlengths[i] += nmask;
2582     /* zero out the mask elements we set */
2583     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2584   }
2585 
2586   /* create our matrix */
2587   ierr = MatCreate(comm,&B);
2588   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2589   ierr = MatSetType(B,type);CHKERRQ(ierr);
2590   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2591   a = (Mat_SeqBAIJ*)B->data;
2592 
2593   /* set matrix "i" values */
2594   a->i[0] = 0;
2595   for (i=1; i<= mbs; i++) {
2596     a->i[i]      = a->i[i-1] + browlengths[i-1];
2597     a->ilen[i-1] = browlengths[i-1];
2598   }
2599   a->nz         = 0;
2600   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2601 
2602   /* read in nonzero values */
2603   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2604   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2605   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2606 
2607   /* set "a" and "j" values into matrix */
2608   nzcount = 0; jcount = 0;
2609   for (i=0; i<mbs; i++) {
2610     nzcountb = nzcount;
2611     nmask    = 0;
2612     for (j=0; j<bs; j++) {
2613       kmax = rowlengths[i*bs+j];
2614       for (k=0; k<kmax; k++) {
2615         tmp = jj[nzcount++]/bs;
2616 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2617       }
2618     }
2619     /* sort the masked values */
2620     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2621 
2622     /* set "j" values into matrix */
2623     maskcount = 1;
2624     for (j=0; j<nmask; j++) {
2625       a->j[jcount++]  = masked[j];
2626       mask[masked[j]] = maskcount++;
2627     }
2628     /* set "a" values into matrix */
2629     ishift = bs2*a->i[i];
2630     for (j=0; j<bs; j++) {
2631       kmax = rowlengths[i*bs+j];
2632       for (k=0; k<kmax; k++) {
2633         tmp       = jj[nzcountb]/bs ;
2634         block     = mask[tmp] - 1;
2635         point     = jj[nzcountb] - bs*tmp;
2636         idx       = ishift + bs2*block + j + bs*point;
2637         a->a[idx] = (MatScalar)aa[nzcountb++];
2638       }
2639     }
2640     /* zero out the mask elements we set */
2641     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2642   }
2643   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2644 
2645   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2646   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2647   ierr = PetscFree(aa);CHKERRQ(ierr);
2648   ierr = PetscFree(jj);CHKERRQ(ierr);
2649   ierr = PetscFree(mask);CHKERRQ(ierr);
2650 
2651   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2652   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2653   ierr = MatView_Private(B);CHKERRQ(ierr);
2654 
2655   *A = B;
2656   PetscFunctionReturn(0);
2657 }
2658 
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatCreateSeqBAIJ"
2661 /*@C
2662    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2663    compressed row) format.  For good matrix assembly performance the
2664    user should preallocate the matrix storage by setting the parameter nz
2665    (or the array nnz).  By setting these parameters accurately, performance
2666    during matrix assembly can be increased by more than a factor of 50.
2667 
2668    Collective on MPI_Comm
2669 
2670    Input Parameters:
2671 +  comm - MPI communicator, set to PETSC_COMM_SELF
2672 .  bs - size of block
2673 .  m - number of rows
2674 .  n - number of columns
2675 .  nz - number of nonzero blocks  per block row (same for all rows)
2676 -  nnz - array containing the number of nonzero blocks in the various block rows
2677          (possibly different for each block row) or PETSC_NULL
2678 
2679    Output Parameter:
2680 .  A - the matrix
2681 
2682    Options Database Keys:
2683 .   -mat_no_unroll - uses code that does not unroll the loops in the
2684                      block calculations (much slower)
2685 .    -mat_block_size - size of the blocks to use
2686 
2687    Level: intermediate
2688 
2689    Notes:
2690    The number of rows and columns must be divisible by blocksize.
2691 
2692    If the nnz parameter is given then the nz parameter is ignored
2693 
2694    A nonzero block is any block that as 1 or more nonzeros in it
2695 
2696    The block AIJ format is fully compatible with standard Fortran 77
2697    storage.  That is, the stored row and column indices can begin at
2698    either one (as in Fortran) or zero.  See the users' manual for details.
2699 
2700    Specify the preallocated storage with either nz or nnz (not both).
2701    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2702    allocation.  For additional details, see the users manual chapter on
2703    matrices.
2704 
2705 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2706 @*/
2707 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2708 {
2709   PetscErrorCode ierr;
2710 
2711   PetscFunctionBegin;
2712   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2713   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2714   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2715   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 #undef __FUNCT__
2720 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2721 /*@C
2722    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2723    per row in the matrix. For good matrix assembly performance the
2724    user should preallocate the matrix storage by setting the parameter nz
2725    (or the array nnz).  By setting these parameters accurately, performance
2726    during matrix assembly can be increased by more than a factor of 50.
2727 
2728    Collective on MPI_Comm
2729 
2730    Input Parameters:
2731 +  A - the matrix
2732 .  bs - size of block
2733 .  nz - number of block nonzeros per block row (same for all rows)
2734 -  nnz - array containing the number of block nonzeros in the various block rows
2735          (possibly different for each block row) or PETSC_NULL
2736 
2737    Options Database Keys:
2738 .   -mat_no_unroll - uses code that does not unroll the loops in the
2739                      block calculations (much slower)
2740 .    -mat_block_size - size of the blocks to use
2741 
2742    Level: intermediate
2743 
2744    Notes:
2745    If the nnz parameter is given then the nz parameter is ignored
2746 
2747    The block AIJ format is fully compatible with standard Fortran 77
2748    storage.  That is, the stored row and column indices can begin at
2749    either one (as in Fortran) or zero.  See the users' manual for details.
2750 
2751    Specify the preallocated storage with either nz or nnz (not both).
2752    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2753    allocation.  For additional details, see the users manual chapter on
2754    matrices.
2755 
2756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2757 @*/
2758 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2759 {
2760   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2761 
2762   PetscFunctionBegin;
2763   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2764   if (f) {
2765     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2766   }
2767   PetscFunctionReturn(0);
2768 }
2769 
2770 #undef __FUNCT__
2771 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2772 /*@
2773      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2774               (upper triangular entries in CSR format) provided by the user.
2775 
2776      Collective on MPI_Comm
2777 
2778    Input Parameters:
2779 +  comm - must be an MPI communicator of size 1
2780 .  bs - size of block
2781 .  m - number of rows
2782 .  n - number of columns
2783 .  i - row indices
2784 .  j - column indices
2785 -  a - matrix values
2786 
2787    Output Parameter:
2788 .  mat - the matrix
2789 
2790    Level: intermediate
2791 
2792    Notes:
2793        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2794     once the matrix is destroyed
2795 
2796        You cannot set new nonzero locations into this matrix, that will generate an error.
2797 
2798        The i and j indices are 0 based
2799 
2800 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2801 
2802 @*/
2803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2804 {
2805   PetscErrorCode ierr;
2806   PetscInt       ii;
2807   Mat_SeqBAIJ    *baij;
2808 
2809   PetscFunctionBegin;
2810   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2811   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2812 
2813   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2814   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2815   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2816   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2817   baij = (Mat_SeqBAIJ*)(*mat)->data;
2818   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2819 
2820   baij->i = i;
2821   baij->j = j;
2822   baij->a = a;
2823   baij->singlemalloc = PETSC_FALSE;
2824   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2825   baij->free_a       = PETSC_FALSE;
2826   baij->free_ij       = PETSC_FALSE;
2827 
2828   for (ii=0; ii<m; ii++) {
2829     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2830 #if defined(PETSC_USE_DEBUG)
2831     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2832 #endif
2833   }
2834 #if defined(PETSC_USE_DEBUG)
2835   for (ii=0; ii<baij->i[m]; ii++) {
2836     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2837     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2838   }
2839 #endif
2840 
2841   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2842   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2843   PetscFunctionReturn(0);
2844 }
2845