xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7c99f97c3f3bba52d196ca028dc0b3087b9b38b8)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for direct calls from Fortran (Used in PETSc-fun3d)
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval,lastcol = -1;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
590   stepval = (n-1)*4;
591   for (k=0; k<m; k++) { /* loop over added rows */
592     row  = im[k];
593     rp   = aj + ai[row];
594     ap   = aa + 16*ai[row];
595     nrow = ailen[row];
596     low  = 0;
597     high = nrow;
598     for (l=0; l<n; l++) { /* loop over added columns */
599       col = in[l];
600       if (col <= lastcol) low = 0; else high = nrow;
601       lastcol = col;
602       value = v + k*(stepval+4 + l)*4;
603       while (high-low > 7) {
604         t = (low+high)/2;
605         if (rp[t] > col) high = t;
606         else             low  = t;
607       }
608       for (i=low; i<high; i++) {
609         if (rp[i] > col) break;
610         if (rp[i] == col) {
611           bap  = ap +  16*i;
612           for (ii=0; ii<4; ii++,value+=stepval) {
613             for (jj=ii; jj<16; jj+=4) {
614               bap[jj] += *value++;
615             }
616           }
617           goto noinsert2;
618         }
619       }
620       N = nrow++ - 1;
621       high++; /* added new column index thus must search to one higher than before */
622       /* shift up all the later entries in this row */
623       for (ii=N; ii>=i; ii--) {
624         rp[ii+1] = rp[ii];
625         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
626       }
627       if (N >= i) {
628         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
629       }
630       rp[i] = col;
631       bap   = ap +  16*i;
632       for (ii=0; ii<4; ii++,value+=stepval) {
633         for (jj=ii; jj<16; jj+=4) {
634           bap[jj] = *value++;
635         }
636       }
637       noinsert2:;
638       low = i;
639     }
640     ailen[row] = nrow;
641   }
642   PetscFunctionReturnVoid();
643 }
644 EXTERN_C_END
645 
646 #if defined(PETSC_HAVE_FORTRAN_CAPS)
647 #define matsetvalues4_ MATSETVALUES4
648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
649 #define matsetvalues4_ matsetvalues4
650 #endif
651 
652 EXTERN_C_BEGIN
653 #undef __FUNCT__
654 #define __FUNCT__ "MatSetValues4_"
655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
656 {
657   Mat         A = *AA;
658   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
659   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
660   PetscInt    *ai=a->i,*ailen=a->ilen;
661   PetscInt    *aj=a->j,brow,bcol;
662   PetscInt    ridx,cidx,lastcol = -1;
663   MatScalar   *ap,value,*aa=a->a,*bap;
664 
665   PetscFunctionBegin;
666   for (k=0; k<m; k++) { /* loop over added rows */
667     row  = im[k]; brow = row/4;
668     rp   = aj + ai[brow];
669     ap   = aa + 16*ai[brow];
670     nrow = ailen[brow];
671     low  = 0;
672     high = nrow;
673     for (l=0; l<n; l++) { /* loop over added columns */
674       col = in[l]; bcol = col/4;
675       ridx = row % 4; cidx = col % 4;
676       value = v[l + k*n];
677       if (col <= lastcol) low = 0; else high = nrow;
678       lastcol = col;
679       while (high-low > 7) {
680         t = (low+high)/2;
681         if (rp[t] > bcol) high = t;
682         else              low  = t;
683       }
684       for (i=low; i<high; i++) {
685         if (rp[i] > bcol) break;
686         if (rp[i] == bcol) {
687           bap  = ap +  16*i + 4*cidx + ridx;
688           *bap += value;
689           goto noinsert1;
690         }
691       }
692       N = nrow++ - 1;
693       high++; /* added new column thus must search to one higher than before */
694       /* shift up all the later entries in this row */
695       for (ii=N; ii>=i; ii--) {
696         rp[ii+1] = rp[ii];
697         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
698       }
699       if (N>=i) {
700         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
701       }
702       rp[i]                    = bcol;
703       ap[16*i + 4*cidx + ridx] = value;
704       noinsert1:;
705       low = i;
706     }
707     ailen[brow] = nrow;
708   }
709   PetscFunctionReturnVoid();
710 }
711 EXTERN_C_END
712 
713 /*  UGLY, ugly, ugly
714    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
715    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
716    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
717    converts the entries into single precision and then calls ..._MatScalar() to put them
718    into the single precision data structures.
719 */
720 #if defined(PETSC_USE_MAT_SINGLE)
721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
722 #else
723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
724 #endif
725 
726 #define CHUNKSIZE  10
727 
728 /*
729      Checks for missing diagonals
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
734 {
735   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
736   PetscErrorCode ierr;
737   PetscInt       *diag,*jj = a->j,i;
738 
739   PetscFunctionBegin;
740   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
741   diag = a->diag;
742   for (i=0; i<a->mbs; i++) {
743     if (jj[diag[i]] != i) {
744       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
745     }
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
753 {
754   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
755   PetscErrorCode ierr;
756   PetscInt       i,j,m = a->mbs;
757 
758   PetscFunctionBegin;
759   if (!a->diag) {
760     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
761   }
762   for (i=0; i<m; i++) {
763     a->diag[i] = a->i[i+1];
764     for (j=a->i[i]; j<a->i[i+1]; j++) {
765       if (a->j[j] == i) {
766         a->diag[i] = j;
767         break;
768       }
769     }
770   }
771   PetscFunctionReturn(0);
772 }
773 
774 
775 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
776 
777 #undef __FUNCT__
778 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
779 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
780 {
781   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
782   PetscErrorCode ierr;
783   PetscInt       n = a->mbs,i;
784 
785   PetscFunctionBegin;
786   *nn = n;
787   if (!ia) PetscFunctionReturn(0);
788   if (symmetric) {
789     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
790   } else if (oshift == 1) {
791     /* temporarily add 1 to i and j indices */
792     PetscInt nz = a->i[n];
793     for (i=0; i<nz; i++) a->j[i]++;
794     for (i=0; i<n+1; i++) a->i[i]++;
795     *ia = a->i; *ja = a->j;
796   } else {
797     *ia = a->i; *ja = a->j;
798   }
799 
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
805 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
806 {
807   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
808   PetscErrorCode ierr;
809   PetscInt       i,n = a->mbs;
810 
811   PetscFunctionBegin;
812   if (!ia) PetscFunctionReturn(0);
813   if (symmetric) {
814     ierr = PetscFree(*ia);CHKERRQ(ierr);
815     ierr = PetscFree(*ja);CHKERRQ(ierr);
816   } else if (oshift == 1) {
817     PetscInt nz = a->i[n]-1;
818     for (i=0; i<nz; i++) a->j[i]--;
819     for (i=0; i<n+1; i++) a->i[i]--;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatDestroy_SeqBAIJ"
826 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
827 {
828   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
829   PetscErrorCode ierr;
830 
831   PetscFunctionBegin;
832 #if defined(PETSC_USE_LOG)
833   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
834 #endif
835   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
836   if (a->row) {
837     ierr = ISDestroy(a->row);CHKERRQ(ierr);
838   }
839   if (a->col) {
840     ierr = ISDestroy(a->col);CHKERRQ(ierr);
841   }
842   ierr = PetscFree(a->diag);CHKERRQ(ierr);
843   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
844   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
845   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
846   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
847   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
848   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
849 #if defined(PETSC_USE_MAT_SINGLE)
850   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
851 #endif
852   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
853   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
854 
855   ierr = PetscFree(a);CHKERRQ(ierr);
856 
857   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatSetOption_SeqBAIJ"
870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
871 {
872   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   switch (op) {
877   case MAT_ROW_ORIENTED:
878     a->roworiented    = PETSC_TRUE;
879     break;
880   case MAT_COLUMN_ORIENTED:
881     a->roworiented    = PETSC_FALSE;
882     break;
883   case MAT_COLUMNS_SORTED:
884     a->sorted         = PETSC_TRUE;
885     break;
886   case MAT_COLUMNS_UNSORTED:
887     a->sorted         = PETSC_FALSE;
888     break;
889   case MAT_KEEP_ZEROED_ROWS:
890     a->keepzeroedrows = PETSC_TRUE;
891     break;
892   case MAT_NO_NEW_NONZERO_LOCATIONS:
893     a->nonew          = 1;
894     break;
895   case MAT_NEW_NONZERO_LOCATION_ERR:
896     a->nonew          = -1;
897     break;
898   case MAT_NEW_NONZERO_ALLOCATION_ERR:
899     a->nonew          = -2;
900     break;
901   case MAT_YES_NEW_NONZERO_LOCATIONS:
902     a->nonew          = 0;
903     break;
904   case MAT_ROWS_SORTED:
905   case MAT_ROWS_UNSORTED:
906   case MAT_YES_NEW_DIAGONALS:
907   case MAT_IGNORE_OFF_PROC_ENTRIES:
908   case MAT_USE_HASH_TABLE:
909     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
910     break;
911   case MAT_NO_NEW_DIAGONALS:
912     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
913   case MAT_SYMMETRIC:
914   case MAT_STRUCTURALLY_SYMMETRIC:
915   case MAT_NOT_SYMMETRIC:
916   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
917   case MAT_HERMITIAN:
918   case MAT_NOT_HERMITIAN:
919   case MAT_SYMMETRY_ETERNAL:
920   case MAT_NOT_SYMMETRY_ETERNAL:
921     break;
922   default:
923     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
924   }
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatGetRow_SeqBAIJ"
930 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
931 {
932   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
933   PetscErrorCode ierr;
934   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
935   MatScalar      *aa,*aa_i;
936   PetscScalar    *v_i;
937 
938   PetscFunctionBegin;
939   bs  = A->rmap.bs;
940   ai  = a->i;
941   aj  = a->j;
942   aa  = a->a;
943   bs2 = a->bs2;
944 
945   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
946 
947   bn  = row/bs;   /* Block number */
948   bp  = row % bs; /* Block Position */
949   M   = ai[bn+1] - ai[bn];
950   *nz = bs*M;
951 
952   if (v) {
953     *v = 0;
954     if (*nz) {
955       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
956       for (i=0; i<M; i++) { /* for each block in the block row */
957         v_i  = *v + i*bs;
958         aa_i = aa + bs2*(ai[bn] + i);
959         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
960       }
961     }
962   }
963 
964   if (idx) {
965     *idx = 0;
966     if (*nz) {
967       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
968       for (i=0; i<M; i++) { /* for each block in the block row */
969         idx_i = *idx + i*bs;
970         itmp  = bs*aj[ai[bn] + i];
971         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
972       }
973     }
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
980 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
981 {
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
986   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatTranspose_SeqBAIJ"
992 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
993 {
994   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
995   Mat            C;
996   PetscErrorCode ierr;
997   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
998   PetscInt       *rows,*cols,bs2=a->bs2;
999   PetscScalar    *array;
1000 
1001   PetscFunctionBegin;
1002   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1003   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1004   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1005 
1006 #if defined(PETSC_USE_MAT_SINGLE)
1007   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1008   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1009 #else
1010   array = a->a;
1011 #endif
1012 
1013   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1014   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1015   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1016   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1017   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1018   ierr = PetscFree(col);CHKERRQ(ierr);
1019   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1020   cols = rows + bs;
1021   for (i=0; i<mbs; i++) {
1022     cols[0] = i*bs;
1023     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1024     len = ai[i+1] - ai[i];
1025     for (j=0; j<len; j++) {
1026       rows[0] = (*aj++)*bs;
1027       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1028       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1029       array += bs2;
1030     }
1031   }
1032   ierr = PetscFree(rows);CHKERRQ(ierr);
1033 #if defined(PETSC_USE_MAT_SINGLE)
1034   ierr = PetscFree(array);
1035 #endif
1036 
1037   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039 
1040   if (B) {
1041     *B = C;
1042   } else {
1043     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1044   }
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1050 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1051 {
1052   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1053   PetscErrorCode ierr;
1054   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1055   int            fd;
1056   PetscScalar    *aa;
1057   FILE           *file;
1058 
1059   PetscFunctionBegin;
1060   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1061   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1062   col_lens[0] = MAT_FILE_COOKIE;
1063 
1064   col_lens[1] = A->rmap.N;
1065   col_lens[2] = A->cmap.n;
1066   col_lens[3] = a->nz*bs2;
1067 
1068   /* store lengths of each row and write (including header) to file */
1069   count = 0;
1070   for (i=0; i<a->mbs; i++) {
1071     for (j=0; j<bs; j++) {
1072       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1073     }
1074   }
1075   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1076   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1077 
1078   /* store column indices (zero start index) */
1079   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1080   count = 0;
1081   for (i=0; i<a->mbs; i++) {
1082     for (j=0; j<bs; j++) {
1083       for (k=a->i[i]; k<a->i[i+1]; k++) {
1084         for (l=0; l<bs; l++) {
1085           jj[count++] = bs*a->j[k] + l;
1086         }
1087       }
1088     }
1089   }
1090   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1091   ierr = PetscFree(jj);CHKERRQ(ierr);
1092 
1093   /* store nonzero values */
1094   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1095   count = 0;
1096   for (i=0; i<a->mbs; i++) {
1097     for (j=0; j<bs; j++) {
1098       for (k=a->i[i]; k<a->i[i+1]; k++) {
1099         for (l=0; l<bs; l++) {
1100           aa[count++] = a->a[bs2*k + l*bs + j];
1101         }
1102       }
1103     }
1104   }
1105   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1106   ierr = PetscFree(aa);CHKERRQ(ierr);
1107 
1108   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1109   if (file) {
1110     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1111   }
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1117 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1118 {
1119   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1120   PetscErrorCode    ierr;
1121   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1122   PetscViewerFormat format;
1123 
1124   PetscFunctionBegin;
1125   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1126   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1127     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1128   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1129     Mat aij;
1130     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1131     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1132     ierr = MatDestroy(aij);CHKERRQ(ierr);
1133   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1134      PetscFunctionReturn(0);
1135   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1136     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1137     for (i=0; i<a->mbs; i++) {
1138       for (j=0; j<bs; j++) {
1139         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1140         for (k=a->i[i]; k<a->i[i+1]; k++) {
1141           for (l=0; l<bs; l++) {
1142 #if defined(PETSC_USE_COMPLEX)
1143             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1145                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1146             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1147               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1148                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1149             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1150               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1151             }
1152 #else
1153             if (a->a[bs2*k + l*bs + j] != 0.0) {
1154               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1155             }
1156 #endif
1157           }
1158         }
1159         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1160       }
1161     }
1162     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1163   } else {
1164     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1165     for (i=0; i<a->mbs; i++) {
1166       for (j=0; j<bs; j++) {
1167         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1168         for (k=a->i[i]; k<a->i[i+1]; k++) {
1169           for (l=0; l<bs; l++) {
1170 #if defined(PETSC_USE_COMPLEX)
1171             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1173                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1174             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1175               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1176                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1177             } else {
1178               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1179             }
1180 #else
1181             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1182 #endif
1183           }
1184         }
1185         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1186       }
1187     }
1188     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1189   }
1190   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1196 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1197 {
1198   Mat            A = (Mat) Aa;
1199   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1200   PetscErrorCode ierr;
1201   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1202   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1203   MatScalar      *aa;
1204   PetscViewer    viewer;
1205 
1206   PetscFunctionBegin;
1207 
1208   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1209   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1210 
1211   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1212 
1213   /* loop over matrix elements drawing boxes */
1214   color = PETSC_DRAW_BLUE;
1215   for (i=0,row=0; i<mbs; i++,row+=bs) {
1216     for (j=a->i[i]; j<a->i[i+1]; j++) {
1217       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1218       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1219       aa = a->a + j*bs2;
1220       for (k=0; k<bs; k++) {
1221         for (l=0; l<bs; l++) {
1222           if (PetscRealPart(*aa++) >=  0.) continue;
1223           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1224         }
1225       }
1226     }
1227   }
1228   color = PETSC_DRAW_CYAN;
1229   for (i=0,row=0; i<mbs; i++,row+=bs) {
1230     for (j=a->i[i]; j<a->i[i+1]; j++) {
1231       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1232       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1233       aa = a->a + j*bs2;
1234       for (k=0; k<bs; k++) {
1235         for (l=0; l<bs; l++) {
1236           if (PetscRealPart(*aa++) != 0.) continue;
1237           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1238         }
1239       }
1240     }
1241   }
1242 
1243   color = PETSC_DRAW_RED;
1244   for (i=0,row=0; i<mbs; i++,row+=bs) {
1245     for (j=a->i[i]; j<a->i[i+1]; j++) {
1246       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1247       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1248       aa = a->a + j*bs2;
1249       for (k=0; k<bs; k++) {
1250         for (l=0; l<bs; l++) {
1251           if (PetscRealPart(*aa++) <= 0.) continue;
1252           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1253         }
1254       }
1255     }
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1262 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1263 {
1264   PetscErrorCode ierr;
1265   PetscReal      xl,yl,xr,yr,w,h;
1266   PetscDraw      draw;
1267   PetscTruth     isnull;
1268 
1269   PetscFunctionBegin;
1270 
1271   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1272   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1273 
1274   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1275   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1276   xr += w;    yr += h;  xl = -w;     yl = -h;
1277   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1278   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1279   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_SeqBAIJ"
1285 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1286 {
1287   PetscErrorCode ierr;
1288   PetscTruth     iascii,isbinary,isdraw;
1289 
1290   PetscFunctionBegin;
1291   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1294   if (iascii){
1295     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1296   } else if (isbinary) {
1297     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1298   } else if (isdraw) {
1299     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1300   } else {
1301     Mat B;
1302     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1303     ierr = MatView(B,viewer);CHKERRQ(ierr);
1304     ierr = MatDestroy(B);CHKERRQ(ierr);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1312 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1313 {
1314   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1315   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1316   PetscInt    *ai = a->i,*ailen = a->ilen;
1317   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1318   MatScalar   *ap,*aa = a->a,zero = 0.0;
1319 
1320   PetscFunctionBegin;
1321   for (k=0; k<m; k++) { /* loop over rows */
1322     row  = im[k]; brow = row/bs;
1323     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1324     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1325     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1326     nrow = ailen[brow];
1327     for (l=0; l<n; l++) { /* loop over columns */
1328       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1329       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1330       col  = in[l] ;
1331       bcol = col/bs;
1332       cidx = col%bs;
1333       ridx = row%bs;
1334       high = nrow;
1335       low  = 0; /* assume unsorted */
1336       while (high-low > 5) {
1337         t = (low+high)/2;
1338         if (rp[t] > bcol) high = t;
1339         else             low  = t;
1340       }
1341       for (i=low; i<high; i++) {
1342         if (rp[i] > bcol) break;
1343         if (rp[i] == bcol) {
1344           *v++ = ap[bs2*i+bs*cidx+ridx];
1345           goto finished;
1346         }
1347       }
1348       *v++ = zero;
1349       finished:;
1350     }
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #if defined(PETSC_USE_MAT_SINGLE)
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1358 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1359 {
1360   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1361   PetscErrorCode ierr;
1362   PetscInt       i,N = m*n*b->bs2;
1363   MatScalar      *vsingle;
1364 
1365   PetscFunctionBegin;
1366   if (N > b->setvalueslen) {
1367     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1368     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1369     b->setvalueslen  = N;
1370   }
1371   vsingle = b->setvaluescopy;
1372   for (i=0; i<N; i++) {
1373     vsingle[i] = v[i];
1374   }
1375   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 #endif
1379 
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1383 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1384 {
1385   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1386   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1387   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1388   PetscErrorCode  ierr;
1389   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1390   PetscTruth      roworiented=a->roworiented;
1391   const MatScalar *value = v;
1392   MatScalar       *ap,*aa = a->a,*bap;
1393 
1394   PetscFunctionBegin;
1395   if (roworiented) {
1396     stepval = (n-1)*bs;
1397   } else {
1398     stepval = (m-1)*bs;
1399   }
1400   for (k=0; k<m; k++) { /* loop over added rows */
1401     row  = im[k];
1402     if (row < 0) continue;
1403 #if defined(PETSC_USE_DEBUG)
1404     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1405 #endif
1406     rp   = aj + ai[row];
1407     ap   = aa + bs2*ai[row];
1408     rmax = imax[row];
1409     nrow = ailen[row];
1410     low  = 0;
1411     high = nrow;
1412     for (l=0; l<n; l++) { /* loop over added columns */
1413       if (in[l] < 0) continue;
1414 #if defined(PETSC_USE_DEBUG)
1415       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1416 #endif
1417       col = in[l];
1418       if (roworiented) {
1419         value = v + k*(stepval+bs)*bs + l*bs;
1420       } else {
1421         value = v + l*(stepval+bs)*bs + k*bs;
1422       }
1423       if (col <= lastcol) low = 0; else high = nrow;
1424       lastcol = col;
1425       while (high-low > 7) {
1426         t = (low+high)/2;
1427         if (rp[t] > col) high = t;
1428         else             low  = t;
1429       }
1430       for (i=low; i<high; i++) {
1431         if (rp[i] > col) break;
1432         if (rp[i] == col) {
1433           bap  = ap +  bs2*i;
1434           if (roworiented) {
1435             if (is == ADD_VALUES) {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] += *value++;
1439                 }
1440               }
1441             } else {
1442               for (ii=0; ii<bs; ii++,value+=stepval) {
1443                 for (jj=ii; jj<bs2; jj+=bs) {
1444                   bap[jj] = *value++;
1445                 }
1446               }
1447             }
1448           } else {
1449             if (is == ADD_VALUES) {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++ += *value++;
1453                 }
1454               }
1455             } else {
1456               for (ii=0; ii<bs; ii++,value+=stepval) {
1457                 for (jj=0; jj<bs; jj++) {
1458                   *bap++  = *value++;
1459                 }
1460               }
1461             }
1462           }
1463           goto noinsert2;
1464         }
1465       }
1466       if (nonew == 1) goto noinsert2;
1467       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1468       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew);
1469       N = nrow++ - 1; high++;
1470       /* shift up all the later entries in this row */
1471       for (ii=N; ii>=i; ii--) {
1472         rp[ii+1] = rp[ii];
1473         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1474       }
1475       if (N >= i) {
1476         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1477       }
1478       rp[i] = col;
1479       bap   = ap +  bs2*i;
1480       if (roworiented) {
1481         for (ii=0; ii<bs; ii++,value+=stepval) {
1482           for (jj=ii; jj<bs2; jj+=bs) {
1483             bap[jj] = *value++;
1484           }
1485         }
1486       } else {
1487         for (ii=0; ii<bs; ii++,value+=stepval) {
1488           for (jj=0; jj<bs; jj++) {
1489             *bap++  = *value++;
1490           }
1491         }
1492       }
1493       noinsert2:;
1494       low = i;
1495     }
1496     ailen[row] = nrow;
1497   }
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1503 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1504 {
1505   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1506   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1507   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1508   PetscErrorCode ierr;
1509   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1510   MatScalar      *aa = a->a,*ap;
1511   PetscReal      ratio=0.6;
1512 
1513   PetscFunctionBegin;
1514   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1515 
1516   if (m) rmax = ailen[0];
1517   for (i=1; i<mbs; i++) {
1518     /* move each row back by the amount of empty slots (fshift) before it*/
1519     fshift += imax[i-1] - ailen[i-1];
1520     rmax   = PetscMax(rmax,ailen[i]);
1521     if (fshift) {
1522       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1523       N = ailen[i];
1524       for (j=0; j<N; j++) {
1525         ip[j-fshift] = ip[j];
1526         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1527       }
1528     }
1529     ai[i] = ai[i-1] + ailen[i-1];
1530   }
1531   if (mbs) {
1532     fshift += imax[mbs-1] - ailen[mbs-1];
1533     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1534   }
1535   /* reset ilen and imax for each row */
1536   for (i=0; i<mbs; i++) {
1537     ailen[i] = imax[i] = ai[i+1] - ai[i];
1538   }
1539   a->nz = ai[mbs];
1540 
1541   /* diagonals may have moved, so kill the diagonal pointers */
1542   a->idiagvalid = PETSC_FALSE;
1543   if (fshift && a->diag) {
1544     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1545     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1546     a->diag = 0;
1547   }
1548   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1549   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1550   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1551   a->reallocs          = 0;
1552   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1553 
1554   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1555   if (a->compressedrow.use){
1556     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1557   }
1558 
1559   A->same_nonzero = PETSC_TRUE;
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 /*
1564    This function returns an array of flags which indicate the locations of contiguous
1565    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1566    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1567    Assume: sizes should be long enough to hold all the values.
1568 */
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1571 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1572 {
1573   PetscInt   i,j,k,row;
1574   PetscTruth flg;
1575 
1576   PetscFunctionBegin;
1577   for (i=0,j=0; i<n; j++) {
1578     row = idx[i];
1579     if (row%bs!=0) { /* Not the begining of a block */
1580       sizes[j] = 1;
1581       i++;
1582     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1583       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1584       i++;
1585     } else { /* Begining of the block, so check if the complete block exists */
1586       flg = PETSC_TRUE;
1587       for (k=1; k<bs; k++) {
1588         if (row+k != idx[i+k]) { /* break in the block */
1589           flg = PETSC_FALSE;
1590           break;
1591         }
1592       }
1593       if (flg) { /* No break in the bs */
1594         sizes[j] = bs;
1595         i+= bs;
1596       } else {
1597         sizes[j] = 1;
1598         i++;
1599       }
1600     }
1601   }
1602   *bs_max = j;
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1608 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1609 {
1610   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1611   PetscErrorCode ierr;
1612   PetscInt       i,j,k,count,*rows;
1613   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1614   PetscScalar    zero = 0.0;
1615   MatScalar      *aa;
1616 
1617   PetscFunctionBegin;
1618   /* Make a copy of the IS and  sort it */
1619   /* allocate memory for rows,sizes */
1620   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1621   sizes = rows + is_n;
1622 
1623   /* copy IS values to rows, and sort them */
1624   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1625   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1626   if (baij->keepzeroedrows) {
1627     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1628     bs_max = is_n;
1629     A->same_nonzero = PETSC_TRUE;
1630   } else {
1631     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1632     A->same_nonzero = PETSC_FALSE;
1633   }
1634 
1635   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1636     row   = rows[j];
1637     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1638     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1639     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1640     if (sizes[i] == bs && !baij->keepzeroedrows) {
1641       if (diag != 0.0) {
1642         if (baij->ilen[row/bs] > 0) {
1643           baij->ilen[row/bs]       = 1;
1644           baij->j[baij->i[row/bs]] = row/bs;
1645           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1646         }
1647         /* Now insert all the diagonal values for this bs */
1648         for (k=0; k<bs; k++) {
1649           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1650         }
1651       } else { /* (diag == 0.0) */
1652         baij->ilen[row/bs] = 0;
1653       } /* end (diag == 0.0) */
1654     } else { /* (sizes[i] != bs) */
1655 #if defined (PETSC_USE_DEBUG)
1656       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1657 #endif
1658       for (k=0; k<count; k++) {
1659         aa[0] =  zero;
1660         aa    += bs;
1661       }
1662       if (diag != 0.0) {
1663         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1664       }
1665     }
1666   }
1667 
1668   ierr = PetscFree(rows);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1675 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1676 {
1677   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1678   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1679   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1680   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1681   PetscErrorCode ierr;
1682   PetscInt       ridx,cidx,bs2=a->bs2;
1683   PetscTruth     roworiented=a->roworiented;
1684   MatScalar      *ap,value,*aa=a->a,*bap;
1685 
1686   PetscFunctionBegin;
1687   for (k=0; k<m; k++) { /* loop over added rows */
1688     row  = im[k];
1689     brow = row/bs;
1690     if (row < 0) continue;
1691 #if defined(PETSC_USE_DEBUG)
1692     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1693 #endif
1694     rp   = aj + ai[brow];
1695     ap   = aa + bs2*ai[brow];
1696     rmax = imax[brow];
1697     nrow = ailen[brow];
1698     low  = 0;
1699     high = nrow;
1700     for (l=0; l<n; l++) { /* loop over added columns */
1701       if (in[l] < 0) continue;
1702 #if defined(PETSC_USE_DEBUG)
1703       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1704 #endif
1705       col = in[l]; bcol = col/bs;
1706       ridx = row % bs; cidx = col % bs;
1707       if (roworiented) {
1708         value = v[l + k*n];
1709       } else {
1710         value = v[k + l*m];
1711       }
1712       if (col <= lastcol) low = 0; else high = nrow;
1713       lastcol = col;
1714       while (high-low > 7) {
1715         t = (low+high)/2;
1716         if (rp[t] > bcol) high = t;
1717         else              low  = t;
1718       }
1719       for (i=low; i<high; i++) {
1720         if (rp[i] > bcol) break;
1721         if (rp[i] == bcol) {
1722           bap  = ap +  bs2*i + bs*cidx + ridx;
1723           if (is == ADD_VALUES) *bap += value;
1724           else                  *bap  = value;
1725           goto noinsert1;
1726         }
1727       }
1728       if (nonew == 1) goto noinsert1;
1729       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1730       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew);
1731       N = nrow++ - 1; high++;
1732       /* shift up all the later entries in this row */
1733       for (ii=N; ii>=i; ii--) {
1734         rp[ii+1] = rp[ii];
1735         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1736       }
1737       if (N>=i) {
1738         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1739       }
1740       rp[i]                      = bcol;
1741       ap[bs2*i + bs*cidx + ridx] = value;
1742       a->nz++;
1743       noinsert1:;
1744       low = i;
1745     }
1746     ailen[brow] = nrow;
1747   }
1748   A->same_nonzero = PETSC_FALSE;
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1755 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1756 {
1757   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1758   Mat            outA;
1759   PetscErrorCode ierr;
1760   PetscTruth     row_identity,col_identity;
1761 
1762   PetscFunctionBegin;
1763   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1764   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1765   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1766   if (!row_identity || !col_identity) {
1767     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1768   }
1769 
1770   outA          = inA;
1771   inA->factor   = FACTOR_LU;
1772 
1773   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1774 
1775   a->row        = row;
1776   a->col        = col;
1777   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1778   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1779 
1780   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1781   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1782   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1783 
1784   /*
1785       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1786       for ILU(0) factorization with natural ordering
1787   */
1788   if (inA->rmap.bs < 8) {
1789     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1790   } else {
1791     if (!a->solve_work) {
1792       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1793       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1794     }
1795   }
1796 
1797   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1798 
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 EXTERN_C_BEGIN
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1805 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1806 {
1807   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1808   PetscInt    i,nz,nbs;
1809 
1810   PetscFunctionBegin;
1811   nz  = baij->maxnz/baij->bs2;
1812   nbs = baij->nbs;
1813   for (i=0; i<nz; i++) {
1814     baij->j[i] = indices[i];
1815   }
1816   baij->nz = nz;
1817   for (i=0; i<nbs; i++) {
1818     baij->ilen[i] = baij->imax[i];
1819   }
1820 
1821   PetscFunctionReturn(0);
1822 }
1823 EXTERN_C_END
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1827 /*@
1828     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1829        in the matrix.
1830 
1831   Input Parameters:
1832 +  mat - the SeqBAIJ matrix
1833 -  indices - the column indices
1834 
1835   Level: advanced
1836 
1837   Notes:
1838     This can be called if you have precomputed the nonzero structure of the
1839   matrix and want to provide it to the matrix object to improve the performance
1840   of the MatSetValues() operation.
1841 
1842     You MUST have set the correct numbers of nonzeros per row in the call to
1843   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1844 
1845     MUST be called before any calls to MatSetValues();
1846 
1847 @*/
1848 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1849 {
1850   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1851 
1852   PetscFunctionBegin;
1853   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1854   PetscValidPointer(indices,2);
1855   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1856   if (f) {
1857     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1858   } else {
1859     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1860   }
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1866 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1867 {
1868   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1869   PetscErrorCode ierr;
1870   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1871   PetscReal      atmp;
1872   PetscScalar    *x,zero = 0.0;
1873   MatScalar      *aa;
1874   PetscInt       ncols,brow,krow,kcol;
1875 
1876   PetscFunctionBegin;
1877   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1878   bs   = A->rmap.bs;
1879   aa   = a->a;
1880   ai   = a->i;
1881   aj   = a->j;
1882   mbs = a->mbs;
1883 
1884   ierr = VecSet(v,zero);CHKERRQ(ierr);
1885   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1886   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1887   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1888   for (i=0; i<mbs; i++) {
1889     ncols = ai[1] - ai[0]; ai++;
1890     brow  = bs*i;
1891     for (j=0; j<ncols; j++){
1892       /* bcol = bs*(*aj); */
1893       for (kcol=0; kcol<bs; kcol++){
1894         for (krow=0; krow<bs; krow++){
1895           atmp = PetscAbsScalar(*aa); aa++;
1896           row = brow + krow;    /* row index */
1897           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1898           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1899         }
1900       }
1901       aj++;
1902     }
1903   }
1904   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatCopy_SeqBAIJ"
1910 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1911 {
1912   PetscErrorCode ierr;
1913 
1914   PetscFunctionBegin;
1915   /* If the two matrices have the same copy implementation, use fast copy. */
1916   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1917     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1918     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1919 
1920     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1921       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1922     }
1923     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1924   } else {
1925     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1926   }
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1932 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1933 {
1934   PetscErrorCode ierr;
1935 
1936   PetscFunctionBegin;
1937   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1943 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1944 {
1945   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1946   PetscFunctionBegin;
1947   *array = a->a;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1953 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1954 {
1955   PetscFunctionBegin;
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #include "petscblaslapack.h"
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1962 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1963 {
1964   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1965   PetscErrorCode ierr;
1966   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1967   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1968 
1969   PetscFunctionBegin;
1970   if (str == SAME_NONZERO_PATTERN) {
1971     PetscScalar alpha = a;
1972     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1973   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1974     if (y->xtoy && y->XtoY != X) {
1975       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1976       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1977     }
1978     if (!y->xtoy) { /* get xtoy */
1979       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1980       y->XtoY = X;
1981     }
1982     bs2 = bs*bs;
1983     for (i=0; i<x->nz; i++) {
1984       j = 0;
1985       while (j < bs2){
1986         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1987         j++;
1988       }
1989     }
1990     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1991   } else {
1992     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRealPart_SeqBAIJ"
1999 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2000 {
2001   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2002   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2003   PetscScalar    *aa = a->a;
2004 
2005   PetscFunctionBegin;
2006   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2012 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2013 {
2014   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2015   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2016   PetscScalar    *aa = a->a;
2017 
2018   PetscFunctionBegin;
2019   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 
2024 /* -------------------------------------------------------------------*/
2025 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2026        MatGetRow_SeqBAIJ,
2027        MatRestoreRow_SeqBAIJ,
2028        MatMult_SeqBAIJ_N,
2029 /* 4*/ MatMultAdd_SeqBAIJ_N,
2030        MatMultTranspose_SeqBAIJ,
2031        MatMultTransposeAdd_SeqBAIJ,
2032        MatSolve_SeqBAIJ_N,
2033        0,
2034        0,
2035 /*10*/ 0,
2036        MatLUFactor_SeqBAIJ,
2037        0,
2038        0,
2039        MatTranspose_SeqBAIJ,
2040 /*15*/ MatGetInfo_SeqBAIJ,
2041        MatEqual_SeqBAIJ,
2042        MatGetDiagonal_SeqBAIJ,
2043        MatDiagonalScale_SeqBAIJ,
2044        MatNorm_SeqBAIJ,
2045 /*20*/ 0,
2046        MatAssemblyEnd_SeqBAIJ,
2047        0,
2048        MatSetOption_SeqBAIJ,
2049        MatZeroEntries_SeqBAIJ,
2050 /*25*/ MatZeroRows_SeqBAIJ,
2051        MatLUFactorSymbolic_SeqBAIJ,
2052        MatLUFactorNumeric_SeqBAIJ_N,
2053        MatCholeskyFactorSymbolic_SeqBAIJ,
2054        MatCholeskyFactorNumeric_SeqBAIJ_N,
2055 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2056        MatILUFactorSymbolic_SeqBAIJ,
2057        MatICCFactorSymbolic_SeqBAIJ,
2058        MatGetArray_SeqBAIJ,
2059        MatRestoreArray_SeqBAIJ,
2060 /*35*/ MatDuplicate_SeqBAIJ,
2061        0,
2062        0,
2063        MatILUFactor_SeqBAIJ,
2064        0,
2065 /*40*/ MatAXPY_SeqBAIJ,
2066        MatGetSubMatrices_SeqBAIJ,
2067        MatIncreaseOverlap_SeqBAIJ,
2068        MatGetValues_SeqBAIJ,
2069        MatCopy_SeqBAIJ,
2070 /*45*/ 0,
2071        MatScale_SeqBAIJ,
2072        0,
2073        0,
2074        0,
2075 /*50*/ 0,
2076        MatGetRowIJ_SeqBAIJ,
2077        MatRestoreRowIJ_SeqBAIJ,
2078        0,
2079        0,
2080 /*55*/ 0,
2081        0,
2082        0,
2083        0,
2084        MatSetValuesBlocked_SeqBAIJ,
2085 /*60*/ MatGetSubMatrix_SeqBAIJ,
2086        MatDestroy_SeqBAIJ,
2087        MatView_SeqBAIJ,
2088        0,
2089        0,
2090 /*65*/ 0,
2091        0,
2092        0,
2093        0,
2094        0,
2095 /*70*/ MatGetRowMax_SeqBAIJ,
2096        MatConvert_Basic,
2097        0,
2098        0,
2099        0,
2100 /*75*/ 0,
2101        0,
2102        0,
2103        0,
2104        0,
2105 /*80*/ 0,
2106        0,
2107        0,
2108        0,
2109        MatLoad_SeqBAIJ,
2110 /*85*/ 0,
2111        0,
2112        0,
2113        0,
2114        0,
2115 /*90*/ 0,
2116        0,
2117        0,
2118        0,
2119        0,
2120 /*95*/ 0,
2121        0,
2122        0,
2123        0,
2124        0,
2125 /*100*/0,
2126        0,
2127        0,
2128        0,
2129        0,
2130 /*105*/0,
2131        MatRealPart_SeqBAIJ,
2132        MatImaginaryPart_SeqBAIJ
2133 };
2134 
2135 EXTERN_C_BEGIN
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2138 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2139 {
2140   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2141   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   if (aij->nonew != 1) {
2146     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2147   }
2148 
2149   /* allocate space for values if not already there */
2150   if (!aij->saved_values) {
2151     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2152   }
2153 
2154   /* copy values over */
2155   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 EXTERN_C_END
2159 
2160 EXTERN_C_BEGIN
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2163 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2164 {
2165   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2166   PetscErrorCode ierr;
2167   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2168 
2169   PetscFunctionBegin;
2170   if (aij->nonew != 1) {
2171     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2172   }
2173   if (!aij->saved_values) {
2174     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2175   }
2176 
2177   /* copy values over */
2178   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 EXTERN_C_END
2182 
2183 EXTERN_C_BEGIN
2184 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2185 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2186 EXTERN_C_END
2187 
2188 EXTERN_C_BEGIN
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2191 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2192 {
2193   Mat_SeqBAIJ    *b;
2194   PetscErrorCode ierr;
2195   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2196   PetscTruth     flg,skipallocation = PETSC_FALSE;
2197 
2198   PetscFunctionBegin;
2199 
2200   if (nz == MAT_SKIP_ALLOCATION) {
2201     skipallocation = PETSC_TRUE;
2202     nz             = 0;
2203   }
2204 
2205   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2206     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2207   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2208 
2209   if (nnz && newbs != bs) {
2210     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2211   }
2212   bs   = newbs;
2213 
2214   B->rmap.bs = B->cmap.bs = bs;
2215   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2216   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2217 
2218   B->preallocated = PETSC_TRUE;
2219 
2220   mbs  = B->rmap.n/bs;
2221   nbs  = B->cmap.n/bs;
2222   bs2  = bs*bs;
2223 
2224   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2225     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2226   }
2227 
2228   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2229   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2230   if (nnz) {
2231     for (i=0; i<mbs; i++) {
2232       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2233       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2234     }
2235   }
2236 
2237   b       = (Mat_SeqBAIJ*)B->data;
2238   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2239     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2240   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2241 
2242   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2243   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2244   if (!flg) {
2245     switch (bs) {
2246     case 1:
2247       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2248       B->ops->mult            = MatMult_SeqBAIJ_1;
2249       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2250       break;
2251     case 2:
2252       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2253       B->ops->mult            = MatMult_SeqBAIJ_2;
2254       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2255       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2256       break;
2257     case 3:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2259       B->ops->mult            = MatMult_SeqBAIJ_3;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2261       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2262       break;
2263     case 4:
2264       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2265       B->ops->mult            = MatMult_SeqBAIJ_4;
2266       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2267       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2268       break;
2269     case 5:
2270       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2271       B->ops->mult            = MatMult_SeqBAIJ_5;
2272       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2273       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2274       break;
2275     case 6:
2276       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2277       B->ops->mult            = MatMult_SeqBAIJ_6;
2278       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2279       break;
2280     case 7:
2281       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2282       B->ops->mult            = MatMult_SeqBAIJ_7;
2283       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2284       break;
2285     default:
2286       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2287       B->ops->mult            = MatMult_SeqBAIJ_N;
2288       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2289       break;
2290     }
2291   }
2292   B->rmap.bs      = bs;
2293   b->mbs     = mbs;
2294   b->nbs     = nbs;
2295   if (!skipallocation) {
2296     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2297     /* b->ilen will count nonzeros in each block row so far. */
2298     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2299     if (!nnz) {
2300       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2301       else if (nz <= 0)        nz = 1;
2302       for (i=0; i<mbs; i++) b->imax[i] = nz;
2303       nz = nz*mbs;
2304     } else {
2305       nz = 0;
2306       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2307     }
2308 
2309     /* allocate the matrix space */
2310     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2311     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2312     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2313     b->singlemalloc = PETSC_TRUE;
2314 
2315     b->i[0] = 0;
2316     for (i=1; i<mbs+1; i++) {
2317       b->i[i] = b->i[i-1] + b->imax[i-1];
2318     }
2319     b->free_a     = PETSC_TRUE;
2320     b->free_ij    = PETSC_TRUE;
2321   } else {
2322     b->free_a     = PETSC_FALSE;
2323     b->free_ij    = PETSC_FALSE;
2324   }
2325 
2326   B->rmap.bs          = bs;
2327   b->bs2              = bs2;
2328   b->mbs              = mbs;
2329   b->nz               = 0;
2330   b->maxnz            = nz*bs2;
2331   B->info.nz_unneeded = (PetscReal)b->maxnz;
2332   PetscFunctionReturn(0);
2333 }
2334 EXTERN_C_END
2335 
2336 /*MC
2337    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2338    block sparse compressed row format.
2339 
2340    Options Database Keys:
2341 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2342 
2343   Level: beginner
2344 
2345 .seealso: MatCreateSeqBAIJ()
2346 M*/
2347 
2348 EXTERN_C_BEGIN
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatCreate_SeqBAIJ"
2351 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2352 {
2353   PetscErrorCode ierr;
2354   PetscMPIInt    size;
2355   Mat_SeqBAIJ    *b;
2356 
2357   PetscFunctionBegin;
2358   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2359   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2360 
2361   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2362   B->data = (void*)b;
2363   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2364   B->factor           = 0;
2365   B->mapping          = 0;
2366   b->row              = 0;
2367   b->col              = 0;
2368   b->icol             = 0;
2369   b->reallocs         = 0;
2370   b->saved_values     = 0;
2371 #if defined(PETSC_USE_MAT_SINGLE)
2372   b->setvalueslen     = 0;
2373   b->setvaluescopy    = PETSC_NULL;
2374 #endif
2375 
2376   b->sorted           = PETSC_FALSE;
2377   b->roworiented      = PETSC_TRUE;
2378   b->nonew            = 0;
2379   b->diag             = 0;
2380   b->solve_work       = 0;
2381   b->mult_work        = 0;
2382   B->spptr            = 0;
2383   B->info.nz_unneeded = (PetscReal)b->maxnz;
2384   b->keepzeroedrows   = PETSC_FALSE;
2385   b->xtoy              = 0;
2386   b->XtoY              = 0;
2387   b->compressedrow.use     = PETSC_FALSE;
2388   b->compressedrow.nrows   = 0;
2389   b->compressedrow.i       = PETSC_NULL;
2390   b->compressedrow.rindex  = PETSC_NULL;
2391   b->compressedrow.checked = PETSC_FALSE;
2392   B->same_nonzero          = PETSC_FALSE;
2393 
2394   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2395                                      "MatInvertBlockDiagonal_SeqBAIJ",
2396                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2397   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2398                                      "MatStoreValues_SeqBAIJ",
2399                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2401                                      "MatRetrieveValues_SeqBAIJ",
2402                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2404                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2405                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2407                                      "MatConvert_SeqBAIJ_SeqAIJ",
2408                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2410                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2411                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2413                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2414                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2415   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 EXTERN_C_END
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2422 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2423 {
2424   Mat            C;
2425   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2426   PetscErrorCode ierr;
2427   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2428 
2429   PetscFunctionBegin;
2430   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2431 
2432   *B = 0;
2433   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2434   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2435   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2436   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2437   c    = (Mat_SeqBAIJ*)C->data;
2438 
2439   C->rmap.N   = A->rmap.N;
2440   C->cmap.N   = A->cmap.N;
2441   C->rmap.bs  = A->rmap.bs;
2442   c->bs2 = a->bs2;
2443   c->mbs = a->mbs;
2444   c->nbs = a->nbs;
2445 
2446   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2447   for (i=0; i<mbs; i++) {
2448     c->imax[i] = a->imax[i];
2449     c->ilen[i] = a->ilen[i];
2450   }
2451 
2452   /* allocate the matrix space */
2453   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2454   c->singlemalloc = PETSC_TRUE;
2455   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2456   if (mbs > 0) {
2457     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2458     if (cpvalues == MAT_COPY_VALUES) {
2459       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2460     } else {
2461       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2462     }
2463   }
2464   c->sorted      = a->sorted;
2465   c->roworiented = a->roworiented;
2466   c->nonew       = a->nonew;
2467 
2468   if (a->diag) {
2469     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2470     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2471     for (i=0; i<mbs; i++) {
2472       c->diag[i] = a->diag[i];
2473     }
2474   } else c->diag        = 0;
2475   c->nz                 = a->nz;
2476   c->maxnz              = a->maxnz;
2477   c->solve_work         = 0;
2478   c->mult_work          = 0;
2479   c->free_a             = PETSC_TRUE;
2480   c->free_ij            = PETSC_TRUE;
2481   C->preallocated       = PETSC_TRUE;
2482   C->assembled          = PETSC_TRUE;
2483 
2484   c->compressedrow.use     = a->compressedrow.use;
2485   c->compressedrow.nrows   = a->compressedrow.nrows;
2486   c->compressedrow.checked = a->compressedrow.checked;
2487   if ( a->compressedrow.checked && a->compressedrow.use){
2488     i = a->compressedrow.nrows;
2489     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2490     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2491     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2492     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2493   } else {
2494     c->compressedrow.use    = PETSC_FALSE;
2495     c->compressedrow.i      = PETSC_NULL;
2496     c->compressedrow.rindex = PETSC_NULL;
2497   }
2498   C->same_nonzero = A->same_nonzero;
2499   *B = C;
2500   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatLoad_SeqBAIJ"
2506 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2507 {
2508   Mat_SeqBAIJ    *a;
2509   Mat            B;
2510   PetscErrorCode ierr;
2511   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2512   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2513   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2514   PetscInt       *masked,nmask,tmp,bs2,ishift;
2515   PetscMPIInt    size;
2516   int            fd;
2517   PetscScalar    *aa;
2518   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2519 
2520   PetscFunctionBegin;
2521   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2522     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2523   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2524   bs2  = bs*bs;
2525 
2526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2527   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2528   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2529   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2530   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2531   M = header[1]; N = header[2]; nz = header[3];
2532 
2533   if (header[3] < 0) {
2534     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2535   }
2536 
2537   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2538 
2539   /*
2540      This code adds extra rows to make sure the number of rows is
2541     divisible by the blocksize
2542   */
2543   mbs        = M/bs;
2544   extra_rows = bs - M + bs*(mbs);
2545   if (extra_rows == bs) extra_rows = 0;
2546   else                  mbs++;
2547   if (extra_rows) {
2548     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2549   }
2550 
2551   /* read in row lengths */
2552   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2553   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2554   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2555 
2556   /* read in column indices */
2557   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2558   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2559   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2560 
2561   /* loop over row lengths determining block row lengths */
2562   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2563   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2564   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2565   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2566   masked   = mask + mbs;
2567   rowcount = 0; nzcount = 0;
2568   for (i=0; i<mbs; i++) {
2569     nmask = 0;
2570     for (j=0; j<bs; j++) {
2571       kmax = rowlengths[rowcount];
2572       for (k=0; k<kmax; k++) {
2573         tmp = jj[nzcount++]/bs;
2574         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2575       }
2576       rowcount++;
2577     }
2578     browlengths[i] += nmask;
2579     /* zero out the mask elements we set */
2580     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2581   }
2582 
2583   /* create our matrix */
2584   ierr = MatCreate(comm,&B);
2585   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2586   ierr = MatSetType(B,type);CHKERRQ(ierr);
2587   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2588   a = (Mat_SeqBAIJ*)B->data;
2589 
2590   /* set matrix "i" values */
2591   a->i[0] = 0;
2592   for (i=1; i<= mbs; i++) {
2593     a->i[i]      = a->i[i-1] + browlengths[i-1];
2594     a->ilen[i-1] = browlengths[i-1];
2595   }
2596   a->nz         = 0;
2597   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2598 
2599   /* read in nonzero values */
2600   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2601   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2602   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2603 
2604   /* set "a" and "j" values into matrix */
2605   nzcount = 0; jcount = 0;
2606   for (i=0; i<mbs; i++) {
2607     nzcountb = nzcount;
2608     nmask    = 0;
2609     for (j=0; j<bs; j++) {
2610       kmax = rowlengths[i*bs+j];
2611       for (k=0; k<kmax; k++) {
2612         tmp = jj[nzcount++]/bs;
2613 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2614       }
2615     }
2616     /* sort the masked values */
2617     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2618 
2619     /* set "j" values into matrix */
2620     maskcount = 1;
2621     for (j=0; j<nmask; j++) {
2622       a->j[jcount++]  = masked[j];
2623       mask[masked[j]] = maskcount++;
2624     }
2625     /* set "a" values into matrix */
2626     ishift = bs2*a->i[i];
2627     for (j=0; j<bs; j++) {
2628       kmax = rowlengths[i*bs+j];
2629       for (k=0; k<kmax; k++) {
2630         tmp       = jj[nzcountb]/bs ;
2631         block     = mask[tmp] - 1;
2632         point     = jj[nzcountb] - bs*tmp;
2633         idx       = ishift + bs2*block + j + bs*point;
2634         a->a[idx] = (MatScalar)aa[nzcountb++];
2635       }
2636     }
2637     /* zero out the mask elements we set */
2638     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2639   }
2640   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2641 
2642   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2643   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2644   ierr = PetscFree(aa);CHKERRQ(ierr);
2645   ierr = PetscFree(jj);CHKERRQ(ierr);
2646   ierr = PetscFree(mask);CHKERRQ(ierr);
2647 
2648   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = MatView_Private(B);CHKERRQ(ierr);
2651 
2652   *A = B;
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatCreateSeqBAIJ"
2658 /*@C
2659    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2660    compressed row) format.  For good matrix assembly performance the
2661    user should preallocate the matrix storage by setting the parameter nz
2662    (or the array nnz).  By setting these parameters accurately, performance
2663    during matrix assembly can be increased by more than a factor of 50.
2664 
2665    Collective on MPI_Comm
2666 
2667    Input Parameters:
2668 +  comm - MPI communicator, set to PETSC_COMM_SELF
2669 .  bs - size of block
2670 .  m - number of rows
2671 .  n - number of columns
2672 .  nz - number of nonzero blocks  per block row (same for all rows)
2673 -  nnz - array containing the number of nonzero blocks in the various block rows
2674          (possibly different for each block row) or PETSC_NULL
2675 
2676    Output Parameter:
2677 .  A - the matrix
2678 
2679    Options Database Keys:
2680 .   -mat_no_unroll - uses code that does not unroll the loops in the
2681                      block calculations (much slower)
2682 .    -mat_block_size - size of the blocks to use
2683 
2684    Level: intermediate
2685 
2686    Notes:
2687    The number of rows and columns must be divisible by blocksize.
2688 
2689    If the nnz parameter is given then the nz parameter is ignored
2690 
2691    A nonzero block is any block that as 1 or more nonzeros in it
2692 
2693    The block AIJ format is fully compatible with standard Fortran 77
2694    storage.  That is, the stored row and column indices can begin at
2695    either one (as in Fortran) or zero.  See the users' manual for details.
2696 
2697    Specify the preallocated storage with either nz or nnz (not both).
2698    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2699    allocation.  For additional details, see the users manual chapter on
2700    matrices.
2701 
2702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2703 @*/
2704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2705 {
2706   PetscErrorCode ierr;
2707 
2708   PetscFunctionBegin;
2709   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2710   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2711   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2712   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2718 /*@C
2719    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2720    per row in the matrix. For good matrix assembly performance the
2721    user should preallocate the matrix storage by setting the parameter nz
2722    (or the array nnz).  By setting these parameters accurately, performance
2723    during matrix assembly can be increased by more than a factor of 50.
2724 
2725    Collective on MPI_Comm
2726 
2727    Input Parameters:
2728 +  A - the matrix
2729 .  bs - size of block
2730 .  nz - number of block nonzeros per block row (same for all rows)
2731 -  nnz - array containing the number of block nonzeros in the various block rows
2732          (possibly different for each block row) or PETSC_NULL
2733 
2734    Options Database Keys:
2735 .   -mat_no_unroll - uses code that does not unroll the loops in the
2736                      block calculations (much slower)
2737 .    -mat_block_size - size of the blocks to use
2738 
2739    Level: intermediate
2740 
2741    Notes:
2742    If the nnz parameter is given then the nz parameter is ignored
2743 
2744    The block AIJ format is fully compatible with standard Fortran 77
2745    storage.  That is, the stored row and column indices can begin at
2746    either one (as in Fortran) or zero.  See the users' manual for details.
2747 
2748    Specify the preallocated storage with either nz or nnz (not both).
2749    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2750    allocation.  For additional details, see the users manual chapter on
2751    matrices.
2752 
2753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2754 @*/
2755 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2756 {
2757   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2758 
2759   PetscFunctionBegin;
2760   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2761   if (f) {
2762     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2763   }
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 #undef __FUNCT__
2768 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2769 /*@
2770      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2771               (upper triangular entries in CSR format) provided by the user.
2772 
2773      Collective on MPI_Comm
2774 
2775    Input Parameters:
2776 +  comm - must be an MPI communicator of size 1
2777 .  bs - size of block
2778 .  m - number of rows
2779 .  n - number of columns
2780 .  i - row indices
2781 .  j - column indices
2782 -  a - matrix values
2783 
2784    Output Parameter:
2785 .  mat - the matrix
2786 
2787    Level: intermediate
2788 
2789    Notes:
2790        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2791     once the matrix is destroyed
2792 
2793        You cannot set new nonzero locations into this matrix, that will generate an error.
2794 
2795        The i and j indices are 0 based
2796 
2797 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2798 
2799 @*/
2800 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2801 {
2802   PetscErrorCode ierr;
2803   PetscInt       ii;
2804   Mat_SeqBAIJ    *baij;
2805 
2806   PetscFunctionBegin;
2807   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2808   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2809 
2810   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2811   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2812   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2813   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2814   baij = (Mat_SeqBAIJ*)(*mat)->data;
2815   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2816 
2817   baij->i = i;
2818   baij->j = j;
2819   baij->a = a;
2820   baij->singlemalloc = PETSC_FALSE;
2821   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2822   baij->free_a       = PETSC_FALSE;
2823   baij->free_ij       = PETSC_FALSE;
2824 
2825   for (ii=0; ii<m; ii++) {
2826     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2827 #if defined(PETSC_USE_DEBUG)
2828     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2829 #endif
2830   }
2831 #if defined(PETSC_USE_DEBUG)
2832   for (ii=0; ii<baij->i[m]; ii++) {
2833     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2834     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2835   }
2836 #endif
2837 
2838   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2839   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2840   PetscFunctionReturn(0);
2841 }
2842