xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 6ab11f9b6ca3a3a4762ba278e5cdc46bee711a5b)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for Fun3d sequential benchmark
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   stepval = (n-1)*4;
590   for (k=0; k<m; k++) { /* loop over added rows */
591     row  = im[k];
592     rp   = aj + ai[row];
593     ap   = aa + 16*ai[row];
594     nrow = ailen[row];
595     low  = 0;
596     for (l=0; l<n; l++) { /* loop over added columns */
597       col = in[l];
598       value = v + k*(stepval+4)*4 + l*4;
599       low = 0; high = nrow;
600       while (high-low > 7) {
601         t = (low+high)/2;
602         if (rp[t] > col) high = t;
603         else             low  = t;
604       }
605       for (i=low; i<high; i++) {
606         if (rp[i] > col) break;
607         if (rp[i] == col) {
608           bap  = ap +  16*i;
609           for (ii=0; ii<4; ii++,value+=stepval) {
610             for (jj=ii; jj<16; jj+=4) {
611               bap[jj] += *value++;
612             }
613           }
614           goto noinsert2;
615         }
616       }
617       N = nrow++ - 1;
618       /* shift up all the later entries in this row */
619       for (ii=N; ii>=i; ii--) {
620         rp[ii+1] = rp[ii];
621         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622       }
623       if (N >= i) {
624         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625       }
626       rp[i] = col;
627       bap   = ap +  16*i;
628       for (ii=0; ii<4; ii++,value+=stepval) {
629         for (jj=ii; jj<16; jj+=4) {
630           bap[jj] = *value++;
631         }
632       }
633       noinsert2:;
634       low = i;
635     }
636     ailen[row] = nrow;
637   }
638   PetscFunctionReturnVoid();
639 }
640 EXTERN_C_END
641 
642 #if defined(PETSC_HAVE_FORTRAN_CAPS)
643 #define matsetvalues4_ MATSETVALUES4
644 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645 #define matsetvalues4_ matsetvalues4
646 #endif
647 
648 EXTERN_C_BEGIN
649 #undef __FUNCT__
650 #define __FUNCT__ "MatSetValues4_"
651 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652 {
653   Mat         A = *AA;
654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656   PetscInt    *ai=a->i,*ailen=a->ilen;
657   PetscInt    *aj=a->j,brow,bcol;
658   PetscInt    ridx,cidx;
659   MatScalar   *ap,value,*aa=a->a,*bap;
660 
661   PetscFunctionBegin;
662   for (k=0; k<m; k++) { /* loop over added rows */
663     row  = im[k]; brow = row/4;
664     rp   = aj + ai[brow];
665     ap   = aa + 16*ai[brow];
666     nrow = ailen[brow];
667     low  = 0;
668     for (l=0; l<n; l++) { /* loop over added columns */
669       col = in[l]; bcol = col/4;
670       ridx = row % 4; cidx = col % 4;
671       value = v[l + k*n];
672       low = 0; high = nrow;
673       while (high-low > 7) {
674         t = (low+high)/2;
675         if (rp[t] > bcol) high = t;
676         else              low  = t;
677       }
678       for (i=low; i<high; i++) {
679         if (rp[i] > bcol) break;
680         if (rp[i] == bcol) {
681           bap  = ap +  16*i + 4*cidx + ridx;
682           *bap += value;
683           goto noinsert1;
684         }
685       }
686       N = nrow++ - 1;
687       /* shift up all the later entries in this row */
688       for (ii=N; ii>=i; ii--) {
689         rp[ii+1] = rp[ii];
690         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691       }
692       if (N>=i) {
693         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694       }
695       rp[i]                    = bcol;
696       ap[16*i + 4*cidx + ridx] = value;
697       noinsert1:;
698       low = i;
699     }
700     ailen[brow] = nrow;
701   }
702   PetscFunctionReturnVoid();
703 }
704 EXTERN_C_END
705 
706 /*  UGLY, ugly, ugly
707    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
708    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
709    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710    converts the entries into single precision and then calls ..._MatScalar() to put them
711    into the single precision data structures.
712 */
713 #if defined(PETSC_USE_MAT_SINGLE)
714 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715 #else
716 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717 #endif
718 
719 #define CHUNKSIZE  10
720 
721 /*
722      Checks for missing diagonals
723 */
724 #undef __FUNCT__
725 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
726 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727 {
728   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
729   PetscErrorCode ierr;
730   PetscInt       *diag,*jj = a->j,i;
731 
732   PetscFunctionBegin;
733   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
734   diag = a->diag;
735   for (i=0; i<a->mbs; i++) {
736     if (jj[diag[i]] != i) {
737       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
745 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746 {
747   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
748   PetscErrorCode ierr;
749   PetscInt       i,j,*diag,m = a->mbs;
750 
751   PetscFunctionBegin;
752   if (a->diag) PetscFunctionReturn(0);
753 
754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
755   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
756   for (i=0; i<m; i++) {
757     diag[i] = a->i[i+1];
758     for (j=a->i[i]; j<a->i[i+1]; j++) {
759       if (a->j[j] == i) {
760         diag[i] = j;
761         break;
762       }
763     }
764   }
765   a->diag = diag;
766   PetscFunctionReturn(0);
767 }
768 
769 
770 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
774 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775 {
776   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
777   PetscErrorCode ierr;
778   PetscInt       n = a->mbs,i;
779 
780   PetscFunctionBegin;
781   *nn = n;
782   if (!ia) PetscFunctionReturn(0);
783   if (symmetric) {
784     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
785   } else if (oshift == 1) {
786     /* temporarily add 1 to i and j indices */
787     PetscInt nz = a->i[n];
788     for (i=0; i<nz; i++) a->j[i]++;
789     for (i=0; i<n+1; i++) a->i[i]++;
790     *ia = a->i; *ja = a->j;
791   } else {
792     *ia = a->i; *ja = a->j;
793   }
794 
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
800 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801 {
802   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       i,n = a->mbs;
805 
806   PetscFunctionBegin;
807   if (!ia) PetscFunctionReturn(0);
808   if (symmetric) {
809     ierr = PetscFree(*ia);CHKERRQ(ierr);
810     ierr = PetscFree(*ja);CHKERRQ(ierr);
811   } else if (oshift == 1) {
812     PetscInt nz = a->i[n]-1;
813     for (i=0; i<nz; i++) a->j[i]--;
814     for (i=0; i<n+1; i++) a->i[i]--;
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatDestroy_SeqBAIJ"
821 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822 {
823   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827 #if defined(PETSC_USE_LOG)
828   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829 #endif
830   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);
831   if (a->row) {
832     ierr = ISDestroy(a->row);CHKERRQ(ierr);
833   }
834   if (a->col) {
835     ierr = ISDestroy(a->col);CHKERRQ(ierr);
836   }
837   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
838   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
839   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
840   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
841   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
842   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
843   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
844 #if defined(PETSC_USE_MAT_SINGLE)
845   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
846 #endif
847   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
848   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
849 
850   ierr = PetscFree(a);CHKERRQ(ierr);
851 
852   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatSetOption_SeqBAIJ"
864 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
865 {
866   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   switch (op) {
871   case MAT_ROW_ORIENTED:
872     a->roworiented    = PETSC_TRUE;
873     break;
874   case MAT_COLUMN_ORIENTED:
875     a->roworiented    = PETSC_FALSE;
876     break;
877   case MAT_COLUMNS_SORTED:
878     a->sorted         = PETSC_TRUE;
879     break;
880   case MAT_COLUMNS_UNSORTED:
881     a->sorted         = PETSC_FALSE;
882     break;
883   case MAT_KEEP_ZEROED_ROWS:
884     a->keepzeroedrows = PETSC_TRUE;
885     break;
886   case MAT_NO_NEW_NONZERO_LOCATIONS:
887     a->nonew          = 1;
888     break;
889   case MAT_NEW_NONZERO_LOCATION_ERR:
890     a->nonew          = -1;
891     break;
892   case MAT_NEW_NONZERO_ALLOCATION_ERR:
893     a->nonew          = -2;
894     break;
895   case MAT_YES_NEW_NONZERO_LOCATIONS:
896     a->nonew          = 0;
897     break;
898   case MAT_ROWS_SORTED:
899   case MAT_ROWS_UNSORTED:
900   case MAT_YES_NEW_DIAGONALS:
901   case MAT_IGNORE_OFF_PROC_ENTRIES:
902   case MAT_USE_HASH_TABLE:
903     ierr = PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
904     break;
905   case MAT_NO_NEW_DIAGONALS:
906     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
907   case MAT_SYMMETRIC:
908   case MAT_STRUCTURALLY_SYMMETRIC:
909   case MAT_NOT_SYMMETRIC:
910   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
911   case MAT_HERMITIAN:
912   case MAT_NOT_HERMITIAN:
913   case MAT_SYMMETRY_ETERNAL:
914   case MAT_NOT_SYMMETRY_ETERNAL:
915     break;
916   default:
917     SETERRQ(PETSC_ERR_SUP,"unknown option");
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatGetRow_SeqBAIJ"
924 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
925 {
926   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
927   PetscErrorCode ierr;
928   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
929   MatScalar      *aa,*aa_i;
930   PetscScalar    *v_i;
931 
932   PetscFunctionBegin;
933   bs  = A->bs;
934   ai  = a->i;
935   aj  = a->j;
936   aa  = a->a;
937   bs2 = a->bs2;
938 
939   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
940 
941   bn  = row/bs;   /* Block number */
942   bp  = row % bs; /* Block Position */
943   M   = ai[bn+1] - ai[bn];
944   *nz = bs*M;
945 
946   if (v) {
947     *v = 0;
948     if (*nz) {
949       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
950       for (i=0; i<M; i++) { /* for each block in the block row */
951         v_i  = *v + i*bs;
952         aa_i = aa + bs2*(ai[bn] + i);
953         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
954       }
955     }
956   }
957 
958   if (idx) {
959     *idx = 0;
960     if (*nz) {
961       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
962       for (i=0; i<M; i++) { /* for each block in the block row */
963         idx_i = *idx + i*bs;
964         itmp  = bs*aj[ai[bn] + i];
965         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
966       }
967     }
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
974 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
975 {
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
980   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatTranspose_SeqBAIJ"
986 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
987 {
988   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
989   Mat            C;
990   PetscErrorCode ierr;
991   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
992   PetscInt       *rows,*cols,bs2=a->bs2;
993   PetscScalar    *array;
994 
995   PetscFunctionBegin;
996   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
997   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
998   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
999 
1000 #if defined(PETSC_USE_MAT_SINGLE)
1001   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1002   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1003 #else
1004   array = a->a;
1005 #endif
1006 
1007   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1008   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
1009   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1010   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1011   ierr = PetscFree(col);CHKERRQ(ierr);
1012   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1013   cols = rows + bs;
1014   for (i=0; i<mbs; i++) {
1015     cols[0] = i*bs;
1016     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1017     len = ai[i+1] - ai[i];
1018     for (j=0; j<len; j++) {
1019       rows[0] = (*aj++)*bs;
1020       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1021       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1022       array += bs2;
1023     }
1024   }
1025   ierr = PetscFree(rows);CHKERRQ(ierr);
1026 #if defined(PETSC_USE_MAT_SINGLE)
1027   ierr = PetscFree(array);
1028 #endif
1029 
1030   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1031   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032 
1033   if (B) {
1034     *B = C;
1035   } else {
1036     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1037   }
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1043 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1044 {
1045   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1046   PetscErrorCode ierr;
1047   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1048   int            fd;
1049   PetscScalar    *aa;
1050   FILE           *file;
1051 
1052   PetscFunctionBegin;
1053   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1054   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1055   col_lens[0] = MAT_FILE_COOKIE;
1056 
1057   col_lens[1] = A->m;
1058   col_lens[2] = A->n;
1059   col_lens[3] = a->nz*bs2;
1060 
1061   /* store lengths of each row and write (including header) to file */
1062   count = 0;
1063   for (i=0; i<a->mbs; i++) {
1064     for (j=0; j<bs; j++) {
1065       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1066     }
1067   }
1068   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1069   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1070 
1071   /* store column indices (zero start index) */
1072   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1073   count = 0;
1074   for (i=0; i<a->mbs; i++) {
1075     for (j=0; j<bs; j++) {
1076       for (k=a->i[i]; k<a->i[i+1]; k++) {
1077         for (l=0; l<bs; l++) {
1078           jj[count++] = bs*a->j[k] + l;
1079         }
1080       }
1081     }
1082   }
1083   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1084   ierr = PetscFree(jj);CHKERRQ(ierr);
1085 
1086   /* store nonzero values */
1087   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1088   count = 0;
1089   for (i=0; i<a->mbs; i++) {
1090     for (j=0; j<bs; j++) {
1091       for (k=a->i[i]; k<a->i[i+1]; k++) {
1092         for (l=0; l<bs; l++) {
1093           aa[count++] = a->a[bs2*k + l*bs + j];
1094         }
1095       }
1096     }
1097   }
1098   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1099   ierr = PetscFree(aa);CHKERRQ(ierr);
1100 
1101   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1102   if (file) {
1103     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1104   }
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1110 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1111 {
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscErrorCode    ierr;
1114   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1115   PetscViewerFormat format;
1116 
1117   PetscFunctionBegin;
1118   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1119   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1120     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1121   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1122     Mat aij;
1123     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1124     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1125     ierr = MatDestroy(aij);CHKERRQ(ierr);
1126   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1127      PetscFunctionReturn(0);
1128   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1129     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1130     for (i=0; i<a->mbs; i++) {
1131       for (j=0; j<bs; j++) {
1132         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1133         for (k=a->i[i]; k<a->i[i+1]; k++) {
1134           for (l=0; l<bs; l++) {
1135 #if defined(PETSC_USE_COMPLEX)
1136             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1137               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1138                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1139             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1140               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1141                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1142             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1143               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1144             }
1145 #else
1146             if (a->a[bs2*k + l*bs + j] != 0.0) {
1147               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1148             }
1149 #endif
1150           }
1151         }
1152         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1153       }
1154     }
1155     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1156   } else {
1157     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1158     for (i=0; i<a->mbs; i++) {
1159       for (j=0; j<bs; j++) {
1160         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1161         for (k=a->i[i]; k<a->i[i+1]; k++) {
1162           for (l=0; l<bs; l++) {
1163 #if defined(PETSC_USE_COMPLEX)
1164             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1165               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1166                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1167             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1168               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1169                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1170             } else {
1171               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1172             }
1173 #else
1174             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1175 #endif
1176           }
1177         }
1178         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1179       }
1180     }
1181     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1182   }
1183   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1189 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1190 {
1191   Mat            A = (Mat) Aa;
1192   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1195   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1196   MatScalar      *aa;
1197   PetscViewer    viewer;
1198 
1199   PetscFunctionBegin;
1200 
1201   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1202   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1203 
1204   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1205 
1206   /* loop over matrix elements drawing boxes */
1207   color = PETSC_DRAW_BLUE;
1208   for (i=0,row=0; i<mbs; i++,row+=bs) {
1209     for (j=a->i[i]; j<a->i[i+1]; j++) {
1210       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1211       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1212       aa = a->a + j*bs2;
1213       for (k=0; k<bs; k++) {
1214         for (l=0; l<bs; l++) {
1215           if (PetscRealPart(*aa++) >=  0.) continue;
1216           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1217         }
1218       }
1219     }
1220   }
1221   color = PETSC_DRAW_CYAN;
1222   for (i=0,row=0; i<mbs; i++,row+=bs) {
1223     for (j=a->i[i]; j<a->i[i+1]; j++) {
1224       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1225       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1226       aa = a->a + j*bs2;
1227       for (k=0; k<bs; k++) {
1228         for (l=0; l<bs; l++) {
1229           if (PetscRealPart(*aa++) != 0.) continue;
1230           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1231         }
1232       }
1233     }
1234   }
1235 
1236   color = PETSC_DRAW_RED;
1237   for (i=0,row=0; i<mbs; i++,row+=bs) {
1238     for (j=a->i[i]; j<a->i[i+1]; j++) {
1239       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1241       aa = a->a + j*bs2;
1242       for (k=0; k<bs; k++) {
1243         for (l=0; l<bs; l++) {
1244           if (PetscRealPart(*aa++) <= 0.) continue;
1245           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1246         }
1247       }
1248     }
1249   }
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1255 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1256 {
1257   PetscErrorCode ierr;
1258   PetscReal      xl,yl,xr,yr,w,h;
1259   PetscDraw      draw;
1260   PetscTruth     isnull;
1261 
1262   PetscFunctionBegin;
1263 
1264   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1265   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1266 
1267   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1268   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1269   xr += w;    yr += h;  xl = -w;     yl = -h;
1270   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1271   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1272   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatView_SeqBAIJ"
1278 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1279 {
1280   PetscErrorCode ierr;
1281   PetscTruth     iascii,isbinary,isdraw;
1282 
1283   PetscFunctionBegin;
1284   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1285   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1286   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1287   if (iascii){
1288     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1289   } else if (isbinary) {
1290     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1291   } else if (isdraw) {
1292     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1293   } else {
1294     Mat B;
1295     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1296     ierr = MatView(B,viewer);CHKERRQ(ierr);
1297     ierr = MatDestroy(B);CHKERRQ(ierr);
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1305 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1306 {
1307   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1308   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1309   PetscInt    *ai = a->i,*ailen = a->ilen;
1310   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1311   MatScalar   *ap,*aa = a->a,zero = 0.0;
1312 
1313   PetscFunctionBegin;
1314   for (k=0; k<m; k++) { /* loop over rows */
1315     row  = im[k]; brow = row/bs;
1316     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1317     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1318     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1319     nrow = ailen[brow];
1320     for (l=0; l<n; l++) { /* loop over columns */
1321       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1322       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1323       col  = in[l] ;
1324       bcol = col/bs;
1325       cidx = col%bs;
1326       ridx = row%bs;
1327       high = nrow;
1328       low  = 0; /* assume unsorted */
1329       while (high-low > 5) {
1330         t = (low+high)/2;
1331         if (rp[t] > bcol) high = t;
1332         else             low  = t;
1333       }
1334       for (i=low; i<high; i++) {
1335         if (rp[i] > bcol) break;
1336         if (rp[i] == bcol) {
1337           *v++ = ap[bs2*i+bs*cidx+ridx];
1338           goto finished;
1339         }
1340       }
1341       *v++ = zero;
1342       finished:;
1343     }
1344   }
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #if defined(PETSC_USE_MAT_SINGLE)
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1351 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1352 {
1353   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1354   PetscErrorCode ierr;
1355   PetscInt       i,N = m*n*b->bs2;
1356   MatScalar      *vsingle;
1357 
1358   PetscFunctionBegin;
1359   if (N > b->setvalueslen) {
1360     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1361     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1362     b->setvalueslen  = N;
1363   }
1364   vsingle = b->setvaluescopy;
1365   for (i=0; i<N; i++) {
1366     vsingle[i] = v[i];
1367   }
1368   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 #endif
1372 
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1376 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1377 {
1378   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1379   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1380   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1381   PetscErrorCode  ierr;
1382   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1383   PetscTruth      roworiented=a->roworiented;
1384   const MatScalar *value = v;
1385   MatScalar       *ap,*aa = a->a,*bap;
1386 
1387   PetscFunctionBegin;
1388   if (roworiented) {
1389     stepval = (n-1)*bs;
1390   } else {
1391     stepval = (m-1)*bs;
1392   }
1393   for (k=0; k<m; k++) { /* loop over added rows */
1394     row  = im[k];
1395     if (row < 0) continue;
1396 #if defined(PETSC_USE_DEBUG)
1397     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1398 #endif
1399     rp   = aj + ai[row];
1400     ap   = aa + bs2*ai[row];
1401     rmax = imax[row];
1402     nrow = ailen[row];
1403     low  = 0;
1404     high = nrow;
1405     for (l=0; l<n; l++) { /* loop over added columns */
1406       if (in[l] < 0) continue;
1407 #if defined(PETSC_USE_DEBUG)
1408       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1409 #endif
1410       col = in[l];
1411       if (roworiented) {
1412         value = v + k*(stepval+bs)*bs + l*bs;
1413       } else {
1414         value = v + l*(stepval+bs)*bs + k*bs;
1415       }
1416       if (col < lastcol) low = 0; else high = nrow;
1417       lastcol = col;
1418       while (high-low > 7) {
1419         t = (low+high)/2;
1420         if (rp[t] > col) high = t;
1421         else             low  = t;
1422       }
1423       for (i=low; i<high; i++) {
1424         if (rp[i] > col) break;
1425         if (rp[i] == col) {
1426           bap  = ap +  bs2*i;
1427           if (roworiented) {
1428             if (is == ADD_VALUES) {
1429               for (ii=0; ii<bs; ii++,value+=stepval) {
1430                 for (jj=ii; jj<bs2; jj+=bs) {
1431                   bap[jj] += *value++;
1432                 }
1433               }
1434             } else {
1435               for (ii=0; ii<bs; ii++,value+=stepval) {
1436                 for (jj=ii; jj<bs2; jj+=bs) {
1437                   bap[jj] = *value++;
1438                 }
1439               }
1440             }
1441           } else {
1442             if (is == ADD_VALUES) {
1443               for (ii=0; ii<bs; ii++,value+=stepval) {
1444                 for (jj=0; jj<bs; jj++) {
1445                   *bap++ += *value++;
1446                 }
1447               }
1448             } else {
1449               for (ii=0; ii<bs; ii++,value+=stepval) {
1450                 for (jj=0; jj<bs; jj++) {
1451                   *bap++  = *value++;
1452                 }
1453               }
1454             }
1455           }
1456           goto noinsert2;
1457         }
1458       }
1459       if (nonew == 1) goto noinsert2;
1460       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1461       if (nrow >= rmax) {
1462         /* there is no extra room in row, therefore enlarge */
1463         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1464         MatScalar *new_a;
1465 
1466         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1467 
1468         /* malloc new storage space */
1469         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);
1470 
1471         /* copy over old data into new slots */
1472         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1473         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1474         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1475         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1476         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1477         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1478         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1479         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1480         /* free up old matrix storage */
1481         ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);\
1482         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1483         a->singlemalloc = PETSC_TRUE;
1484 
1485         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1486         rmax = imax[row] = imax[row] + CHUNKSIZE;
1487         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1488         a->maxnz += bs2*CHUNKSIZE;
1489         a->reallocs++;
1490         a->nz++;
1491       }
1492       N = nrow++ - 1;
1493       /* shift up all the later entries in this row */
1494       for (ii=N; ii>=i; ii--) {
1495         rp[ii+1] = rp[ii];
1496         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1497       }
1498       if (N >= i) {
1499         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1500       }
1501       rp[i] = col;
1502       bap   = ap +  bs2*i;
1503       if (roworiented) {
1504         for (ii=0; ii<bs; ii++,value+=stepval) {
1505           for (jj=ii; jj<bs2; jj+=bs) {
1506             bap[jj] = *value++;
1507           }
1508         }
1509       } else {
1510         for (ii=0; ii<bs; ii++,value+=stepval) {
1511           for (jj=0; jj<bs; jj++) {
1512             *bap++  = *value++;
1513           }
1514         }
1515       }
1516       noinsert2:;
1517       low = i;
1518     }
1519     ailen[row] = nrow;
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1526 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1527 {
1528   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1529   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1530   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1531   PetscErrorCode ierr;
1532   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1533   MatScalar      *aa = a->a,*ap;
1534   PetscReal      ratio=0.6;
1535 
1536   PetscFunctionBegin;
1537   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1538 
1539   if (m) rmax = ailen[0];
1540   for (i=1; i<mbs; i++) {
1541     /* move each row back by the amount of empty slots (fshift) before it*/
1542     fshift += imax[i-1] - ailen[i-1];
1543     rmax   = PetscMax(rmax,ailen[i]);
1544     if (fshift) {
1545       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1546       N = ailen[i];
1547       for (j=0; j<N; j++) {
1548         ip[j-fshift] = ip[j];
1549         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1550       }
1551     }
1552     ai[i] = ai[i-1] + ailen[i-1];
1553   }
1554   if (mbs) {
1555     fshift += imax[mbs-1] - ailen[mbs-1];
1556     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1557   }
1558   /* reset ilen and imax for each row */
1559   for (i=0; i<mbs; i++) {
1560     ailen[i] = imax[i] = ai[i+1] - ai[i];
1561   }
1562   a->nz = ai[mbs];
1563 
1564   /* diagonals may have moved, so kill the diagonal pointers */
1565   a->idiagvalid = PETSC_FALSE;
1566   if (fshift && a->diag) {
1567     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1568     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1569     a->diag = 0;
1570   }
1571   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1572   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1573   ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1574   a->reallocs          = 0;
1575   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1576 
1577   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1578   if (a->compressedrow.use){
1579     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1580   }
1581 
1582   A->same_nonzero = PETSC_TRUE;
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 /*
1587    This function returns an array of flags which indicate the locations of contiguous
1588    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1589    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1590    Assume: sizes should be long enough to hold all the values.
1591 */
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1594 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1595 {
1596   PetscInt   i,j,k,row;
1597   PetscTruth flg;
1598 
1599   PetscFunctionBegin;
1600   for (i=0,j=0; i<n; j++) {
1601     row = idx[i];
1602     if (row%bs!=0) { /* Not the begining of a block */
1603       sizes[j] = 1;
1604       i++;
1605     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1606       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1607       i++;
1608     } else { /* Begining of the block, so check if the complete block exists */
1609       flg = PETSC_TRUE;
1610       for (k=1; k<bs; k++) {
1611         if (row+k != idx[i+k]) { /* break in the block */
1612           flg = PETSC_FALSE;
1613           break;
1614         }
1615       }
1616       if (flg) { /* No break in the bs */
1617         sizes[j] = bs;
1618         i+= bs;
1619       } else {
1620         sizes[j] = 1;
1621         i++;
1622       }
1623     }
1624   }
1625   *bs_max = j;
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1631 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1632 {
1633   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1634   PetscErrorCode ierr;
1635   PetscInt       i,j,k,count,is_n,*is_idx,*rows;
1636   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1637   PetscScalar    zero = 0.0;
1638   MatScalar      *aa;
1639 
1640   PetscFunctionBegin;
1641   /* Make a copy of the IS and  sort it */
1642   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1643   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1644 
1645   /* allocate memory for rows,sizes */
1646   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1647   sizes = rows + is_n;
1648 
1649   /* copy IS values to rows, and sort them */
1650   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1651   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1652   if (baij->keepzeroedrows) {
1653     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1654     bs_max = is_n;
1655     A->same_nonzero = PETSC_TRUE;
1656   } else {
1657     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1658     A->same_nonzero = PETSC_FALSE;
1659   }
1660   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1661 
1662   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1663     row   = rows[j];
1664     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1665     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1666     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1667     if (sizes[i] == bs && !baij->keepzeroedrows) {
1668       if (diag) {
1669         if (baij->ilen[row/bs] > 0) {
1670           baij->ilen[row/bs]       = 1;
1671           baij->j[baij->i[row/bs]] = row/bs;
1672           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1673         }
1674         /* Now insert all the diagonal values for this bs */
1675         for (k=0; k<bs; k++) {
1676           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1677         }
1678       } else { /* (!diag) */
1679         baij->ilen[row/bs] = 0;
1680       } /* end (!diag) */
1681     } else { /* (sizes[i] != bs) */
1682 #if defined (PETSC_USE_DEBUG)
1683       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1684 #endif
1685       for (k=0; k<count; k++) {
1686         aa[0] =  zero;
1687         aa    += bs;
1688       }
1689       if (diag) {
1690         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1691       }
1692     }
1693   }
1694 
1695   ierr = PetscFree(rows);CHKERRQ(ierr);
1696   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1702 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1703 {
1704   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1705   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1706   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1707   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1708   PetscErrorCode ierr;
1709   PetscInt       ridx,cidx,bs2=a->bs2;
1710   PetscTruth     roworiented=a->roworiented;
1711   MatScalar      *ap,value,*aa=a->a,*bap;
1712 
1713   PetscFunctionBegin;
1714   for (k=0; k<m; k++) { /* loop over added rows */
1715     row  = im[k]; brow = row/bs;
1716     if (row < 0) continue;
1717 #if defined(PETSC_USE_DEBUG)
1718     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1719 #endif
1720     rp   = aj + ai[brow];
1721     ap   = aa + bs2*ai[brow];
1722     rmax = imax[brow];
1723     nrow = ailen[brow];
1724     low  = 0;
1725     high = nrow;
1726     for (l=0; l<n; l++) { /* loop over added columns */
1727       if (in[l] < 0) continue;
1728 #if defined(PETSC_USE_DEBUG)
1729       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1730 #endif
1731       col = in[l]; bcol = col/bs;
1732       ridx = row % bs; cidx = col % bs;
1733       if (roworiented) {
1734         value = v[l + k*n];
1735       } else {
1736         value = v[k + l*m];
1737       }
1738       if (col < lastcol) low = 0; else high = nrow;
1739       lastcol = col;
1740       while (high-low > 7) {
1741         t = (low+high)/2;
1742         if (rp[t] > bcol) high = t;
1743         else              low  = t;
1744       }
1745       for (i=low; i<high; i++) {
1746         if (rp[i] > bcol) break;
1747         if (rp[i] == bcol) {
1748           bap  = ap +  bs2*i + bs*cidx + ridx;
1749           if (is == ADD_VALUES) *bap += value;
1750           else                  *bap  = value;
1751           goto noinsert1;
1752         }
1753       }
1754       if (nonew == 1) goto noinsert1;
1755       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1756       if (nrow >= rmax) {
1757         /* there is no extra room in row, therefore enlarge */
1758         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1759         MatScalar *new_a;
1760 
1761         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1762 
1763         /* Malloc new storage space */
1764         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);
1765 
1766         /* copy over old data into new slots */
1767         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1768         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1769         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr);
1770         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1771         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr);
1772         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1773         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1774         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1775         /* free up old matrix storage */
1776         ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);
1777         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1778         a->singlemalloc = PETSC_TRUE;
1779 
1780         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1781         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1782         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr);
1783         a->maxnz += bs2*CHUNKSIZE;
1784         a->reallocs++;
1785         a->nz++;
1786       }
1787       N = nrow++ - 1;
1788       /* shift up all the later entries in this row */
1789       for (ii=N; ii>=i; ii--) {
1790         rp[ii+1] = rp[ii];
1791         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1792       }
1793       if (N>=i) {
1794         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1795       }
1796       rp[i]                      = bcol;
1797       ap[bs2*i + bs*cidx + ridx] = value;
1798       noinsert1:;
1799       low = i;
1800     }
1801     ailen[brow] = nrow;
1802   }
1803   A->same_nonzero = PETSC_FALSE;
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1810 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1811 {
1812   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1813   Mat            outA;
1814   PetscErrorCode ierr;
1815   PetscTruth     row_identity,col_identity;
1816 
1817   PetscFunctionBegin;
1818   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1819   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1820   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1821   if (!row_identity || !col_identity) {
1822     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1823   }
1824 
1825   outA          = inA;
1826   inA->factor   = FACTOR_LU;
1827 
1828   if (!a->diag) {
1829     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1830   }
1831 
1832   a->row        = row;
1833   a->col        = col;
1834   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1835   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1836 
1837   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1838   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1839   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1840 
1841   /*
1842       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1843       for ILU(0) factorization with natural ordering
1844   */
1845   if (inA->bs < 8) {
1846     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1847   } else {
1848     if (!a->solve_work) {
1849       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1850       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1851     }
1852   }
1853 
1854   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1855 
1856   PetscFunctionReturn(0);
1857 }
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1860 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1861 {
1862   static PetscTruth called = PETSC_FALSE;
1863   MPI_Comm          comm = A->comm;
1864   PetscErrorCode    ierr;
1865 
1866   PetscFunctionBegin;
1867   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1868   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1869   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 EXTERN_C_BEGIN
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1876 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1877 {
1878   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1879   PetscInt    i,nz,nbs;
1880 
1881   PetscFunctionBegin;
1882   nz  = baij->maxnz/baij->bs2;
1883   nbs = baij->nbs;
1884   for (i=0; i<nz; i++) {
1885     baij->j[i] = indices[i];
1886   }
1887   baij->nz = nz;
1888   for (i=0; i<nbs; i++) {
1889     baij->ilen[i] = baij->imax[i];
1890   }
1891 
1892   PetscFunctionReturn(0);
1893 }
1894 EXTERN_C_END
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1898 /*@
1899     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1900        in the matrix.
1901 
1902   Input Parameters:
1903 +  mat - the SeqBAIJ matrix
1904 -  indices - the column indices
1905 
1906   Level: advanced
1907 
1908   Notes:
1909     This can be called if you have precomputed the nonzero structure of the
1910   matrix and want to provide it to the matrix object to improve the performance
1911   of the MatSetValues() operation.
1912 
1913     You MUST have set the correct numbers of nonzeros per row in the call to
1914   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1915 
1916     MUST be called before any calls to MatSetValues();
1917 
1918 @*/
1919 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1920 {
1921   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1922 
1923   PetscFunctionBegin;
1924   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1925   PetscValidPointer(indices,2);
1926   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1927   if (f) {
1928     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1929   } else {
1930     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1931   }
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1937 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1938 {
1939   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1940   PetscErrorCode ierr;
1941   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1942   PetscReal      atmp;
1943   PetscScalar    *x,zero = 0.0;
1944   MatScalar      *aa;
1945   PetscInt       ncols,brow,krow,kcol;
1946 
1947   PetscFunctionBegin;
1948   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1949   bs   = A->bs;
1950   aa   = a->a;
1951   ai   = a->i;
1952   aj   = a->j;
1953   mbs = a->mbs;
1954 
1955   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1956   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1957   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1958   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1959   for (i=0; i<mbs; i++) {
1960     ncols = ai[1] - ai[0]; ai++;
1961     brow  = bs*i;
1962     for (j=0; j<ncols; j++){
1963       /* bcol = bs*(*aj); */
1964       for (kcol=0; kcol<bs; kcol++){
1965         for (krow=0; krow<bs; krow++){
1966           atmp = PetscAbsScalar(*aa); aa++;
1967           row = brow + krow;    /* row index */
1968           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1969           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1970         }
1971       }
1972       aj++;
1973     }
1974   }
1975   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1981 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1982 {
1983   PetscErrorCode ierr;
1984 
1985   PetscFunctionBegin;
1986   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1992 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1993 {
1994   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1995   PetscFunctionBegin;
1996   *array = a->a;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2002 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2003 {
2004   PetscFunctionBegin;
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 #include "petscblaslapack.h"
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2011 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
2012 {
2013   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2014   PetscErrorCode ierr;
2015   PetscInt       i,bs=Y->bs,j,bs2;
2016   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
2017 
2018   PetscFunctionBegin;
2019   if (str == SAME_NONZERO_PATTERN) {
2020     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
2021   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2022     if (y->xtoy && y->XtoY != X) {
2023       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2024       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2025     }
2026     if (!y->xtoy) { /* get xtoy */
2027       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2028       y->XtoY = X;
2029     }
2030     bs2 = bs*bs;
2031     for (i=0; i<x->nz; i++) {
2032       j = 0;
2033       while (j < bs2){
2034         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2035         j++;
2036       }
2037     }
2038     ierr = PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2039   } else {
2040     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2041   }
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 /* -------------------------------------------------------------------*/
2046 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2047        MatGetRow_SeqBAIJ,
2048        MatRestoreRow_SeqBAIJ,
2049        MatMult_SeqBAIJ_N,
2050 /* 4*/ MatMultAdd_SeqBAIJ_N,
2051        MatMultTranspose_SeqBAIJ,
2052        MatMultTransposeAdd_SeqBAIJ,
2053        MatSolve_SeqBAIJ_N,
2054        0,
2055        0,
2056 /*10*/ 0,
2057        MatLUFactor_SeqBAIJ,
2058        0,
2059        0,
2060        MatTranspose_SeqBAIJ,
2061 /*15*/ MatGetInfo_SeqBAIJ,
2062        MatEqual_SeqBAIJ,
2063        MatGetDiagonal_SeqBAIJ,
2064        MatDiagonalScale_SeqBAIJ,
2065        MatNorm_SeqBAIJ,
2066 /*20*/ 0,
2067        MatAssemblyEnd_SeqBAIJ,
2068        0,
2069        MatSetOption_SeqBAIJ,
2070        MatZeroEntries_SeqBAIJ,
2071 /*25*/ MatZeroRows_SeqBAIJ,
2072        MatLUFactorSymbolic_SeqBAIJ,
2073        MatLUFactorNumeric_SeqBAIJ_N,
2074        MatCholeskyFactorSymbolic_SeqBAIJ,
2075        MatCholeskyFactorNumeric_SeqBAIJ_N,
2076 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2077        MatILUFactorSymbolic_SeqBAIJ,
2078        MatICCFactorSymbolic_SeqBAIJ,
2079        MatGetArray_SeqBAIJ,
2080        MatRestoreArray_SeqBAIJ,
2081 /*35*/ MatDuplicate_SeqBAIJ,
2082        0,
2083        0,
2084        MatILUFactor_SeqBAIJ,
2085        0,
2086 /*40*/ MatAXPY_SeqBAIJ,
2087        MatGetSubMatrices_SeqBAIJ,
2088        MatIncreaseOverlap_SeqBAIJ,
2089        MatGetValues_SeqBAIJ,
2090        0,
2091 /*45*/ MatPrintHelp_SeqBAIJ,
2092        MatScale_SeqBAIJ,
2093        0,
2094        0,
2095        0,
2096 /*50*/ 0,
2097        MatGetRowIJ_SeqBAIJ,
2098        MatRestoreRowIJ_SeqBAIJ,
2099        0,
2100        0,
2101 /*55*/ 0,
2102        0,
2103        0,
2104        0,
2105        MatSetValuesBlocked_SeqBAIJ,
2106 /*60*/ MatGetSubMatrix_SeqBAIJ,
2107        MatDestroy_SeqBAIJ,
2108        MatView_SeqBAIJ,
2109        MatGetPetscMaps_Petsc,
2110        0,
2111 /*65*/ 0,
2112        0,
2113        0,
2114        0,
2115        0,
2116 /*70*/ MatGetRowMax_SeqBAIJ,
2117        MatConvert_Basic,
2118        0,
2119        0,
2120        0,
2121 /*75*/ 0,
2122        0,
2123        0,
2124        0,
2125        0,
2126 /*80*/ 0,
2127        0,
2128        0,
2129        0,
2130        MatLoad_SeqBAIJ,
2131 /*85*/ 0,
2132        0,
2133        0,
2134        0,
2135        0,
2136 /*90*/ 0,
2137        0,
2138        0,
2139        0,
2140        0,
2141 /*95*/ 0,
2142        0,
2143        0,
2144        0};
2145 
2146 EXTERN_C_BEGIN
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2149 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2150 {
2151   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2152   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2153   PetscErrorCode ierr;
2154 
2155   PetscFunctionBegin;
2156   if (aij->nonew != 1) {
2157     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2158   }
2159 
2160   /* allocate space for values if not already there */
2161   if (!aij->saved_values) {
2162     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2163   }
2164 
2165   /* copy values over */
2166   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 EXTERN_C_END
2170 
2171 EXTERN_C_BEGIN
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2174 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2175 {
2176   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2177   PetscErrorCode ierr;
2178   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2179 
2180   PetscFunctionBegin;
2181   if (aij->nonew != 1) {
2182     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2183   }
2184   if (!aij->saved_values) {
2185     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2186   }
2187 
2188   /* copy values over */
2189   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 EXTERN_C_END
2193 
2194 EXTERN_C_BEGIN
2195 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
2196 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,MatReuse,Mat*);
2197 EXTERN_C_END
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2203 {
2204   Mat_SeqBAIJ    *b;
2205   PetscErrorCode ierr;
2206   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2207   PetscTruth     flg,skipallocation = PETSC_FALSE;
2208 
2209   PetscFunctionBegin;
2210 
2211   if (nz == MAT_SKIP_ALLOCATION) {
2212     skipallocation = PETSC_TRUE;
2213     nz             = 0;
2214   }
2215 
2216   B->preallocated = PETSC_TRUE;
2217   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2218   if (nnz && newbs != bs) {
2219     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2220   }
2221   bs   = newbs;
2222 
2223   mbs  = B->m/bs;
2224   nbs  = B->n/bs;
2225   bs2  = bs*bs;
2226 
2227   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2228     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2229   }
2230 
2231   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2232   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2233   if (nnz) {
2234     for (i=0; i<mbs; i++) {
2235       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2236       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2237     }
2238   }
2239 
2240   b       = (Mat_SeqBAIJ*)B->data;
2241   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2242   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2243   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2244   if (!flg) {
2245     switch (bs) {
2246     case 1:
2247       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2248       B->ops->mult            = MatMult_SeqBAIJ_1;
2249       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2250       break;
2251     case 2:
2252       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2253       B->ops->mult            = MatMult_SeqBAIJ_2;
2254       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2255       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2256       break;
2257     case 3:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2259       B->ops->mult            = MatMult_SeqBAIJ_3;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2261       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2262       break;
2263     case 4:
2264       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2265       B->ops->mult            = MatMult_SeqBAIJ_4;
2266       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2267       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2268       break;
2269     case 5:
2270       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2271       B->ops->mult            = MatMult_SeqBAIJ_5;
2272       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2273       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2274       break;
2275     case 6:
2276       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2277       B->ops->mult            = MatMult_SeqBAIJ_6;
2278       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2279       break;
2280     case 7:
2281       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2282       B->ops->mult            = MatMult_SeqBAIJ_7;
2283       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2284       break;
2285     default:
2286       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2287       B->ops->mult            = MatMult_SeqBAIJ_N;
2288       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2289       break;
2290     }
2291   }
2292   B->bs      = bs;
2293   b->mbs     = mbs;
2294   b->nbs     = nbs;
2295   if (!skipallocation) {
2296     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2297     /* b->ilen will count nonzeros in each block row so far. */
2298     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2299     if (!nnz) {
2300       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2301       else if (nz <= 0)        nz = 1;
2302       for (i=0; i<mbs; i++) b->imax[i] = nz;
2303       nz = nz*mbs;
2304     } else {
2305       nz = 0;
2306       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2307     }
2308 
2309     /* allocate the matrix space */
2310     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2311     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2312     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2313     b->singlemalloc = PETSC_TRUE;
2314 
2315     b->i[0] = 0;
2316     for (i=1; i<mbs+1; i++) {
2317       b->i[i] = b->i[i-1] + b->imax[i-1];
2318     }
2319   }
2320 
2321   B->bs               = bs;
2322   b->bs2              = bs2;
2323   b->mbs              = mbs;
2324   b->nz               = 0;
2325   b->maxnz            = nz*bs2;
2326   B->info.nz_unneeded = (PetscReal)b->maxnz;
2327   PetscFunctionReturn(0);
2328 }
2329 EXTERN_C_END
2330 
2331 /*MC
2332    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2333    block sparse compressed row format.
2334 
2335    Options Database Keys:
2336 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2337 
2338   Level: beginner
2339 
2340 .seealso: MatCreateSeqBAIJ
2341 M*/
2342 
2343 EXTERN_C_BEGIN
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatCreate_SeqBAIJ"
2346 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2347 {
2348   PetscErrorCode ierr;
2349   PetscMPIInt    size;
2350   Mat_SeqBAIJ    *b;
2351 
2352   PetscFunctionBegin;
2353   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2354   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2355 
2356   B->m = B->M = PetscMax(B->m,B->M);
2357   B->n = B->N = PetscMax(B->n,B->N);
2358   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2359   B->data = (void*)b;
2360   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2361   B->factor           = 0;
2362   B->mapping          = 0;
2363   b->row              = 0;
2364   b->col              = 0;
2365   b->icol             = 0;
2366   b->reallocs         = 0;
2367   b->saved_values     = 0;
2368 #if defined(PETSC_USE_MAT_SINGLE)
2369   b->setvalueslen     = 0;
2370   b->setvaluescopy    = PETSC_NULL;
2371 #endif
2372 
2373   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2374   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2375 
2376   b->sorted           = PETSC_FALSE;
2377   b->roworiented      = PETSC_TRUE;
2378   b->nonew            = 0;
2379   b->diag             = 0;
2380   b->solve_work       = 0;
2381   b->mult_work        = 0;
2382   B->spptr            = 0;
2383   B->info.nz_unneeded = (PetscReal)b->maxnz;
2384   b->keepzeroedrows   = PETSC_FALSE;
2385   b->xtoy              = 0;
2386   b->XtoY              = 0;
2387   b->compressedrow.use     = PETSC_FALSE;
2388   b->compressedrow.nrows   = 0;
2389   b->compressedrow.i       = PETSC_NULL;
2390   b->compressedrow.rindex  = PETSC_NULL;
2391   b->compressedrow.checked = PETSC_FALSE;
2392   B->same_nonzero          = PETSC_FALSE;
2393 
2394   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2395                                      "MatInvertBlockDiagonal_SeqBAIJ",
2396                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2397   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2398                                      "MatStoreValues_SeqBAIJ",
2399                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2401                                      "MatRetrieveValues_SeqBAIJ",
2402                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2404                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2405                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2407                                      "MatConvert_SeqBAIJ_SeqAIJ",
2408                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2409 #if !defined(PETSC_USE_64BIT_INT)
2410   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2411                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2412                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2413 #endif
2414   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2415                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2416                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 EXTERN_C_END
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2423 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2424 {
2425   Mat            C;
2426   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2427   PetscErrorCode ierr;
2428   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2429 
2430   PetscFunctionBegin;
2431   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2432 
2433   *B = 0;
2434   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2435   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2436   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2437   c    = (Mat_SeqBAIJ*)C->data;
2438 
2439   C->M   = A->M;
2440   C->N   = A->N;
2441   C->bs  = A->bs;
2442   c->bs2 = a->bs2;
2443   c->mbs = a->mbs;
2444   c->nbs = a->nbs;
2445 
2446   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
2447   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
2448   for (i=0; i<mbs; i++) {
2449     c->imax[i] = a->imax[i];
2450     c->ilen[i] = a->ilen[i];
2451   }
2452 
2453   /* allocate the matrix space */
2454   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2455   c->singlemalloc = PETSC_TRUE;
2456   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2457   if (mbs > 0) {
2458     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2459     if (cpvalues == MAT_COPY_VALUES) {
2460       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2461     } else {
2462       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2463     }
2464   }
2465   c->sorted      = a->sorted;
2466   c->roworiented = a->roworiented;
2467   c->nonew       = a->nonew;
2468 
2469   if (a->diag) {
2470     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2471     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2472     for (i=0; i<mbs; i++) {
2473       c->diag[i] = a->diag[i];
2474     }
2475   } else c->diag        = 0;
2476   c->nz                 = a->nz;
2477   c->maxnz              = a->maxnz;
2478   c->solve_work         = 0;
2479   c->mult_work          = 0;
2480   C->preallocated       = PETSC_TRUE;
2481   C->assembled          = PETSC_TRUE;
2482 
2483   c->compressedrow.use     = a->compressedrow.use;
2484   c->compressedrow.nrows   = a->compressedrow.nrows;
2485   c->compressedrow.checked = a->compressedrow.checked;
2486   if ( a->compressedrow.checked && a->compressedrow.use){
2487     i = a->compressedrow.nrows;
2488     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2489     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2490     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2491     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2492   } else {
2493     c->compressedrow.use    = PETSC_FALSE;
2494     c->compressedrow.i      = PETSC_NULL;
2495     c->compressedrow.rindex = PETSC_NULL;
2496   }
2497   C->same_nonzero = A->same_nonzero;
2498   *B = C;
2499   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatLoad_SeqBAIJ"
2505 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2506 {
2507   Mat_SeqBAIJ    *a;
2508   Mat            B;
2509   PetscErrorCode ierr;
2510   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2511   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2512   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2513   PetscInt       *masked,nmask,tmp,bs2,ishift;
2514   PetscMPIInt    size;
2515   int            fd;
2516   PetscScalar    *aa;
2517   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2518 
2519   PetscFunctionBegin;
2520   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2521   bs2  = bs*bs;
2522 
2523   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2524   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2525   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2526   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2527   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2528   M = header[1]; N = header[2]; nz = header[3];
2529 
2530   if (header[3] < 0) {
2531     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2532   }
2533 
2534   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2535 
2536   /*
2537      This code adds extra rows to make sure the number of rows is
2538     divisible by the blocksize
2539   */
2540   mbs        = M/bs;
2541   extra_rows = bs - M + bs*(mbs);
2542   if (extra_rows == bs) extra_rows = 0;
2543   else                  mbs++;
2544   if (extra_rows) {
2545     ierr = PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2546   }
2547 
2548   /* read in row lengths */
2549   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2550   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2551   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2552 
2553   /* read in column indices */
2554   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2555   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2556   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2557 
2558   /* loop over row lengths determining block row lengths */
2559   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2560   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2561   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2562   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2563   masked   = mask + mbs;
2564   rowcount = 0; nzcount = 0;
2565   for (i=0; i<mbs; i++) {
2566     nmask = 0;
2567     for (j=0; j<bs; j++) {
2568       kmax = rowlengths[rowcount];
2569       for (k=0; k<kmax; k++) {
2570         tmp = jj[nzcount++]/bs;
2571         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2572       }
2573       rowcount++;
2574     }
2575     browlengths[i] += nmask;
2576     /* zero out the mask elements we set */
2577     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2578   }
2579 
2580   /* create our matrix */
2581   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2582   ierr = MatSetType(B,type);CHKERRQ(ierr);
2583   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2584   a = (Mat_SeqBAIJ*)B->data;
2585 
2586   /* set matrix "i" values */
2587   a->i[0] = 0;
2588   for (i=1; i<= mbs; i++) {
2589     a->i[i]      = a->i[i-1] + browlengths[i-1];
2590     a->ilen[i-1] = browlengths[i-1];
2591   }
2592   a->nz         = 0;
2593   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2594 
2595   /* read in nonzero values */
2596   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2597   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2598   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2599 
2600   /* set "a" and "j" values into matrix */
2601   nzcount = 0; jcount = 0;
2602   for (i=0; i<mbs; i++) {
2603     nzcountb = nzcount;
2604     nmask    = 0;
2605     for (j=0; j<bs; j++) {
2606       kmax = rowlengths[i*bs+j];
2607       for (k=0; k<kmax; k++) {
2608         tmp = jj[nzcount++]/bs;
2609 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2610       }
2611     }
2612     /* sort the masked values */
2613     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2614 
2615     /* set "j" values into matrix */
2616     maskcount = 1;
2617     for (j=0; j<nmask; j++) {
2618       a->j[jcount++]  = masked[j];
2619       mask[masked[j]] = maskcount++;
2620     }
2621     /* set "a" values into matrix */
2622     ishift = bs2*a->i[i];
2623     for (j=0; j<bs; j++) {
2624       kmax = rowlengths[i*bs+j];
2625       for (k=0; k<kmax; k++) {
2626         tmp       = jj[nzcountb]/bs ;
2627         block     = mask[tmp] - 1;
2628         point     = jj[nzcountb] - bs*tmp;
2629         idx       = ishift + bs2*block + j + bs*point;
2630         a->a[idx] = (MatScalar)aa[nzcountb++];
2631       }
2632     }
2633     /* zero out the mask elements we set */
2634     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2635   }
2636   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2637 
2638   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2639   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2640   ierr = PetscFree(aa);CHKERRQ(ierr);
2641   ierr = PetscFree(jj);CHKERRQ(ierr);
2642   ierr = PetscFree(mask);CHKERRQ(ierr);
2643 
2644   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2645   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2646   ierr = MatView_Private(B);CHKERRQ(ierr);
2647 
2648   *A = B;
2649   PetscFunctionReturn(0);
2650 }
2651 
2652 #undef __FUNCT__
2653 #define __FUNCT__ "MatCreateSeqBAIJ"
2654 /*@C
2655    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2656    compressed row) format.  For good matrix assembly performance the
2657    user should preallocate the matrix storage by setting the parameter nz
2658    (or the array nnz).  By setting these parameters accurately, performance
2659    during matrix assembly can be increased by more than a factor of 50.
2660 
2661    Collective on MPI_Comm
2662 
2663    Input Parameters:
2664 +  comm - MPI communicator, set to PETSC_COMM_SELF
2665 .  bs - size of block
2666 .  m - number of rows
2667 .  n - number of columns
2668 .  nz - number of nonzero blocks  per block row (same for all rows)
2669 -  nnz - array containing the number of nonzero blocks in the various block rows
2670          (possibly different for each block row) or PETSC_NULL
2671 
2672    Output Parameter:
2673 .  A - the matrix
2674 
2675    Options Database Keys:
2676 .   -mat_no_unroll - uses code that does not unroll the loops in the
2677                      block calculations (much slower)
2678 .    -mat_block_size - size of the blocks to use
2679 
2680    Level: intermediate
2681 
2682    Notes:
2683    The number of rows and columns must be divisible by blocksize.
2684 
2685    If the nnz parameter is given then the nz parameter is ignored
2686 
2687    A nonzero block is any block that as 1 or more nonzeros in it
2688 
2689    The block AIJ format is fully compatible with standard Fortran 77
2690    storage.  That is, the stored row and column indices can begin at
2691    either one (as in Fortran) or zero.  See the users' manual for details.
2692 
2693    Specify the preallocated storage with either nz or nnz (not both).
2694    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2695    allocation.  For additional details, see the users manual chapter on
2696    matrices.
2697 
2698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2699 @*/
2700 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2701 {
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2706   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2707   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2713 /*@C
2714    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2715    per row in the matrix. For good matrix assembly performance the
2716    user should preallocate the matrix storage by setting the parameter nz
2717    (or the array nnz).  By setting these parameters accurately, performance
2718    during matrix assembly can be increased by more than a factor of 50.
2719 
2720    Collective on MPI_Comm
2721 
2722    Input Parameters:
2723 +  A - the matrix
2724 .  bs - size of block
2725 .  nz - number of block nonzeros per block row (same for all rows)
2726 -  nnz - array containing the number of block nonzeros in the various block rows
2727          (possibly different for each block row) or PETSC_NULL
2728 
2729    Options Database Keys:
2730 .   -mat_no_unroll - uses code that does not unroll the loops in the
2731                      block calculations (much slower)
2732 .    -mat_block_size - size of the blocks to use
2733 
2734    Level: intermediate
2735 
2736    Notes:
2737    If the nnz parameter is given then the nz parameter is ignored
2738 
2739    The block AIJ format is fully compatible with standard Fortran 77
2740    storage.  That is, the stored row and column indices can begin at
2741    either one (as in Fortran) or zero.  See the users' manual for details.
2742 
2743    Specify the preallocated storage with either nz or nnz (not both).
2744    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2745    allocation.  For additional details, see the users manual chapter on
2746    matrices.
2747 
2748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2749 @*/
2750 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2751 {
2752   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2753 
2754   PetscFunctionBegin;
2755   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2756   if (f) {
2757     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2758   }
2759   PetscFunctionReturn(0);
2760 }
2761 
2762