xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2b6de112da0fc1f51a6da7a7bb5eef438765bf99)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
233   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
250       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
251       i2     = 3;
252       idiag += 9;
253       for (i=1; i<m; i++) {
254 	v     = aa + 9*ai[i];
255 	vi    = aj + ai[i];
256 	nz    = diag[i] - ai[i];
257 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
258 	while (nz--) {
259 	  idx  = 3*(*vi++);
260 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
261 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
262 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
263 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
264 	  v   += 9;
265 	}
266 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
267 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
268 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
269         idiag   += 9;
270         i2      += 3;
271       }
272       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
273       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
274     }
275     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
276         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
277       i2    = 0;
278       mdiag = a->idiag+9*a->mbs;
279       for (i=0; i<m; i++) {
280         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
281         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
282         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
283         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
284         mdiag  += 9;
285         i2     += 3;
286       }
287       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
288     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
289       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
290     }
291     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
292       idiag   = a->idiag+9*a->mbs - 9;
293       i2      = 3*m - 3;
294       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
295       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
296       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
297       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
298       idiag -= 9;
299       i2    -= 3;
300       for (i=m-2; i>=0; i--) {
301 	v     = aa + 9*(diag[i]+1);
302 	vi    = aj + diag[i] + 1;
303 	nz    = ai[i+1] - diag[i] - 1;
304 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
305 	while (nz--) {
306 	  idx  = 3*(*vi++);
307 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
308 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
309 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
310 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
311 	  v   += 9;
312 	}
313 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
314 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
315 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
316         idiag   -= 9;
317         i2      -= 3;
318       }
319       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
320     }
321   } else {
322     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
323   }
324   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
325   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
331 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
332 {
333   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
334   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
335   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
336   PetscErrorCode     ierr;
337   PetscInt           m = a->mbs,i,i2,nz,idx;
338   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
339 
340   PetscFunctionBegin;
341   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
342   its = its*lits;
343   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
344   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
345   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
346   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
347   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
348 
349   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
350 
351   diag  = a->diag;
352   idiag = a->idiag;
353   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
354   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
355 
356   if (flag & SOR_ZERO_INITIAL_GUESS) {
357     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
358       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
359       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
360       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
361       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
362       i2     = 4;
363       idiag += 16;
364       for (i=1; i<m; i++) {
365 	v     = aa + 16*ai[i];
366 	vi    = aj + ai[i];
367 	nz    = diag[i] - ai[i];
368 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
369 	while (nz--) {
370 	  idx  = 4*(*vi++);
371 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
372 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
373 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
374 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
375 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
376 	  v   += 16;
377 	}
378 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
379 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
380 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
381 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
382         idiag   += 16;
383         i2      += 4;
384       }
385       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
387     }
388     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390       i2    = 0;
391       mdiag = a->idiag+16*a->mbs;
392       for (i=0; i<m; i++) {
393         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
394         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
395         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
396         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
397         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
398         mdiag  += 16;
399         i2     += 4;
400       }
401       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
402     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
403       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
404     }
405     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
406       idiag   = a->idiag+16*a->mbs - 16;
407       i2      = 4*m - 4;
408       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
409       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
410       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
411       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
412       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
413       idiag -= 16;
414       i2    -= 4;
415       for (i=m-2; i>=0; i--) {
416 	v     = aa + 16*(diag[i]+1);
417 	vi    = aj + diag[i] + 1;
418 	nz    = ai[i+1] - diag[i] - 1;
419 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
420 	while (nz--) {
421 	  idx  = 4*(*vi++);
422 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
423 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
424 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
425 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
426 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
427 	  v   += 16;
428 	}
429 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
430 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
431 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
432 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
433         idiag   -= 16;
434         i2      -= 4;
435       }
436       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
437     }
438   } else {
439     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
440   }
441   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
442   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
443   PetscFunctionReturn(0);
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
448 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
449 {
450   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
451   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
452   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
453   PetscErrorCode     ierr;
454   PetscInt           m = a->mbs,i,i2,nz,idx;
455   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
456 
457   PetscFunctionBegin;
458   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
459   its = its*lits;
460   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
461   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
462   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
463   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
464   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
465 
466   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
467 
468   diag  = a->diag;
469   idiag = a->idiag;
470   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
471   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
472 
473   if (flag & SOR_ZERO_INITIAL_GUESS) {
474     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
475       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
476       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
477       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
478       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
479       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
480       i2     = 5;
481       idiag += 25;
482       for (i=1; i<m; i++) {
483 	v     = aa + 25*ai[i];
484 	vi    = aj + ai[i];
485 	nz    = diag[i] - ai[i];
486 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
487 	while (nz--) {
488 	  idx  = 5*(*vi++);
489 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
490 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
491 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
492 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
493 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
494 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
495 	  v   += 25;
496 	}
497 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
498 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
499 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
500 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
501 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
502         idiag   += 25;
503         i2      += 5;
504       }
505       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
507     }
508     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510       i2    = 0;
511       mdiag = a->idiag+25*a->mbs;
512       for (i=0; i<m; i++) {
513         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
514         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
515         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
516         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
517         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
518         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
519         mdiag  += 25;
520         i2     += 5;
521       }
522       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
523     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
524       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
525     }
526     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
527       idiag   = a->idiag+25*a->mbs - 25;
528       i2      = 5*m - 5;
529       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
530       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
531       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
532       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
533       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
534       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
535       idiag -= 25;
536       i2    -= 5;
537       for (i=m-2; i>=0; i--) {
538 	v     = aa + 25*(diag[i]+1);
539 	vi    = aj + diag[i] + 1;
540 	nz    = ai[i+1] - diag[i] - 1;
541 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
542 	while (nz--) {
543 	  idx  = 5*(*vi++);
544 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
545 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
546 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
547 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
548 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
549 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
550 	  v   += 25;
551 	}
552 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
553 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
554 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
555 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
556 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
557         idiag   -= 25;
558         i2      -= 5;
559       }
560       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
561     }
562   } else {
563     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
564   }
565   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
566   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 /*
571     Special version for direct calls from Fortran (Used in PETSc-fun3d)
572 */
573 #if defined(PETSC_HAVE_FORTRAN_CAPS)
574 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
575 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
576 #define matsetvaluesblocked4_ matsetvaluesblocked4
577 #endif
578 
579 EXTERN_C_BEGIN
580 #undef __FUNCT__
581 #define __FUNCT__ "matsetvaluesblocked4_"
582 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
583 {
584   Mat               A = *AA;
585   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
586   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
587   PetscInt          *ai=a->i,*ailen=a->ilen;
588   PetscInt          *aj=a->j,stepval,lastcol = -1;
589   const PetscScalar *value = v;
590   MatScalar         *ap,*aa = a->a,*bap;
591 
592   PetscFunctionBegin;
593   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
594   stepval = (n-1)*4;
595   for (k=0; k<m; k++) { /* loop over added rows */
596     row  = im[k];
597     rp   = aj + ai[row];
598     ap   = aa + 16*ai[row];
599     nrow = ailen[row];
600     low  = 0;
601     high = nrow;
602     for (l=0; l<n; l++) { /* loop over added columns */
603       col = in[l];
604       if (col <= lastcol) low = 0; else high = nrow;
605       lastcol = col;
606       value = v + k*(stepval+4 + l)*4;
607       while (high-low > 7) {
608         t = (low+high)/2;
609         if (rp[t] > col) high = t;
610         else             low  = t;
611       }
612       for (i=low; i<high; i++) {
613         if (rp[i] > col) break;
614         if (rp[i] == col) {
615           bap  = ap +  16*i;
616           for (ii=0; ii<4; ii++,value+=stepval) {
617             for (jj=ii; jj<16; jj+=4) {
618               bap[jj] += *value++;
619             }
620           }
621           goto noinsert2;
622         }
623       }
624       N = nrow++ - 1;
625       high++; /* added new column index thus must search to one higher than before */
626       /* shift up all the later entries in this row */
627       for (ii=N; ii>=i; ii--) {
628         rp[ii+1] = rp[ii];
629         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
630       }
631       if (N >= i) {
632         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
633       }
634       rp[i] = col;
635       bap   = ap +  16*i;
636       for (ii=0; ii<4; ii++,value+=stepval) {
637         for (jj=ii; jj<16; jj+=4) {
638           bap[jj] = *value++;
639         }
640       }
641       noinsert2:;
642       low = i;
643     }
644     ailen[row] = nrow;
645   }
646   PetscFunctionReturnVoid();
647 }
648 EXTERN_C_END
649 
650 #if defined(PETSC_HAVE_FORTRAN_CAPS)
651 #define matsetvalues4_ MATSETVALUES4
652 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
653 #define matsetvalues4_ matsetvalues4
654 #endif
655 
656 EXTERN_C_BEGIN
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetValues4_"
659 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
660 {
661   Mat         A = *AA;
662   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
663   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
664   PetscInt    *ai=a->i,*ailen=a->ilen;
665   PetscInt    *aj=a->j,brow,bcol;
666   PetscInt    ridx,cidx,lastcol = -1;
667   MatScalar   *ap,value,*aa=a->a,*bap;
668 
669   PetscFunctionBegin;
670   for (k=0; k<m; k++) { /* loop over added rows */
671     row  = im[k]; brow = row/4;
672     rp   = aj + ai[brow];
673     ap   = aa + 16*ai[brow];
674     nrow = ailen[brow];
675     low  = 0;
676     high = nrow;
677     for (l=0; l<n; l++) { /* loop over added columns */
678       col = in[l]; bcol = col/4;
679       ridx = row % 4; cidx = col % 4;
680       value = v[l + k*n];
681       if (col <= lastcol) low = 0; else high = nrow;
682       lastcol = col;
683       while (high-low > 7) {
684         t = (low+high)/2;
685         if (rp[t] > bcol) high = t;
686         else              low  = t;
687       }
688       for (i=low; i<high; i++) {
689         if (rp[i] > bcol) break;
690         if (rp[i] == bcol) {
691           bap  = ap +  16*i + 4*cidx + ridx;
692           *bap += value;
693           goto noinsert1;
694         }
695       }
696       N = nrow++ - 1;
697       high++; /* added new column thus must search to one higher than before */
698       /* shift up all the later entries in this row */
699       for (ii=N; ii>=i; ii--) {
700         rp[ii+1] = rp[ii];
701         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
702       }
703       if (N>=i) {
704         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
705       }
706       rp[i]                    = bcol;
707       ap[16*i + 4*cidx + ridx] = value;
708       noinsert1:;
709       low = i;
710     }
711     ailen[brow] = nrow;
712   }
713   PetscFunctionReturnVoid();
714 }
715 EXTERN_C_END
716 
717 /*  UGLY, ugly, ugly
718    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
719    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
720    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
721    converts the entries into single precision and then calls ..._MatScalar() to put them
722    into the single precision data structures.
723 */
724 #if defined(PETSC_USE_MAT_SINGLE)
725 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
726 #else
727 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
728 #endif
729 
730 #define CHUNKSIZE  10
731 
732 /*
733      Checks for missing diagonals
734 */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
737 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
738 {
739   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
740   PetscErrorCode ierr;
741   PetscInt       *diag,*jj = a->j,i;
742 
743   PetscFunctionBegin;
744   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
745   *missing = PETSC_FALSE;
746   diag     = a->diag;
747   for (i=0; i<a->mbs; i++) {
748     if (jj[diag[i]] != i) {
749       *missing  = PETSC_TRUE;
750       if (d) *d = i;
751     }
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
758 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
759 {
760   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
761   PetscErrorCode ierr;
762   PetscInt       i,j,m = a->mbs;
763 
764   PetscFunctionBegin;
765   if (!a->diag) {
766     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
767   }
768   for (i=0; i<m; i++) {
769     a->diag[i] = a->i[i+1];
770     for (j=a->i[i]; j<a->i[i+1]; j++) {
771       if (a->j[j] == i) {
772         a->diag[i] = j;
773         break;
774       }
775     }
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 
781 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
785 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
786 {
787   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
788   PetscErrorCode ierr;
789   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1;
790   PetscInt     *tia, *tja;
791 
792   PetscFunctionBegin;
793   *nn = n;
794   if (!ia) PetscFunctionReturn(0);
795   if (symmetric) {
796     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
797   } else {
798     tia = a->i; tja = a->j;
799   }
800 
801   if (!blockcompressed && bs > 1) {
802     (*nn) *= bs;
803     nbs    = bs;
804     /* malloc & create the natural set of indices */
805     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
806     (*ia)[0] = 0;
807     for (j=1; j<bs; j++) {
808       (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
809     }
810 
811     for (i=1; i<n; i++) {
812       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
813       for (j=1; j<bs; j++) {
814         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
815       }
816     }
817 
818     (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
819 
820     if (ja) SETERRQ(PETSC_ERR_SUP,"Cannot provide uncompressed row indices");
821     n     *= bs;
822     nz *= bs*bs;
823     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
824       ierr = PetscFree(tia);CHKERRQ(ierr);
825       ierr = PetscFree(tja);CHKERRQ(ierr);
826     }
827   } else {
828     *ia = tia;
829     if (ja) *ja = tja;
830   }
831   if (oshift == 1) {
832     for (i=0; i<n+nbs; i++) (*ia)[i]++;
833     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
834   }
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
840 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
841 {
842   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
843   PetscErrorCode ierr;
844   PetscInt       i,n = a->mbs,nz = a->i[n];
845 
846   PetscFunctionBegin;
847   if (!ia) PetscFunctionReturn(0);
848   if (!blockcompressed && A->rmap.bs > 1) {
849     ierr = PetscFree(*ia);CHKERRQ(ierr);
850   } else if (symmetric) {
851     ierr = PetscFree(*ia);CHKERRQ(ierr);
852     ierr = PetscFree(*ja);CHKERRQ(ierr);
853   } else if (oshift == 1) { /* blockcompressed */
854     for (i=0; i<nz; i++) a->j[i]--;
855     for (i=0; i<n+1; i++) a->i[i]--;
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatDestroy_SeqBAIJ"
862 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
863 {
864   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
865   PetscErrorCode ierr;
866 
867   PetscFunctionBegin;
868 #if defined(PETSC_USE_LOG)
869   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
870 #endif
871   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
872   if (a->row) {
873     ierr = ISDestroy(a->row);CHKERRQ(ierr);
874   }
875   if (a->col) {
876     ierr = ISDestroy(a->col);CHKERRQ(ierr);
877   }
878   ierr = PetscFree(a->diag);CHKERRQ(ierr);
879   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
880   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
881   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
882   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
883   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
884   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
885 #if defined(PETSC_USE_MAT_SINGLE)
886   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
887 #endif
888   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
889   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
890 
891   ierr = PetscFree(a);CHKERRQ(ierr);
892 
893   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
894   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
895   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
896   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
897   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
898   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
899   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
900   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatSetOption_SeqBAIJ"
906 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
907 {
908   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
909   PetscErrorCode ierr;
910 
911   PetscFunctionBegin;
912   switch (op) {
913   case MAT_ROW_ORIENTED:
914     a->roworiented    = flg;
915     break;
916   case MAT_KEEP_ZEROED_ROWS:
917     a->keepzeroedrows = flg;
918     break;
919   case MAT_NEW_NONZERO_LOCATIONS:
920     a->nonew          = (flg ? 0 : 1);
921     break;
922   case MAT_NEW_NONZERO_LOCATION_ERR:
923     a->nonew          = (flg ? -1 : 0);
924     break;
925   case MAT_NEW_NONZERO_ALLOCATION_ERR:
926     a->nonew          = (flg ? -2 : 0);
927     break;
928   case MAT_NEW_DIAGONALS:
929   case MAT_IGNORE_OFF_PROC_ENTRIES:
930   case MAT_USE_HASH_TABLE:
931     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
932     break;
933   case MAT_SYMMETRIC:
934   case MAT_STRUCTURALLY_SYMMETRIC:
935   case MAT_HERMITIAN:
936   case MAT_SYMMETRY_ETERNAL:
937     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
938     break;
939   default:
940     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatGetRow_SeqBAIJ"
947 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
948 {
949   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
950   PetscErrorCode ierr;
951   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
952   MatScalar      *aa,*aa_i;
953   PetscScalar    *v_i;
954 
955   PetscFunctionBegin;
956   bs  = A->rmap.bs;
957   ai  = a->i;
958   aj  = a->j;
959   aa  = a->a;
960   bs2 = a->bs2;
961 
962   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
963 
964   bn  = row/bs;   /* Block number */
965   bp  = row % bs; /* Block Position */
966   M   = ai[bn+1] - ai[bn];
967   *nz = bs*M;
968 
969   if (v) {
970     *v = 0;
971     if (*nz) {
972       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
973       for (i=0; i<M; i++) { /* for each block in the block row */
974         v_i  = *v + i*bs;
975         aa_i = aa + bs2*(ai[bn] + i);
976         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
977       }
978     }
979   }
980 
981   if (idx) {
982     *idx = 0;
983     if (*nz) {
984       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
985       for (i=0; i<M; i++) { /* for each block in the block row */
986         idx_i = *idx + i*bs;
987         itmp  = bs*aj[ai[bn] + i];
988         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
989       }
990     }
991   }
992   PetscFunctionReturn(0);
993 }
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
997 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
998 {
999   PetscErrorCode ierr;
1000 
1001   PetscFunctionBegin;
1002   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1003   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1009 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1010 {
1011   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1012   Mat            C;
1013   PetscErrorCode ierr;
1014   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1015   PetscInt       *rows,*cols,bs2=a->bs2;
1016   PetscScalar    *array;
1017 
1018   PetscFunctionBegin;
1019   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1020   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1021   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1022 
1023 #if defined(PETSC_USE_MAT_SINGLE)
1024   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1025   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1026 #else
1027   array = a->a;
1028 #endif
1029 
1030   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1031   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1032   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1033   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1034   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1035   ierr = PetscFree(col);CHKERRQ(ierr);
1036   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1037   cols = rows + bs;
1038   for (i=0; i<mbs; i++) {
1039     cols[0] = i*bs;
1040     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1041     len = ai[i+1] - ai[i];
1042     for (j=0; j<len; j++) {
1043       rows[0] = (*aj++)*bs;
1044       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1045       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1046       array += bs2;
1047     }
1048   }
1049   ierr = PetscFree(rows);CHKERRQ(ierr);
1050 #if defined(PETSC_USE_MAT_SINGLE)
1051   ierr = PetscFree(array);
1052 #endif
1053 
1054   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1055   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1056 
1057   if (B) {
1058     *B = C;
1059   } else {
1060     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1067 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1068 {
1069   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1072   int            fd;
1073   PetscScalar    *aa;
1074   FILE           *file;
1075 
1076   PetscFunctionBegin;
1077   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1078   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1079   col_lens[0] = MAT_FILE_COOKIE;
1080 
1081   col_lens[1] = A->rmap.N;
1082   col_lens[2] = A->cmap.n;
1083   col_lens[3] = a->nz*bs2;
1084 
1085   /* store lengths of each row and write (including header) to file */
1086   count = 0;
1087   for (i=0; i<a->mbs; i++) {
1088     for (j=0; j<bs; j++) {
1089       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1090     }
1091   }
1092   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1093   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1094 
1095   /* store column indices (zero start index) */
1096   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1097   count = 0;
1098   for (i=0; i<a->mbs; i++) {
1099     for (j=0; j<bs; j++) {
1100       for (k=a->i[i]; k<a->i[i+1]; k++) {
1101         for (l=0; l<bs; l++) {
1102           jj[count++] = bs*a->j[k] + l;
1103         }
1104       }
1105     }
1106   }
1107   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1108   ierr = PetscFree(jj);CHKERRQ(ierr);
1109 
1110   /* store nonzero values */
1111   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1112   count = 0;
1113   for (i=0; i<a->mbs; i++) {
1114     for (j=0; j<bs; j++) {
1115       for (k=a->i[i]; k<a->i[i+1]; k++) {
1116         for (l=0; l<bs; l++) {
1117           aa[count++] = a->a[bs2*k + l*bs + j];
1118         }
1119       }
1120     }
1121   }
1122   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1123   ierr = PetscFree(aa);CHKERRQ(ierr);
1124 
1125   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1126   if (file) {
1127     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1128   }
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1134 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1135 {
1136   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1137   PetscErrorCode    ierr;
1138   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1139   PetscViewerFormat format;
1140 
1141   PetscFunctionBegin;
1142   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1143   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1144     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1145   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1146     Mat aij;
1147     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1148     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1149     ierr = MatDestroy(aij);CHKERRQ(ierr);
1150   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1151      PetscFunctionReturn(0);
1152   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1153     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1154     for (i=0; i<a->mbs; i++) {
1155       for (j=0; j<bs; j++) {
1156         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1157         for (k=a->i[i]; k<a->i[i+1]; k++) {
1158           for (l=0; l<bs; l++) {
1159 #if defined(PETSC_USE_COMPLEX)
1160             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1161               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1162                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1163             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1164               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1165                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1166             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1167               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1168             }
1169 #else
1170             if (a->a[bs2*k + l*bs + j] != 0.0) {
1171               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1172             }
1173 #endif
1174           }
1175         }
1176         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1177       }
1178     }
1179     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1180   } else {
1181     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1182     for (i=0; i<a->mbs; i++) {
1183       for (j=0; j<bs; j++) {
1184         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1185         for (k=a->i[i]; k<a->i[i+1]; k++) {
1186           for (l=0; l<bs; l++) {
1187 #if defined(PETSC_USE_COMPLEX)
1188             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1189               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1190                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1191             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1192               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1193                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1194             } else {
1195               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1196             }
1197 #else
1198             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1199 #endif
1200           }
1201         }
1202         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1203       }
1204     }
1205     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1206   }
1207   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1213 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1214 {
1215   Mat            A = (Mat) Aa;
1216   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1219   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1220   MatScalar      *aa;
1221   PetscViewer    viewer;
1222 
1223   PetscFunctionBegin;
1224 
1225   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1226   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1227 
1228   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1229 
1230   /* loop over matrix elements drawing boxes */
1231   color = PETSC_DRAW_BLUE;
1232   for (i=0,row=0; i<mbs; i++,row+=bs) {
1233     for (j=a->i[i]; j<a->i[i+1]; j++) {
1234       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1235       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1236       aa = a->a + j*bs2;
1237       for (k=0; k<bs; k++) {
1238         for (l=0; l<bs; l++) {
1239           if (PetscRealPart(*aa++) >=  0.) continue;
1240           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1241         }
1242       }
1243     }
1244   }
1245   color = PETSC_DRAW_CYAN;
1246   for (i=0,row=0; i<mbs; i++,row+=bs) {
1247     for (j=a->i[i]; j<a->i[i+1]; j++) {
1248       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1249       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1250       aa = a->a + j*bs2;
1251       for (k=0; k<bs; k++) {
1252         for (l=0; l<bs; l++) {
1253           if (PetscRealPart(*aa++) != 0.) continue;
1254           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1255         }
1256       }
1257     }
1258   }
1259 
1260   color = PETSC_DRAW_RED;
1261   for (i=0,row=0; i<mbs; i++,row+=bs) {
1262     for (j=a->i[i]; j<a->i[i+1]; j++) {
1263       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1264       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1265       aa = a->a + j*bs2;
1266       for (k=0; k<bs; k++) {
1267         for (l=0; l<bs; l++) {
1268           if (PetscRealPart(*aa++) <= 0.) continue;
1269           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1270         }
1271       }
1272     }
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1279 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1280 {
1281   PetscErrorCode ierr;
1282   PetscReal      xl,yl,xr,yr,w,h;
1283   PetscDraw      draw;
1284   PetscTruth     isnull;
1285 
1286   PetscFunctionBegin;
1287 
1288   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1289   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1290 
1291   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1292   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1293   xr += w;    yr += h;  xl = -w;     yl = -h;
1294   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1295   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1296   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatView_SeqBAIJ"
1302 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1303 {
1304   PetscErrorCode ierr;
1305   PetscTruth     iascii,isbinary,isdraw;
1306 
1307   PetscFunctionBegin;
1308   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1309   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1310   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1311   if (iascii){
1312     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1313   } else if (isbinary) {
1314     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1315   } else if (isdraw) {
1316     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1317   } else {
1318     Mat B;
1319     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1320     ierr = MatView(B,viewer);CHKERRQ(ierr);
1321     ierr = MatDestroy(B);CHKERRQ(ierr);
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1329 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1330 {
1331   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1332   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1333   PetscInt    *ai = a->i,*ailen = a->ilen;
1334   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1335   MatScalar   *ap,*aa = a->a;
1336 
1337   PetscFunctionBegin;
1338   for (k=0; k<m; k++) { /* loop over rows */
1339     row  = im[k]; brow = row/bs;
1340     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1341     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1342     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1343     nrow = ailen[brow];
1344     for (l=0; l<n; l++) { /* loop over columns */
1345       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1346       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1347       col  = in[l] ;
1348       bcol = col/bs;
1349       cidx = col%bs;
1350       ridx = row%bs;
1351       high = nrow;
1352       low  = 0; /* assume unsorted */
1353       while (high-low > 5) {
1354         t = (low+high)/2;
1355         if (rp[t] > bcol) high = t;
1356         else             low  = t;
1357       }
1358       for (i=low; i<high; i++) {
1359         if (rp[i] > bcol) break;
1360         if (rp[i] == bcol) {
1361           *v++ = ap[bs2*i+bs*cidx+ridx];
1362           goto finished;
1363         }
1364       }
1365       *v++ = 0.0;
1366       finished:;
1367     }
1368   }
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #if defined(PETSC_USE_MAT_SINGLE)
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1375 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1376 {
1377   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1378   PetscErrorCode ierr;
1379   PetscInt       i,N = m*n*b->bs2;
1380   MatScalar      *vsingle;
1381 
1382   PetscFunctionBegin;
1383   if (N > b->setvalueslen) {
1384     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1385     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1386     b->setvalueslen  = N;
1387   }
1388   vsingle = b->setvaluescopy;
1389   for (i=0; i<N; i++) {
1390     vsingle[i] = v[i];
1391   }
1392   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1393   PetscFunctionReturn(0);
1394 }
1395 #endif
1396 
1397 
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1400 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1401 {
1402   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1403   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1404   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1405   PetscErrorCode  ierr;
1406   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1407   PetscTruth      roworiented=a->roworiented;
1408   const MatScalar *value = v;
1409   MatScalar       *ap,*aa = a->a,*bap;
1410 
1411   PetscFunctionBegin;
1412   if (roworiented) {
1413     stepval = (n-1)*bs;
1414   } else {
1415     stepval = (m-1)*bs;
1416   }
1417   for (k=0; k<m; k++) { /* loop over added rows */
1418     row  = im[k];
1419     if (row < 0) continue;
1420 #if defined(PETSC_USE_DEBUG)
1421     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1422 #endif
1423     rp   = aj + ai[row];
1424     ap   = aa + bs2*ai[row];
1425     rmax = imax[row];
1426     nrow = ailen[row];
1427     low  = 0;
1428     high = nrow;
1429     for (l=0; l<n; l++) { /* loop over added columns */
1430       if (in[l] < 0) continue;
1431 #if defined(PETSC_USE_DEBUG)
1432       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1433 #endif
1434       col = in[l];
1435       if (roworiented) {
1436         value = v + k*(stepval+bs)*bs + l*bs;
1437       } else {
1438         value = v + l*(stepval+bs)*bs + k*bs;
1439       }
1440       if (col <= lastcol) low = 0; else high = nrow;
1441       lastcol = col;
1442       while (high-low > 7) {
1443         t = (low+high)/2;
1444         if (rp[t] > col) high = t;
1445         else             low  = t;
1446       }
1447       for (i=low; i<high; i++) {
1448         if (rp[i] > col) break;
1449         if (rp[i] == col) {
1450           bap  = ap +  bs2*i;
1451           if (roworiented) {
1452             if (is == ADD_VALUES) {
1453               for (ii=0; ii<bs; ii++,value+=stepval) {
1454                 for (jj=ii; jj<bs2; jj+=bs) {
1455                   bap[jj] += *value++;
1456                 }
1457               }
1458             } else {
1459               for (ii=0; ii<bs; ii++,value+=stepval) {
1460                 for (jj=ii; jj<bs2; jj+=bs) {
1461                   bap[jj] = *value++;
1462                 }
1463               }
1464             }
1465           } else {
1466             if (is == ADD_VALUES) {
1467               for (ii=0; ii<bs; ii++,value+=stepval) {
1468                 for (jj=0; jj<bs; jj++) {
1469                   *bap++ += *value++;
1470                 }
1471               }
1472             } else {
1473               for (ii=0; ii<bs; ii++,value+=stepval) {
1474                 for (jj=0; jj<bs; jj++) {
1475                   *bap++  = *value++;
1476                 }
1477               }
1478             }
1479           }
1480           goto noinsert2;
1481         }
1482       }
1483       if (nonew == 1) goto noinsert2;
1484       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1485       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1486       N = nrow++ - 1; high++;
1487       /* shift up all the later entries in this row */
1488       for (ii=N; ii>=i; ii--) {
1489         rp[ii+1] = rp[ii];
1490         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1491       }
1492       if (N >= i) {
1493         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1494       }
1495       rp[i] = col;
1496       bap   = ap +  bs2*i;
1497       if (roworiented) {
1498         for (ii=0; ii<bs; ii++,value+=stepval) {
1499           for (jj=ii; jj<bs2; jj+=bs) {
1500             bap[jj] = *value++;
1501           }
1502         }
1503       } else {
1504         for (ii=0; ii<bs; ii++,value+=stepval) {
1505           for (jj=0; jj<bs; jj++) {
1506             *bap++  = *value++;
1507           }
1508         }
1509       }
1510       noinsert2:;
1511       low = i;
1512     }
1513     ailen[row] = nrow;
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1520 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1521 {
1522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1523   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1524   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1525   PetscErrorCode ierr;
1526   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1527   MatScalar      *aa = a->a,*ap;
1528   PetscReal      ratio=0.6;
1529 
1530   PetscFunctionBegin;
1531   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1532 
1533   if (m) rmax = ailen[0];
1534   for (i=1; i<mbs; i++) {
1535     /* move each row back by the amount of empty slots (fshift) before it*/
1536     fshift += imax[i-1] - ailen[i-1];
1537     rmax   = PetscMax(rmax,ailen[i]);
1538     if (fshift) {
1539       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1540       N = ailen[i];
1541       for (j=0; j<N; j++) {
1542         ip[j-fshift] = ip[j];
1543         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1544       }
1545     }
1546     ai[i] = ai[i-1] + ailen[i-1];
1547   }
1548   if (mbs) {
1549     fshift += imax[mbs-1] - ailen[mbs-1];
1550     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1551   }
1552   /* reset ilen and imax for each row */
1553   for (i=0; i<mbs; i++) {
1554     ailen[i] = imax[i] = ai[i+1] - ai[i];
1555   }
1556   a->nz = ai[mbs];
1557 
1558   /* diagonals may have moved, so kill the diagonal pointers */
1559   a->idiagvalid = PETSC_FALSE;
1560   if (fshift && a->diag) {
1561     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1562     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1563     a->diag = 0;
1564   }
1565   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1566   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1567   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1568   a->reallocs          = 0;
1569   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1570 
1571   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1572   if (a->compressedrow.use){
1573     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1574   }
1575 
1576   A->same_nonzero = PETSC_TRUE;
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 /*
1581    This function returns an array of flags which indicate the locations of contiguous
1582    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1583    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1584    Assume: sizes should be long enough to hold all the values.
1585 */
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1588 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1589 {
1590   PetscInt   i,j,k,row;
1591   PetscTruth flg;
1592 
1593   PetscFunctionBegin;
1594   for (i=0,j=0; i<n; j++) {
1595     row = idx[i];
1596     if (row%bs!=0) { /* Not the begining of a block */
1597       sizes[j] = 1;
1598       i++;
1599     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1600       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1601       i++;
1602     } else { /* Begining of the block, so check if the complete block exists */
1603       flg = PETSC_TRUE;
1604       for (k=1; k<bs; k++) {
1605         if (row+k != idx[i+k]) { /* break in the block */
1606           flg = PETSC_FALSE;
1607           break;
1608         }
1609       }
1610       if (flg) { /* No break in the bs */
1611         sizes[j] = bs;
1612         i+= bs;
1613       } else {
1614         sizes[j] = 1;
1615         i++;
1616       }
1617     }
1618   }
1619   *bs_max = j;
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1625 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1626 {
1627   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1628   PetscErrorCode ierr;
1629   PetscInt       i,j,k,count,*rows;
1630   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1631   PetscScalar    zero = 0.0;
1632   MatScalar      *aa;
1633 
1634   PetscFunctionBegin;
1635   /* Make a copy of the IS and  sort it */
1636   /* allocate memory for rows,sizes */
1637   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1638   sizes = rows + is_n;
1639 
1640   /* copy IS values to rows, and sort them */
1641   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1642   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1643   if (baij->keepzeroedrows) {
1644     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1645     bs_max = is_n;
1646     A->same_nonzero = PETSC_TRUE;
1647   } else {
1648     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1649     A->same_nonzero = PETSC_FALSE;
1650   }
1651 
1652   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1653     row   = rows[j];
1654     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1655     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1656     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1657     if (sizes[i] == bs && !baij->keepzeroedrows) {
1658       if (diag != 0.0) {
1659         if (baij->ilen[row/bs] > 0) {
1660           baij->ilen[row/bs]       = 1;
1661           baij->j[baij->i[row/bs]] = row/bs;
1662           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1663         }
1664         /* Now insert all the diagonal values for this bs */
1665         for (k=0; k<bs; k++) {
1666           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1667         }
1668       } else { /* (diag == 0.0) */
1669         baij->ilen[row/bs] = 0;
1670       } /* end (diag == 0.0) */
1671     } else { /* (sizes[i] != bs) */
1672 #if defined (PETSC_USE_DEBUG)
1673       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1674 #endif
1675       for (k=0; k<count; k++) {
1676         aa[0] =  zero;
1677         aa    += bs;
1678       }
1679       if (diag != 0.0) {
1680         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1681       }
1682     }
1683   }
1684 
1685   ierr = PetscFree(rows);CHKERRQ(ierr);
1686   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1692 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1693 {
1694   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1695   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1696   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1697   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1698   PetscErrorCode ierr;
1699   PetscInt       ridx,cidx,bs2=a->bs2;
1700   PetscTruth     roworiented=a->roworiented;
1701   MatScalar      *ap,value,*aa=a->a,*bap;
1702 
1703   PetscFunctionBegin;
1704   for (k=0; k<m; k++) { /* loop over added rows */
1705     row  = im[k];
1706     brow = row/bs;
1707     if (row < 0) continue;
1708 #if defined(PETSC_USE_DEBUG)
1709     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1710 #endif
1711     rp   = aj + ai[brow];
1712     ap   = aa + bs2*ai[brow];
1713     rmax = imax[brow];
1714     nrow = ailen[brow];
1715     low  = 0;
1716     high = nrow;
1717     for (l=0; l<n; l++) { /* loop over added columns */
1718       if (in[l] < 0) continue;
1719 #if defined(PETSC_USE_DEBUG)
1720       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1721 #endif
1722       col = in[l]; bcol = col/bs;
1723       ridx = row % bs; cidx = col % bs;
1724       if (roworiented) {
1725         value = v[l + k*n];
1726       } else {
1727         value = v[k + l*m];
1728       }
1729       if (col <= lastcol) low = 0; else high = nrow;
1730       lastcol = col;
1731       while (high-low > 7) {
1732         t = (low+high)/2;
1733         if (rp[t] > bcol) high = t;
1734         else              low  = t;
1735       }
1736       for (i=low; i<high; i++) {
1737         if (rp[i] > bcol) break;
1738         if (rp[i] == bcol) {
1739           bap  = ap +  bs2*i + bs*cidx + ridx;
1740           if (is == ADD_VALUES) *bap += value;
1741           else                  *bap  = value;
1742           goto noinsert1;
1743         }
1744       }
1745       if (nonew == 1) goto noinsert1;
1746       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1747       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1748       N = nrow++ - 1; high++;
1749       /* shift up all the later entries in this row */
1750       for (ii=N; ii>=i; ii--) {
1751         rp[ii+1] = rp[ii];
1752         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1753       }
1754       if (N>=i) {
1755         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1756       }
1757       rp[i]                      = bcol;
1758       ap[bs2*i + bs*cidx + ridx] = value;
1759       a->nz++;
1760       noinsert1:;
1761       low = i;
1762     }
1763     ailen[brow] = nrow;
1764   }
1765   A->same_nonzero = PETSC_FALSE;
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1772 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1773 {
1774   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1775   Mat            outA;
1776   PetscErrorCode ierr;
1777   PetscTruth     row_identity,col_identity;
1778 
1779   PetscFunctionBegin;
1780   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1781   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1782   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1783   if (!row_identity || !col_identity) {
1784     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1785   }
1786 
1787   outA          = inA;
1788   inA->factor   = FACTOR_LU;
1789 
1790   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1791 
1792   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1793   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1794   a->row = row;
1795   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1796   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1797   a->col = col;
1798 
1799   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1800   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1801   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1802 
1803   /*
1804       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1805       for ILU(0) factorization with natural ordering
1806   */
1807   if (inA->rmap.bs < 8) {
1808     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1809   } else {
1810     if (!a->solve_work) {
1811       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1812       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1813     }
1814   }
1815 
1816   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1817 
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 EXTERN_C_BEGIN
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1824 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1825 {
1826   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1827   PetscInt    i,nz,nbs;
1828 
1829   PetscFunctionBegin;
1830   nz  = baij->maxnz/baij->bs2;
1831   nbs = baij->nbs;
1832   for (i=0; i<nz; i++) {
1833     baij->j[i] = indices[i];
1834   }
1835   baij->nz = nz;
1836   for (i=0; i<nbs; i++) {
1837     baij->ilen[i] = baij->imax[i];
1838   }
1839 
1840   PetscFunctionReturn(0);
1841 }
1842 EXTERN_C_END
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1846 /*@
1847     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1848        in the matrix.
1849 
1850   Input Parameters:
1851 +  mat - the SeqBAIJ matrix
1852 -  indices - the column indices
1853 
1854   Level: advanced
1855 
1856   Notes:
1857     This can be called if you have precomputed the nonzero structure of the
1858   matrix and want to provide it to the matrix object to improve the performance
1859   of the MatSetValues() operation.
1860 
1861     You MUST have set the correct numbers of nonzeros per row in the call to
1862   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1863 
1864     MUST be called before any calls to MatSetValues();
1865 
1866 @*/
1867 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1868 {
1869   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1870 
1871   PetscFunctionBegin;
1872   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1873   PetscValidPointer(indices,2);
1874   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1875   if (f) {
1876     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1877   } else {
1878     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1879   }
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1885 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1886 {
1887   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1888   PetscErrorCode ierr;
1889   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1890   PetscReal      atmp;
1891   PetscScalar    *x,zero = 0.0;
1892   MatScalar      *aa;
1893   PetscInt       ncols,brow,krow,kcol;
1894 
1895   PetscFunctionBegin;
1896   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1897   bs   = A->rmap.bs;
1898   aa   = a->a;
1899   ai   = a->i;
1900   aj   = a->j;
1901   mbs  = a->mbs;
1902 
1903   ierr = VecSet(v,zero);CHKERRQ(ierr);
1904   if (idx) {
1905     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1906   }
1907   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1908   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1909   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1910   for (i=0; i<mbs; i++) {
1911     ncols = ai[1] - ai[0]; ai++;
1912     brow  = bs*i;
1913     for (j=0; j<ncols; j++){
1914       for (kcol=0; kcol<bs; kcol++){
1915         for (krow=0; krow<bs; krow++){
1916           atmp = PetscAbsScalar(*aa);aa++;
1917           row = brow + krow;    /* row index */
1918           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1919           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1920         }
1921       }
1922       aj++;
1923     }
1924   }
1925   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatCopy_SeqBAIJ"
1931 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1932 {
1933   PetscErrorCode ierr;
1934 
1935   PetscFunctionBegin;
1936   /* If the two matrices have the same copy implementation, use fast copy. */
1937   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1938     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1939     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1940 
1941     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1942       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1943     }
1944     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1945   } else {
1946     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1953 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1954 {
1955   PetscErrorCode ierr;
1956 
1957   PetscFunctionBegin;
1958   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1964 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1965 {
1966   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1967   PetscFunctionBegin;
1968   *array = a->a;
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1974 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #include "petscblaslapack.h"
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1983 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1984 {
1985   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1986   PetscErrorCode ierr;
1987   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1988   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1989 
1990   PetscFunctionBegin;
1991   if (str == SAME_NONZERO_PATTERN) {
1992     PetscScalar alpha = a;
1993     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1994   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1995     if (y->xtoy && y->XtoY != X) {
1996       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1997       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1998     }
1999     if (!y->xtoy) { /* get xtoy */
2000       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2001       y->XtoY = X;
2002     }
2003     bs2 = bs*bs;
2004     for (i=0; i<x->nz; i++) {
2005       j = 0;
2006       while (j < bs2){
2007         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2008         j++;
2009       }
2010     }
2011     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2012   } else {
2013     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2014   }
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2020 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2021 {
2022   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2023   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2024   PetscScalar    *aa = a->a;
2025 
2026   PetscFunctionBegin;
2027   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2033 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2034 {
2035   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2036   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2037   PetscScalar    *aa = a->a;
2038 
2039   PetscFunctionBegin;
2040   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 
2045 /* -------------------------------------------------------------------*/
2046 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2047        MatGetRow_SeqBAIJ,
2048        MatRestoreRow_SeqBAIJ,
2049        MatMult_SeqBAIJ_N,
2050 /* 4*/ MatMultAdd_SeqBAIJ_N,
2051        MatMultTranspose_SeqBAIJ,
2052        MatMultTransposeAdd_SeqBAIJ,
2053        MatSolve_SeqBAIJ_N,
2054        0,
2055        0,
2056 /*10*/ 0,
2057        MatLUFactor_SeqBAIJ,
2058        0,
2059        0,
2060        MatTranspose_SeqBAIJ,
2061 /*15*/ MatGetInfo_SeqBAIJ,
2062        MatEqual_SeqBAIJ,
2063        MatGetDiagonal_SeqBAIJ,
2064        MatDiagonalScale_SeqBAIJ,
2065        MatNorm_SeqBAIJ,
2066 /*20*/ 0,
2067        MatAssemblyEnd_SeqBAIJ,
2068        0,
2069        MatSetOption_SeqBAIJ,
2070        MatZeroEntries_SeqBAIJ,
2071 /*25*/ MatZeroRows_SeqBAIJ,
2072        MatLUFactorSymbolic_SeqBAIJ,
2073        MatLUFactorNumeric_SeqBAIJ_N,
2074        MatCholeskyFactorSymbolic_SeqBAIJ,
2075        MatCholeskyFactorNumeric_SeqBAIJ_N,
2076 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2077        MatILUFactorSymbolic_SeqBAIJ,
2078        MatICCFactorSymbolic_SeqBAIJ,
2079        MatGetArray_SeqBAIJ,
2080        MatRestoreArray_SeqBAIJ,
2081 /*35*/ MatDuplicate_SeqBAIJ,
2082        0,
2083        0,
2084        MatILUFactor_SeqBAIJ,
2085        0,
2086 /*40*/ MatAXPY_SeqBAIJ,
2087        MatGetSubMatrices_SeqBAIJ,
2088        MatIncreaseOverlap_SeqBAIJ,
2089        MatGetValues_SeqBAIJ,
2090        MatCopy_SeqBAIJ,
2091 /*45*/ 0,
2092        MatScale_SeqBAIJ,
2093        0,
2094        0,
2095        0,
2096 /*50*/ 0,
2097        MatGetRowIJ_SeqBAIJ,
2098        MatRestoreRowIJ_SeqBAIJ,
2099        0,
2100        0,
2101 /*55*/ 0,
2102        0,
2103        0,
2104        0,
2105        MatSetValuesBlocked_SeqBAIJ,
2106 /*60*/ MatGetSubMatrix_SeqBAIJ,
2107        MatDestroy_SeqBAIJ,
2108        MatView_SeqBAIJ,
2109        0,
2110        0,
2111 /*65*/ 0,
2112        0,
2113        0,
2114        0,
2115        0,
2116 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2117        MatConvert_Basic,
2118        0,
2119        0,
2120        0,
2121 /*75*/ 0,
2122        0,
2123        0,
2124        0,
2125        0,
2126 /*80*/ 0,
2127        0,
2128        0,
2129        0,
2130        MatLoad_SeqBAIJ,
2131 /*85*/ 0,
2132        0,
2133        0,
2134        0,
2135        0,
2136 /*90*/ 0,
2137        0,
2138        0,
2139        0,
2140        0,
2141 /*95*/ 0,
2142        0,
2143        0,
2144        0,
2145        0,
2146 /*100*/0,
2147        0,
2148        0,
2149        0,
2150        0,
2151 /*105*/0,
2152        MatRealPart_SeqBAIJ,
2153        MatImaginaryPart_SeqBAIJ,
2154        0,
2155        0,
2156 /*110*/0,
2157        0,
2158        0,
2159        0,
2160        MatMissingDiagonal_SeqBAIJ
2161 /*115*/
2162 };
2163 
2164 EXTERN_C_BEGIN
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2167 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2168 {
2169   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2170   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2171   PetscErrorCode ierr;
2172 
2173   PetscFunctionBegin;
2174   if (aij->nonew != 1) {
2175     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2176   }
2177 
2178   /* allocate space for values if not already there */
2179   if (!aij->saved_values) {
2180     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2181   }
2182 
2183   /* copy values over */
2184   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 EXTERN_C_END
2188 
2189 EXTERN_C_BEGIN
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2192 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2193 {
2194   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2195   PetscErrorCode ierr;
2196   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2197 
2198   PetscFunctionBegin;
2199   if (aij->nonew != 1) {
2200     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2201   }
2202   if (!aij->saved_values) {
2203     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2204   }
2205 
2206   /* copy values over */
2207   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 EXTERN_C_END
2211 
2212 EXTERN_C_BEGIN
2213 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2214 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2215 EXTERN_C_END
2216 
2217 EXTERN_C_BEGIN
2218 #undef __FUNCT__
2219 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2220 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2221 {
2222   Mat_SeqBAIJ    *b;
2223   PetscErrorCode ierr;
2224   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2225   PetscTruth     flg,skipallocation = PETSC_FALSE;
2226 
2227   PetscFunctionBegin;
2228 
2229   if (nz == MAT_SKIP_ALLOCATION) {
2230     skipallocation = PETSC_TRUE;
2231     nz             = 0;
2232   }
2233 
2234   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2235     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2236   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2237 
2238   if (nnz && newbs != bs) {
2239     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2240   }
2241   bs   = newbs;
2242 
2243   B->rmap.bs = B->cmap.bs = bs;
2244   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2245   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2246 
2247   B->preallocated = PETSC_TRUE;
2248 
2249   mbs  = B->rmap.n/bs;
2250   nbs  = B->cmap.n/bs;
2251   bs2  = bs*bs;
2252 
2253   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2254     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2255   }
2256 
2257   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2258   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2259   if (nnz) {
2260     for (i=0; i<mbs; i++) {
2261       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2262       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2263     }
2264   }
2265 
2266   b       = (Mat_SeqBAIJ*)B->data;
2267   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2268     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2269   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2270 
2271   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2272   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2273   if (!flg) {
2274     switch (bs) {
2275     case 1:
2276       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2277       B->ops->mult            = MatMult_SeqBAIJ_1;
2278       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2279       break;
2280     case 2:
2281       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2282       B->ops->mult            = MatMult_SeqBAIJ_2;
2283       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2284       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2285       break;
2286     case 3:
2287       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2288       B->ops->mult            = MatMult_SeqBAIJ_3;
2289       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2290       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2291       break;
2292     case 4:
2293       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2294       B->ops->mult            = MatMult_SeqBAIJ_4;
2295       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2296       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2297       break;
2298     case 5:
2299       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2300       B->ops->mult            = MatMult_SeqBAIJ_5;
2301       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2302       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2303       break;
2304     case 6:
2305       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2306       B->ops->mult            = MatMult_SeqBAIJ_6;
2307       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2308       break;
2309     case 7:
2310       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2311       B->ops->mult            = MatMult_SeqBAIJ_7;
2312       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2313       break;
2314     default:
2315       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2316       B->ops->mult            = MatMult_SeqBAIJ_N;
2317       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2318       break;
2319     }
2320   }
2321   B->rmap.bs      = bs;
2322   b->mbs     = mbs;
2323   b->nbs     = nbs;
2324   if (!skipallocation) {
2325     if (!b->imax) {
2326       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2327     }
2328     /* b->ilen will count nonzeros in each block row so far. */
2329     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2330     if (!nnz) {
2331       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2332       else if (nz <= 0)        nz = 1;
2333       for (i=0; i<mbs; i++) b->imax[i] = nz;
2334       nz = nz*mbs;
2335     } else {
2336       nz = 0;
2337       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2338     }
2339 
2340     /* allocate the matrix space */
2341     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2342     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2343     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2344     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2345     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2346     b->singlemalloc = PETSC_TRUE;
2347     b->i[0] = 0;
2348     for (i=1; i<mbs+1; i++) {
2349       b->i[i] = b->i[i-1] + b->imax[i-1];
2350     }
2351     b->free_a     = PETSC_TRUE;
2352     b->free_ij    = PETSC_TRUE;
2353   } else {
2354     b->free_a     = PETSC_FALSE;
2355     b->free_ij    = PETSC_FALSE;
2356   }
2357 
2358   B->rmap.bs          = bs;
2359   b->bs2              = bs2;
2360   b->mbs              = mbs;
2361   b->nz               = 0;
2362   b->maxnz            = nz*bs2;
2363   B->info.nz_unneeded = (PetscReal)b->maxnz;
2364   PetscFunctionReturn(0);
2365 }
2366 EXTERN_C_END
2367 
2368 /*MC
2369    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2370    block sparse compressed row format.
2371 
2372    Options Database Keys:
2373 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2374 
2375   Level: beginner
2376 
2377 .seealso: MatCreateSeqBAIJ()
2378 M*/
2379 
2380 EXTERN_C_BEGIN
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatCreate_SeqBAIJ"
2383 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2384 {
2385   PetscErrorCode ierr;
2386   PetscMPIInt    size;
2387   Mat_SeqBAIJ    *b;
2388 
2389   PetscFunctionBegin;
2390   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2391   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2392 
2393   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2394   B->data = (void*)b;
2395   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2396   B->factor           = 0;
2397   B->mapping          = 0;
2398   b->row              = 0;
2399   b->col              = 0;
2400   b->icol             = 0;
2401   b->reallocs         = 0;
2402   b->saved_values     = 0;
2403 #if defined(PETSC_USE_MAT_SINGLE)
2404   b->setvalueslen     = 0;
2405   b->setvaluescopy    = PETSC_NULL;
2406 #endif
2407 
2408   b->roworiented      = PETSC_TRUE;
2409   b->nonew            = 0;
2410   b->diag             = 0;
2411   b->solve_work       = 0;
2412   b->mult_work        = 0;
2413   B->spptr            = 0;
2414   B->info.nz_unneeded = (PetscReal)b->maxnz;
2415   b->keepzeroedrows   = PETSC_FALSE;
2416   b->xtoy              = 0;
2417   b->XtoY              = 0;
2418   b->compressedrow.use     = PETSC_FALSE;
2419   b->compressedrow.nrows   = 0;
2420   b->compressedrow.i       = PETSC_NULL;
2421   b->compressedrow.rindex  = PETSC_NULL;
2422   b->compressedrow.checked = PETSC_FALSE;
2423   B->same_nonzero          = PETSC_FALSE;
2424 
2425   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2426                                      "MatInvertBlockDiagonal_SeqBAIJ",
2427                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2428   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2429                                      "MatStoreValues_SeqBAIJ",
2430                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2431   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2432                                      "MatRetrieveValues_SeqBAIJ",
2433                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2435                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2436                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2437   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2438                                      "MatConvert_SeqBAIJ_SeqAIJ",
2439                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2440   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2441                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2442                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2443   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2444                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2445                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2446   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 EXTERN_C_END
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2453 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2454 {
2455   Mat            C;
2456   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2457   PetscErrorCode ierr;
2458   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2459 
2460   PetscFunctionBegin;
2461   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2462 
2463   *B = 0;
2464   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2465   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2466   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2467   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2468   c    = (Mat_SeqBAIJ*)C->data;
2469 
2470   C->rmap.N   = A->rmap.N;
2471   C->cmap.N   = A->cmap.N;
2472   C->rmap.bs  = A->rmap.bs;
2473   c->bs2 = a->bs2;
2474   c->mbs = a->mbs;
2475   c->nbs = a->nbs;
2476 
2477   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2478   for (i=0; i<mbs; i++) {
2479     c->imax[i] = a->imax[i];
2480     c->ilen[i] = a->ilen[i];
2481   }
2482 
2483   /* allocate the matrix space */
2484   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2485   c->singlemalloc = PETSC_TRUE;
2486   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2487   if (mbs > 0) {
2488     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2489     if (cpvalues == MAT_COPY_VALUES) {
2490       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2491     } else {
2492       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2493     }
2494   }
2495   c->roworiented = a->roworiented;
2496   c->nonew       = a->nonew;
2497 
2498   if (a->diag) {
2499     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2500     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2501     for (i=0; i<mbs; i++) {
2502       c->diag[i] = a->diag[i];
2503     }
2504   } else c->diag        = 0;
2505   c->nz                 = a->nz;
2506   c->maxnz              = a->maxnz;
2507   c->solve_work         = 0;
2508   c->mult_work          = 0;
2509   c->free_a             = PETSC_TRUE;
2510   c->free_ij            = PETSC_TRUE;
2511   C->preallocated       = PETSC_TRUE;
2512   C->assembled          = PETSC_TRUE;
2513 
2514   c->compressedrow.use     = a->compressedrow.use;
2515   c->compressedrow.nrows   = a->compressedrow.nrows;
2516   c->compressedrow.checked = a->compressedrow.checked;
2517   if ( a->compressedrow.checked && a->compressedrow.use){
2518     i = a->compressedrow.nrows;
2519     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2520     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2521     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2522     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2523   } else {
2524     c->compressedrow.use    = PETSC_FALSE;
2525     c->compressedrow.i      = PETSC_NULL;
2526     c->compressedrow.rindex = PETSC_NULL;
2527   }
2528   C->same_nonzero = A->same_nonzero;
2529   *B = C;
2530   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 #undef __FUNCT__
2535 #define __FUNCT__ "MatLoad_SeqBAIJ"
2536 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2537 {
2538   Mat_SeqBAIJ    *a;
2539   Mat            B;
2540   PetscErrorCode ierr;
2541   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2542   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2543   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2544   PetscInt       *masked,nmask,tmp,bs2,ishift;
2545   PetscMPIInt    size;
2546   int            fd;
2547   PetscScalar    *aa;
2548   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2549 
2550   PetscFunctionBegin;
2551   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2552     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2553   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2554   bs2  = bs*bs;
2555 
2556   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2557   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2558   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2559   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2560   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2561   M = header[1]; N = header[2]; nz = header[3];
2562 
2563   if (header[3] < 0) {
2564     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2565   }
2566 
2567   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2568 
2569   /*
2570      This code adds extra rows to make sure the number of rows is
2571     divisible by the blocksize
2572   */
2573   mbs        = M/bs;
2574   extra_rows = bs - M + bs*(mbs);
2575   if (extra_rows == bs) extra_rows = 0;
2576   else                  mbs++;
2577   if (extra_rows) {
2578     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2579   }
2580 
2581   /* read in row lengths */
2582   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2583   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2584   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2585 
2586   /* read in column indices */
2587   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2588   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2589   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2590 
2591   /* loop over row lengths determining block row lengths */
2592   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2593   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2594   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2595   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2596   masked   = mask + mbs;
2597   rowcount = 0; nzcount = 0;
2598   for (i=0; i<mbs; i++) {
2599     nmask = 0;
2600     for (j=0; j<bs; j++) {
2601       kmax = rowlengths[rowcount];
2602       for (k=0; k<kmax; k++) {
2603         tmp = jj[nzcount++]/bs;
2604         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2605       }
2606       rowcount++;
2607     }
2608     browlengths[i] += nmask;
2609     /* zero out the mask elements we set */
2610     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2611   }
2612 
2613   /* create our matrix */
2614   ierr = MatCreate(comm,&B);
2615   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2616   ierr = MatSetType(B,type);CHKERRQ(ierr);
2617   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2618   a = (Mat_SeqBAIJ*)B->data;
2619 
2620   /* set matrix "i" values */
2621   a->i[0] = 0;
2622   for (i=1; i<= mbs; i++) {
2623     a->i[i]      = a->i[i-1] + browlengths[i-1];
2624     a->ilen[i-1] = browlengths[i-1];
2625   }
2626   a->nz         = 0;
2627   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2628 
2629   /* read in nonzero values */
2630   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2631   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2632   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2633 
2634   /* set "a" and "j" values into matrix */
2635   nzcount = 0; jcount = 0;
2636   for (i=0; i<mbs; i++) {
2637     nzcountb = nzcount;
2638     nmask    = 0;
2639     for (j=0; j<bs; j++) {
2640       kmax = rowlengths[i*bs+j];
2641       for (k=0; k<kmax; k++) {
2642         tmp = jj[nzcount++]/bs;
2643 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2644       }
2645     }
2646     /* sort the masked values */
2647     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2648 
2649     /* set "j" values into matrix */
2650     maskcount = 1;
2651     for (j=0; j<nmask; j++) {
2652       a->j[jcount++]  = masked[j];
2653       mask[masked[j]] = maskcount++;
2654     }
2655     /* set "a" values into matrix */
2656     ishift = bs2*a->i[i];
2657     for (j=0; j<bs; j++) {
2658       kmax = rowlengths[i*bs+j];
2659       for (k=0; k<kmax; k++) {
2660         tmp       = jj[nzcountb]/bs ;
2661         block     = mask[tmp] - 1;
2662         point     = jj[nzcountb] - bs*tmp;
2663         idx       = ishift + bs2*block + j + bs*point;
2664         a->a[idx] = (MatScalar)aa[nzcountb++];
2665       }
2666     }
2667     /* zero out the mask elements we set */
2668     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2669   }
2670   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2671 
2672   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2673   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2674   ierr = PetscFree(aa);CHKERRQ(ierr);
2675   ierr = PetscFree(jj);CHKERRQ(ierr);
2676   ierr = PetscFree(mask);CHKERRQ(ierr);
2677 
2678   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   ierr = MatView_Private(B);CHKERRQ(ierr);
2681 
2682   *A = B;
2683   PetscFunctionReturn(0);
2684 }
2685 
2686 #undef __FUNCT__
2687 #define __FUNCT__ "MatCreateSeqBAIJ"
2688 /*@C
2689    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2690    compressed row) format.  For good matrix assembly performance the
2691    user should preallocate the matrix storage by setting the parameter nz
2692    (or the array nnz).  By setting these parameters accurately, performance
2693    during matrix assembly can be increased by more than a factor of 50.
2694 
2695    Collective on MPI_Comm
2696 
2697    Input Parameters:
2698 +  comm - MPI communicator, set to PETSC_COMM_SELF
2699 .  bs - size of block
2700 .  m - number of rows
2701 .  n - number of columns
2702 .  nz - number of nonzero blocks  per block row (same for all rows)
2703 -  nnz - array containing the number of nonzero blocks in the various block rows
2704          (possibly different for each block row) or PETSC_NULL
2705 
2706    Output Parameter:
2707 .  A - the matrix
2708 
2709    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2710    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
2711    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2712    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2713 
2714    Options Database Keys:
2715 .   -mat_no_unroll - uses code that does not unroll the loops in the
2716                      block calculations (much slower)
2717 .    -mat_block_size - size of the blocks to use
2718 
2719    Level: intermediate
2720 
2721    Notes:
2722    The number of rows and columns must be divisible by blocksize.
2723 
2724    If the nnz parameter is given then the nz parameter is ignored
2725 
2726    A nonzero block is any block that as 1 or more nonzeros in it
2727 
2728    The block AIJ format is fully compatible with standard Fortran 77
2729    storage.  That is, the stored row and column indices can begin at
2730    either one (as in Fortran) or zero.  See the users' manual for details.
2731 
2732    Specify the preallocated storage with either nz or nnz (not both).
2733    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2734    allocation.  For additional details, see the users manual chapter on
2735    matrices.
2736 
2737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2738 @*/
2739 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2740 {
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2745   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2746   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2747   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2753 /*@C
2754    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2755    per row in the matrix. For good matrix assembly performance the
2756    user should preallocate the matrix storage by setting the parameter nz
2757    (or the array nnz).  By setting these parameters accurately, performance
2758    during matrix assembly can be increased by more than a factor of 50.
2759 
2760    Collective on MPI_Comm
2761 
2762    Input Parameters:
2763 +  A - the matrix
2764 .  bs - size of block
2765 .  nz - number of block nonzeros per block row (same for all rows)
2766 -  nnz - array containing the number of block nonzeros in the various block rows
2767          (possibly different for each block row) or PETSC_NULL
2768 
2769    Options Database Keys:
2770 .   -mat_no_unroll - uses code that does not unroll the loops in the
2771                      block calculations (much slower)
2772 .    -mat_block_size - size of the blocks to use
2773 
2774    Level: intermediate
2775 
2776    Notes:
2777    If the nnz parameter is given then the nz parameter is ignored
2778 
2779    You can call MatGetInfo() to get information on how effective the preallocation was;
2780    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2781    You can also run with the option -info and look for messages with the string
2782    malloc in them to see if additional memory allocation was needed.
2783 
2784    The block AIJ format is fully compatible with standard Fortran 77
2785    storage.  That is, the stored row and column indices can begin at
2786    either one (as in Fortran) or zero.  See the users' manual for details.
2787 
2788    Specify the preallocated storage with either nz or nnz (not both).
2789    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2790    allocation.  For additional details, see the users manual chapter on
2791    matrices.
2792 
2793 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
2794 @*/
2795 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2796 {
2797   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2798 
2799   PetscFunctionBegin;
2800   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2801   if (f) {
2802     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2803   }
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 #undef __FUNCT__
2808 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2809 /*@
2810      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2811               (upper triangular entries in CSR format) provided by the user.
2812 
2813      Collective on MPI_Comm
2814 
2815    Input Parameters:
2816 +  comm - must be an MPI communicator of size 1
2817 .  bs - size of block
2818 .  m - number of rows
2819 .  n - number of columns
2820 .  i - row indices
2821 .  j - column indices
2822 -  a - matrix values
2823 
2824    Output Parameter:
2825 .  mat - the matrix
2826 
2827    Level: intermediate
2828 
2829    Notes:
2830        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2831     once the matrix is destroyed
2832 
2833        You cannot set new nonzero locations into this matrix, that will generate an error.
2834 
2835        The i and j indices are 0 based
2836 
2837 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2838 
2839 @*/
2840 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2841 {
2842   PetscErrorCode ierr;
2843   PetscInt       ii;
2844   Mat_SeqBAIJ    *baij;
2845 
2846   PetscFunctionBegin;
2847   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2848   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2849 
2850   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2851   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2852   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2853   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2854   baij = (Mat_SeqBAIJ*)(*mat)->data;
2855   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2856 
2857   baij->i = i;
2858   baij->j = j;
2859   baij->a = a;
2860   baij->singlemalloc = PETSC_FALSE;
2861   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2862   baij->free_a       = PETSC_FALSE;
2863   baij->free_ij       = PETSC_FALSE;
2864 
2865   for (ii=0; ii<m; ii++) {
2866     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2867 #if defined(PETSC_USE_DEBUG)
2868     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2869 #endif
2870   }
2871 #if defined(PETSC_USE_DEBUG)
2872   for (ii=0; ii<baij->i[m]; ii++) {
2873     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2874     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2875   }
2876 #endif
2877 
2878   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880   PetscFunctionReturn(0);
2881 }
2882