xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2f85c6a59d752db987113db0b8a978ea7d4ca5ee)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   *missing = PETSC_FALSE;
743   diag     = a->diag;
744   for (i=0; i<a->mbs; i++) {
745     if (jj[diag[i]] != i) {
746       *missing  = PETSC_TRUE;
747       if (d) *d = i;
748     }
749   }
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
755 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
756 {
757   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,m = a->mbs;
760 
761   PetscFunctionBegin;
762   if (!a->diag) {
763     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
764   }
765   for (i=0; i<m; i++) {
766     a->diag[i] = a->i[i+1];
767     for (j=a->i[i]; j<a->i[i+1]; j++) {
768       if (a->j[j] == i) {
769         a->diag[i] = j;
770         break;
771       }
772     }
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 
778 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
782 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
783 {
784   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
785   PetscErrorCode ierr;
786   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
787   PetscInt     *tia, *tja;
788 
789   PetscFunctionBegin;
790   *nn = n;
791   if (!ia) PetscFunctionReturn(0);
792   if (symmetric) {
793     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
794   } else {
795     tia = a->i; tja = a->j;
796   }
797 
798   if (!blockcompressed) {
799     /* malloc & create the natural set of indices */
800     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
801     for (i=0; i<n+1; i++) {
802       for (j=0; j<bs; j++) {
803         *ia[i*bs+j] = tia[i]*bs+j;
804       }
805     }
806     for (i=0; i<nz; i++) {
807       for (j=0; j<bs; j++) {
808         *ja[i*bs+j] = tia[i]*bs+j;
809       }
810     }
811     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
812       ierr = PetscFree(tia);CHKERRQ(ierr);
813       ierr = PetscFree(tja);CHKERRQ(ierr);
814     }
815   } else {
816     *ia = tia;
817     *ja = tja;
818   }
819   if (oshift == 1) {
820     for (i=0; i<nz; i++) (*ja)[i]++;
821     for (i=0; i<n+1; i++) (*ia)[i]++;
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 #undef __FUNCT__
827 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
828 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
829 {
830   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
831   PetscErrorCode ierr;
832   PetscInt       i,n = a->mbs,nz = a->i[n];
833 
834   PetscFunctionBegin;
835   if (!ia) PetscFunctionReturn(0);
836   if (!blockcompressed) {
837     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
838   }else if (symmetric) {
839     ierr = PetscFree(*ia);CHKERRQ(ierr);
840     ierr = PetscFree(*ja);CHKERRQ(ierr);
841   } else if (oshift == 1) { /* blockcompressed */
842     for (i=0; i<nz; i++) a->j[i]--;
843     for (i=0; i<n+1; i++) a->i[i]--;
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatDestroy_SeqBAIJ"
850 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
851 {
852   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
853   PetscErrorCode ierr;
854 
855   PetscFunctionBegin;
856 #if defined(PETSC_USE_LOG)
857   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
858 #endif
859   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
860   if (a->row) {
861     ierr = ISDestroy(a->row);CHKERRQ(ierr);
862   }
863   if (a->col) {
864     ierr = ISDestroy(a->col);CHKERRQ(ierr);
865   }
866   ierr = PetscFree(a->diag);CHKERRQ(ierr);
867   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
868   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
869   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
870   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
871   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
872   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
873 #if defined(PETSC_USE_MAT_SINGLE)
874   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
875 #endif
876   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
877   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
878 
879   ierr = PetscFree(a);CHKERRQ(ierr);
880 
881   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
882   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
883   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
884   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
885   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
886   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
887   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
888   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatSetOption_SeqBAIJ"
894 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
895 {
896   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   switch (op) {
901   case MAT_ROW_ORIENTED:
902     a->roworiented    = flg;
903     break;
904   case MAT_KEEP_ZEROED_ROWS:
905     a->keepzeroedrows = flg;
906     break;
907   case MAT_NEW_NONZERO_LOCATIONS:
908     a->nonew          = (flg ? 0 : 1);
909     break;
910   case MAT_NEW_NONZERO_LOCATION_ERR:
911     a->nonew          = (flg ? -1 : 0);
912     break;
913   case MAT_NEW_NONZERO_ALLOCATION_ERR:
914     a->nonew          = (flg ? -2 : 0);
915     break;
916   case MAT_NEW_DIAGONALS:
917   case MAT_IGNORE_OFF_PROC_ENTRIES:
918   case MAT_USE_HASH_TABLE:
919     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
920     break;
921   case MAT_SYMMETRIC:
922   case MAT_STRUCTURALLY_SYMMETRIC:
923   case MAT_HERMITIAN:
924   case MAT_SYMMETRY_ETERNAL:
925     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
926     break;
927   default:
928     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatGetRow_SeqBAIJ"
935 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
936 {
937   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
938   PetscErrorCode ierr;
939   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
940   MatScalar      *aa,*aa_i;
941   PetscScalar    *v_i;
942 
943   PetscFunctionBegin;
944   bs  = A->rmap.bs;
945   ai  = a->i;
946   aj  = a->j;
947   aa  = a->a;
948   bs2 = a->bs2;
949 
950   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
951 
952   bn  = row/bs;   /* Block number */
953   bp  = row % bs; /* Block Position */
954   M   = ai[bn+1] - ai[bn];
955   *nz = bs*M;
956 
957   if (v) {
958     *v = 0;
959     if (*nz) {
960       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
961       for (i=0; i<M; i++) { /* for each block in the block row */
962         v_i  = *v + i*bs;
963         aa_i = aa + bs2*(ai[bn] + i);
964         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
965       }
966     }
967   }
968 
969   if (idx) {
970     *idx = 0;
971     if (*nz) {
972       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
973       for (i=0; i<M; i++) { /* for each block in the block row */
974         idx_i = *idx + i*bs;
975         itmp  = bs*aj[ai[bn] + i];
976         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
977       }
978     }
979   }
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
985 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
986 {
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
991   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
992   PetscFunctionReturn(0);
993 }
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "MatTranspose_SeqBAIJ"
997 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
998 {
999   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1000   Mat            C;
1001   PetscErrorCode ierr;
1002   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1003   PetscInt       *rows,*cols,bs2=a->bs2;
1004   PetscScalar    *array;
1005 
1006   PetscFunctionBegin;
1007   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1008   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1009   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1010 
1011 #if defined(PETSC_USE_MAT_SINGLE)
1012   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1013   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1014 #else
1015   array = a->a;
1016 #endif
1017 
1018   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1019   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1020   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1021   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1022   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1023   ierr = PetscFree(col);CHKERRQ(ierr);
1024   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1025   cols = rows + bs;
1026   for (i=0; i<mbs; i++) {
1027     cols[0] = i*bs;
1028     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1029     len = ai[i+1] - ai[i];
1030     for (j=0; j<len; j++) {
1031       rows[0] = (*aj++)*bs;
1032       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1033       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1034       array += bs2;
1035     }
1036   }
1037   ierr = PetscFree(rows);CHKERRQ(ierr);
1038 #if defined(PETSC_USE_MAT_SINGLE)
1039   ierr = PetscFree(array);
1040 #endif
1041 
1042   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1043   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1044 
1045   if (B) {
1046     *B = C;
1047   } else {
1048     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1049   }
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1055 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1056 {
1057   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1058   PetscErrorCode ierr;
1059   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1060   int            fd;
1061   PetscScalar    *aa;
1062   FILE           *file;
1063 
1064   PetscFunctionBegin;
1065   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1066   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1067   col_lens[0] = MAT_FILE_COOKIE;
1068 
1069   col_lens[1] = A->rmap.N;
1070   col_lens[2] = A->cmap.n;
1071   col_lens[3] = a->nz*bs2;
1072 
1073   /* store lengths of each row and write (including header) to file */
1074   count = 0;
1075   for (i=0; i<a->mbs; i++) {
1076     for (j=0; j<bs; j++) {
1077       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1078     }
1079   }
1080   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1081   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1082 
1083   /* store column indices (zero start index) */
1084   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1085   count = 0;
1086   for (i=0; i<a->mbs; i++) {
1087     for (j=0; j<bs; j++) {
1088       for (k=a->i[i]; k<a->i[i+1]; k++) {
1089         for (l=0; l<bs; l++) {
1090           jj[count++] = bs*a->j[k] + l;
1091         }
1092       }
1093     }
1094   }
1095   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1096   ierr = PetscFree(jj);CHKERRQ(ierr);
1097 
1098   /* store nonzero values */
1099   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1100   count = 0;
1101   for (i=0; i<a->mbs; i++) {
1102     for (j=0; j<bs; j++) {
1103       for (k=a->i[i]; k<a->i[i+1]; k++) {
1104         for (l=0; l<bs; l++) {
1105           aa[count++] = a->a[bs2*k + l*bs + j];
1106         }
1107       }
1108     }
1109   }
1110   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1111   ierr = PetscFree(aa);CHKERRQ(ierr);
1112 
1113   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1114   if (file) {
1115     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1116   }
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1122 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1123 {
1124   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1125   PetscErrorCode    ierr;
1126   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1127   PetscViewerFormat format;
1128 
1129   PetscFunctionBegin;
1130   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1131   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1132     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1133   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1134     Mat aij;
1135     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1136     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1137     ierr = MatDestroy(aij);CHKERRQ(ierr);
1138   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1139      PetscFunctionReturn(0);
1140   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1141     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1142     for (i=0; i<a->mbs; i++) {
1143       for (j=0; j<bs; j++) {
1144         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1145         for (k=a->i[i]; k<a->i[i+1]; k++) {
1146           for (l=0; l<bs; l++) {
1147 #if defined(PETSC_USE_COMPLEX)
1148             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1150                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1151             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1152               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1153                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1154             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1155               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1156             }
1157 #else
1158             if (a->a[bs2*k + l*bs + j] != 0.0) {
1159               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1160             }
1161 #endif
1162           }
1163         }
1164         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1165       }
1166     }
1167     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1168   } else {
1169     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1170     for (i=0; i<a->mbs; i++) {
1171       for (j=0; j<bs; j++) {
1172         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1173         for (k=a->i[i]; k<a->i[i+1]; k++) {
1174           for (l=0; l<bs; l++) {
1175 #if defined(PETSC_USE_COMPLEX)
1176             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1178                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1179             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1180               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1181                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1182             } else {
1183               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1184             }
1185 #else
1186             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1187 #endif
1188           }
1189         }
1190         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1191       }
1192     }
1193     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1194   }
1195   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1201 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1202 {
1203   Mat            A = (Mat) Aa;
1204   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1205   PetscErrorCode ierr;
1206   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1207   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1208   MatScalar      *aa;
1209   PetscViewer    viewer;
1210 
1211   PetscFunctionBegin;
1212 
1213   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1214   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1215 
1216   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1217 
1218   /* loop over matrix elements drawing boxes */
1219   color = PETSC_DRAW_BLUE;
1220   for (i=0,row=0; i<mbs; i++,row+=bs) {
1221     for (j=a->i[i]; j<a->i[i+1]; j++) {
1222       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1223       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1224       aa = a->a + j*bs2;
1225       for (k=0; k<bs; k++) {
1226         for (l=0; l<bs; l++) {
1227           if (PetscRealPart(*aa++) >=  0.) continue;
1228           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1229         }
1230       }
1231     }
1232   }
1233   color = PETSC_DRAW_CYAN;
1234   for (i=0,row=0; i<mbs; i++,row+=bs) {
1235     for (j=a->i[i]; j<a->i[i+1]; j++) {
1236       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1237       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1238       aa = a->a + j*bs2;
1239       for (k=0; k<bs; k++) {
1240         for (l=0; l<bs; l++) {
1241           if (PetscRealPart(*aa++) != 0.) continue;
1242           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1243         }
1244       }
1245     }
1246   }
1247 
1248   color = PETSC_DRAW_RED;
1249   for (i=0,row=0; i<mbs; i++,row+=bs) {
1250     for (j=a->i[i]; j<a->i[i+1]; j++) {
1251       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1252       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1253       aa = a->a + j*bs2;
1254       for (k=0; k<bs; k++) {
1255         for (l=0; l<bs; l++) {
1256           if (PetscRealPart(*aa++) <= 0.) continue;
1257           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1258         }
1259       }
1260     }
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1267 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1268 {
1269   PetscErrorCode ierr;
1270   PetscReal      xl,yl,xr,yr,w,h;
1271   PetscDraw      draw;
1272   PetscTruth     isnull;
1273 
1274   PetscFunctionBegin;
1275 
1276   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1277   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1278 
1279   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1280   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1281   xr += w;    yr += h;  xl = -w;     yl = -h;
1282   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1283   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1284   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_SeqBAIJ"
1290 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1291 {
1292   PetscErrorCode ierr;
1293   PetscTruth     iascii,isbinary,isdraw;
1294 
1295   PetscFunctionBegin;
1296   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1297   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1298   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1299   if (iascii){
1300     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1301   } else if (isbinary) {
1302     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1303   } else if (isdraw) {
1304     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1305   } else {
1306     Mat B;
1307     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1308     ierr = MatView(B,viewer);CHKERRQ(ierr);
1309     ierr = MatDestroy(B);CHKERRQ(ierr);
1310   }
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1317 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1318 {
1319   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1320   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1321   PetscInt    *ai = a->i,*ailen = a->ilen;
1322   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1323   MatScalar   *ap,*aa = a->a;
1324 
1325   PetscFunctionBegin;
1326   for (k=0; k<m; k++) { /* loop over rows */
1327     row  = im[k]; brow = row/bs;
1328     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1329     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1330     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1331     nrow = ailen[brow];
1332     for (l=0; l<n; l++) { /* loop over columns */
1333       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1334       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1335       col  = in[l] ;
1336       bcol = col/bs;
1337       cidx = col%bs;
1338       ridx = row%bs;
1339       high = nrow;
1340       low  = 0; /* assume unsorted */
1341       while (high-low > 5) {
1342         t = (low+high)/2;
1343         if (rp[t] > bcol) high = t;
1344         else             low  = t;
1345       }
1346       for (i=low; i<high; i++) {
1347         if (rp[i] > bcol) break;
1348         if (rp[i] == bcol) {
1349           *v++ = ap[bs2*i+bs*cidx+ridx];
1350           goto finished;
1351         }
1352       }
1353       *v++ = 0.0;
1354       finished:;
1355     }
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #if defined(PETSC_USE_MAT_SINGLE)
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1363 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1364 {
1365   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1366   PetscErrorCode ierr;
1367   PetscInt       i,N = m*n*b->bs2;
1368   MatScalar      *vsingle;
1369 
1370   PetscFunctionBegin;
1371   if (N > b->setvalueslen) {
1372     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1373     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1374     b->setvalueslen  = N;
1375   }
1376   vsingle = b->setvaluescopy;
1377   for (i=0; i<N; i++) {
1378     vsingle[i] = v[i];
1379   }
1380   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 #endif
1384 
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1388 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1389 {
1390   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1391   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1392   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1393   PetscErrorCode  ierr;
1394   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1395   PetscTruth      roworiented=a->roworiented;
1396   const MatScalar *value = v;
1397   MatScalar       *ap,*aa = a->a,*bap;
1398 
1399   PetscFunctionBegin;
1400   if (roworiented) {
1401     stepval = (n-1)*bs;
1402   } else {
1403     stepval = (m-1)*bs;
1404   }
1405   for (k=0; k<m; k++) { /* loop over added rows */
1406     row  = im[k];
1407     if (row < 0) continue;
1408 #if defined(PETSC_USE_DEBUG)
1409     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1410 #endif
1411     rp   = aj + ai[row];
1412     ap   = aa + bs2*ai[row];
1413     rmax = imax[row];
1414     nrow = ailen[row];
1415     low  = 0;
1416     high = nrow;
1417     for (l=0; l<n; l++) { /* loop over added columns */
1418       if (in[l] < 0) continue;
1419 #if defined(PETSC_USE_DEBUG)
1420       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1421 #endif
1422       col = in[l];
1423       if (roworiented) {
1424         value = v + k*(stepval+bs)*bs + l*bs;
1425       } else {
1426         value = v + l*(stepval+bs)*bs + k*bs;
1427       }
1428       if (col <= lastcol) low = 0; else high = nrow;
1429       lastcol = col;
1430       while (high-low > 7) {
1431         t = (low+high)/2;
1432         if (rp[t] > col) high = t;
1433         else             low  = t;
1434       }
1435       for (i=low; i<high; i++) {
1436         if (rp[i] > col) break;
1437         if (rp[i] == col) {
1438           bap  = ap +  bs2*i;
1439           if (roworiented) {
1440             if (is == ADD_VALUES) {
1441               for (ii=0; ii<bs; ii++,value+=stepval) {
1442                 for (jj=ii; jj<bs2; jj+=bs) {
1443                   bap[jj] += *value++;
1444                 }
1445               }
1446             } else {
1447               for (ii=0; ii<bs; ii++,value+=stepval) {
1448                 for (jj=ii; jj<bs2; jj+=bs) {
1449                   bap[jj] = *value++;
1450                 }
1451               }
1452             }
1453           } else {
1454             if (is == ADD_VALUES) {
1455               for (ii=0; ii<bs; ii++,value+=stepval) {
1456                 for (jj=0; jj<bs; jj++) {
1457                   *bap++ += *value++;
1458                 }
1459               }
1460             } else {
1461               for (ii=0; ii<bs; ii++,value+=stepval) {
1462                 for (jj=0; jj<bs; jj++) {
1463                   *bap++  = *value++;
1464                 }
1465               }
1466             }
1467           }
1468           goto noinsert2;
1469         }
1470       }
1471       if (nonew == 1) goto noinsert2;
1472       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1473       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1474       N = nrow++ - 1; high++;
1475       /* shift up all the later entries in this row */
1476       for (ii=N; ii>=i; ii--) {
1477         rp[ii+1] = rp[ii];
1478         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1479       }
1480       if (N >= i) {
1481         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1482       }
1483       rp[i] = col;
1484       bap   = ap +  bs2*i;
1485       if (roworiented) {
1486         for (ii=0; ii<bs; ii++,value+=stepval) {
1487           for (jj=ii; jj<bs2; jj+=bs) {
1488             bap[jj] = *value++;
1489           }
1490         }
1491       } else {
1492         for (ii=0; ii<bs; ii++,value+=stepval) {
1493           for (jj=0; jj<bs; jj++) {
1494             *bap++  = *value++;
1495           }
1496         }
1497       }
1498       noinsert2:;
1499       low = i;
1500     }
1501     ailen[row] = nrow;
1502   }
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1508 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1509 {
1510   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1511   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1512   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1513   PetscErrorCode ierr;
1514   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1515   MatScalar      *aa = a->a,*ap;
1516   PetscReal      ratio=0.6;
1517 
1518   PetscFunctionBegin;
1519   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1520 
1521   if (m) rmax = ailen[0];
1522   for (i=1; i<mbs; i++) {
1523     /* move each row back by the amount of empty slots (fshift) before it*/
1524     fshift += imax[i-1] - ailen[i-1];
1525     rmax   = PetscMax(rmax,ailen[i]);
1526     if (fshift) {
1527       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1528       N = ailen[i];
1529       for (j=0; j<N; j++) {
1530         ip[j-fshift] = ip[j];
1531         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1532       }
1533     }
1534     ai[i] = ai[i-1] + ailen[i-1];
1535   }
1536   if (mbs) {
1537     fshift += imax[mbs-1] - ailen[mbs-1];
1538     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1539   }
1540   /* reset ilen and imax for each row */
1541   for (i=0; i<mbs; i++) {
1542     ailen[i] = imax[i] = ai[i+1] - ai[i];
1543   }
1544   a->nz = ai[mbs];
1545 
1546   /* diagonals may have moved, so kill the diagonal pointers */
1547   a->idiagvalid = PETSC_FALSE;
1548   if (fshift && a->diag) {
1549     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1550     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1551     a->diag = 0;
1552   }
1553   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1554   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1555   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1556   a->reallocs          = 0;
1557   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1558 
1559   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1560   if (a->compressedrow.use){
1561     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1562   }
1563 
1564   A->same_nonzero = PETSC_TRUE;
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 /*
1569    This function returns an array of flags which indicate the locations of contiguous
1570    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1571    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1572    Assume: sizes should be long enough to hold all the values.
1573 */
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1576 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1577 {
1578   PetscInt   i,j,k,row;
1579   PetscTruth flg;
1580 
1581   PetscFunctionBegin;
1582   for (i=0,j=0; i<n; j++) {
1583     row = idx[i];
1584     if (row%bs!=0) { /* Not the begining of a block */
1585       sizes[j] = 1;
1586       i++;
1587     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1588       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1589       i++;
1590     } else { /* Begining of the block, so check if the complete block exists */
1591       flg = PETSC_TRUE;
1592       for (k=1; k<bs; k++) {
1593         if (row+k != idx[i+k]) { /* break in the block */
1594           flg = PETSC_FALSE;
1595           break;
1596         }
1597       }
1598       if (flg) { /* No break in the bs */
1599         sizes[j] = bs;
1600         i+= bs;
1601       } else {
1602         sizes[j] = 1;
1603         i++;
1604       }
1605     }
1606   }
1607   *bs_max = j;
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 #undef __FUNCT__
1612 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1613 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1614 {
1615   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1616   PetscErrorCode ierr;
1617   PetscInt       i,j,k,count,*rows;
1618   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1619   PetscScalar    zero = 0.0;
1620   MatScalar      *aa;
1621 
1622   PetscFunctionBegin;
1623   /* Make a copy of the IS and  sort it */
1624   /* allocate memory for rows,sizes */
1625   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1626   sizes = rows + is_n;
1627 
1628   /* copy IS values to rows, and sort them */
1629   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1630   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1631   if (baij->keepzeroedrows) {
1632     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1633     bs_max = is_n;
1634     A->same_nonzero = PETSC_TRUE;
1635   } else {
1636     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1637     A->same_nonzero = PETSC_FALSE;
1638   }
1639 
1640   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1641     row   = rows[j];
1642     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1643     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1644     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1645     if (sizes[i] == bs && !baij->keepzeroedrows) {
1646       if (diag != 0.0) {
1647         if (baij->ilen[row/bs] > 0) {
1648           baij->ilen[row/bs]       = 1;
1649           baij->j[baij->i[row/bs]] = row/bs;
1650           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1651         }
1652         /* Now insert all the diagonal values for this bs */
1653         for (k=0; k<bs; k++) {
1654           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1655         }
1656       } else { /* (diag == 0.0) */
1657         baij->ilen[row/bs] = 0;
1658       } /* end (diag == 0.0) */
1659     } else { /* (sizes[i] != bs) */
1660 #if defined (PETSC_USE_DEBUG)
1661       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1662 #endif
1663       for (k=0; k<count; k++) {
1664         aa[0] =  zero;
1665         aa    += bs;
1666       }
1667       if (diag != 0.0) {
1668         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1669       }
1670     }
1671   }
1672 
1673   ierr = PetscFree(rows);CHKERRQ(ierr);
1674   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1680 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1681 {
1682   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1683   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1684   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1685   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1686   PetscErrorCode ierr;
1687   PetscInt       ridx,cidx,bs2=a->bs2;
1688   PetscTruth     roworiented=a->roworiented;
1689   MatScalar      *ap,value,*aa=a->a,*bap;
1690 
1691   PetscFunctionBegin;
1692   for (k=0; k<m; k++) { /* loop over added rows */
1693     row  = im[k];
1694     brow = row/bs;
1695     if (row < 0) continue;
1696 #if defined(PETSC_USE_DEBUG)
1697     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1698 #endif
1699     rp   = aj + ai[brow];
1700     ap   = aa + bs2*ai[brow];
1701     rmax = imax[brow];
1702     nrow = ailen[brow];
1703     low  = 0;
1704     high = nrow;
1705     for (l=0; l<n; l++) { /* loop over added columns */
1706       if (in[l] < 0) continue;
1707 #if defined(PETSC_USE_DEBUG)
1708       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1709 #endif
1710       col = in[l]; bcol = col/bs;
1711       ridx = row % bs; cidx = col % bs;
1712       if (roworiented) {
1713         value = v[l + k*n];
1714       } else {
1715         value = v[k + l*m];
1716       }
1717       if (col <= lastcol) low = 0; else high = nrow;
1718       lastcol = col;
1719       while (high-low > 7) {
1720         t = (low+high)/2;
1721         if (rp[t] > bcol) high = t;
1722         else              low  = t;
1723       }
1724       for (i=low; i<high; i++) {
1725         if (rp[i] > bcol) break;
1726         if (rp[i] == bcol) {
1727           bap  = ap +  bs2*i + bs*cidx + ridx;
1728           if (is == ADD_VALUES) *bap += value;
1729           else                  *bap  = value;
1730           goto noinsert1;
1731         }
1732       }
1733       if (nonew == 1) goto noinsert1;
1734       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1735       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1736       N = nrow++ - 1; high++;
1737       /* shift up all the later entries in this row */
1738       for (ii=N; ii>=i; ii--) {
1739         rp[ii+1] = rp[ii];
1740         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1741       }
1742       if (N>=i) {
1743         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1744       }
1745       rp[i]                      = bcol;
1746       ap[bs2*i + bs*cidx + ridx] = value;
1747       a->nz++;
1748       noinsert1:;
1749       low = i;
1750     }
1751     ailen[brow] = nrow;
1752   }
1753   A->same_nonzero = PETSC_FALSE;
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1760 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1761 {
1762   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1763   Mat            outA;
1764   PetscErrorCode ierr;
1765   PetscTruth     row_identity,col_identity;
1766 
1767   PetscFunctionBegin;
1768   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1769   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1770   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1771   if (!row_identity || !col_identity) {
1772     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1773   }
1774 
1775   outA          = inA;
1776   inA->factor   = FACTOR_LU;
1777 
1778   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1779 
1780   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1781   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1782   a->row = row;
1783   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1784   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1785   a->col = col;
1786 
1787   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1788   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1789   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1790 
1791   /*
1792       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1793       for ILU(0) factorization with natural ordering
1794   */
1795   if (inA->rmap.bs < 8) {
1796     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1797   } else {
1798     if (!a->solve_work) {
1799       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1800       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1801     }
1802   }
1803 
1804   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1805 
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 EXTERN_C_BEGIN
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1812 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1813 {
1814   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1815   PetscInt    i,nz,nbs;
1816 
1817   PetscFunctionBegin;
1818   nz  = baij->maxnz/baij->bs2;
1819   nbs = baij->nbs;
1820   for (i=0; i<nz; i++) {
1821     baij->j[i] = indices[i];
1822   }
1823   baij->nz = nz;
1824   for (i=0; i<nbs; i++) {
1825     baij->ilen[i] = baij->imax[i];
1826   }
1827 
1828   PetscFunctionReturn(0);
1829 }
1830 EXTERN_C_END
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1834 /*@
1835     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1836        in the matrix.
1837 
1838   Input Parameters:
1839 +  mat - the SeqBAIJ matrix
1840 -  indices - the column indices
1841 
1842   Level: advanced
1843 
1844   Notes:
1845     This can be called if you have precomputed the nonzero structure of the
1846   matrix and want to provide it to the matrix object to improve the performance
1847   of the MatSetValues() operation.
1848 
1849     You MUST have set the correct numbers of nonzeros per row in the call to
1850   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1851 
1852     MUST be called before any calls to MatSetValues();
1853 
1854 @*/
1855 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1856 {
1857   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1858 
1859   PetscFunctionBegin;
1860   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1861   PetscValidPointer(indices,2);
1862   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1863   if (f) {
1864     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1865   } else {
1866     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1873 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1874 {
1875   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1876   PetscErrorCode ierr;
1877   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1878   PetscReal      atmp;
1879   PetscScalar    *x,zero = 0.0;
1880   MatScalar      *aa;
1881   PetscInt       ncols,brow,krow,kcol;
1882 
1883   PetscFunctionBegin;
1884   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1885   bs   = A->rmap.bs;
1886   aa   = a->a;
1887   ai   = a->i;
1888   aj   = a->j;
1889   mbs  = a->mbs;
1890 
1891   ierr = VecSet(v,zero);CHKERRQ(ierr);
1892   if (idx) {
1893     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1894   }
1895   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1896   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1897   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1898   for (i=0; i<mbs; i++) {
1899     ncols = ai[1] - ai[0]; ai++;
1900     brow  = bs*i;
1901     for (j=0; j<ncols; j++){
1902       for (kcol=0; kcol<bs; kcol++){
1903         for (krow=0; krow<bs; krow++){
1904           atmp = PetscAbsScalar(*aa);aa++;
1905           row = brow + krow;    /* row index */
1906           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1907           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1908         }
1909       }
1910       aj++;
1911     }
1912   }
1913   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatCopy_SeqBAIJ"
1919 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1920 {
1921   PetscErrorCode ierr;
1922 
1923   PetscFunctionBegin;
1924   /* If the two matrices have the same copy implementation, use fast copy. */
1925   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1926     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1927     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1928 
1929     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1930       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1931     }
1932     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1933   } else {
1934     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1935   }
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1941 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1942 {
1943   PetscErrorCode ierr;
1944 
1945   PetscFunctionBegin;
1946   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1952 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1953 {
1954   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1955   PetscFunctionBegin;
1956   *array = a->a;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1962 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1963 {
1964   PetscFunctionBegin;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #include "petscblaslapack.h"
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1971 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1972 {
1973   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1974   PetscErrorCode ierr;
1975   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1976   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1977 
1978   PetscFunctionBegin;
1979   if (str == SAME_NONZERO_PATTERN) {
1980     PetscScalar alpha = a;
1981     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1982   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1983     if (y->xtoy && y->XtoY != X) {
1984       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1985       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1986     }
1987     if (!y->xtoy) { /* get xtoy */
1988       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1989       y->XtoY = X;
1990     }
1991     bs2 = bs*bs;
1992     for (i=0; i<x->nz; i++) {
1993       j = 0;
1994       while (j < bs2){
1995         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1996         j++;
1997       }
1998     }
1999     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2000   } else {
2001     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2002   }
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2008 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2009 {
2010   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2011   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2012   PetscScalar    *aa = a->a;
2013 
2014   PetscFunctionBegin;
2015   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2021 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2022 {
2023   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2024   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2025   PetscScalar    *aa = a->a;
2026 
2027   PetscFunctionBegin;
2028   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 
2033 /* -------------------------------------------------------------------*/
2034 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2035        MatGetRow_SeqBAIJ,
2036        MatRestoreRow_SeqBAIJ,
2037        MatMult_SeqBAIJ_N,
2038 /* 4*/ MatMultAdd_SeqBAIJ_N,
2039        MatMultTranspose_SeqBAIJ,
2040        MatMultTransposeAdd_SeqBAIJ,
2041        MatSolve_SeqBAIJ_N,
2042        0,
2043        0,
2044 /*10*/ 0,
2045        MatLUFactor_SeqBAIJ,
2046        0,
2047        0,
2048        MatTranspose_SeqBAIJ,
2049 /*15*/ MatGetInfo_SeqBAIJ,
2050        MatEqual_SeqBAIJ,
2051        MatGetDiagonal_SeqBAIJ,
2052        MatDiagonalScale_SeqBAIJ,
2053        MatNorm_SeqBAIJ,
2054 /*20*/ 0,
2055        MatAssemblyEnd_SeqBAIJ,
2056        0,
2057        MatSetOption_SeqBAIJ,
2058        MatZeroEntries_SeqBAIJ,
2059 /*25*/ MatZeroRows_SeqBAIJ,
2060        MatLUFactorSymbolic_SeqBAIJ,
2061        MatLUFactorNumeric_SeqBAIJ_N,
2062        MatCholeskyFactorSymbolic_SeqBAIJ,
2063        MatCholeskyFactorNumeric_SeqBAIJ_N,
2064 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2065        MatILUFactorSymbolic_SeqBAIJ,
2066        MatICCFactorSymbolic_SeqBAIJ,
2067        MatGetArray_SeqBAIJ,
2068        MatRestoreArray_SeqBAIJ,
2069 /*35*/ MatDuplicate_SeqBAIJ,
2070        0,
2071        0,
2072        MatILUFactor_SeqBAIJ,
2073        0,
2074 /*40*/ MatAXPY_SeqBAIJ,
2075        MatGetSubMatrices_SeqBAIJ,
2076        MatIncreaseOverlap_SeqBAIJ,
2077        MatGetValues_SeqBAIJ,
2078        MatCopy_SeqBAIJ,
2079 /*45*/ 0,
2080        MatScale_SeqBAIJ,
2081        0,
2082        0,
2083        0,
2084 /*50*/ 0,
2085        MatGetRowIJ_SeqBAIJ,
2086        MatRestoreRowIJ_SeqBAIJ,
2087        0,
2088        0,
2089 /*55*/ 0,
2090        0,
2091        0,
2092        0,
2093        MatSetValuesBlocked_SeqBAIJ,
2094 /*60*/ MatGetSubMatrix_SeqBAIJ,
2095        MatDestroy_SeqBAIJ,
2096        MatView_SeqBAIJ,
2097        0,
2098        0,
2099 /*65*/ 0,
2100        0,
2101        0,
2102        0,
2103        0,
2104 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2105        MatConvert_Basic,
2106        0,
2107        0,
2108        0,
2109 /*75*/ 0,
2110        0,
2111        0,
2112        0,
2113        0,
2114 /*80*/ 0,
2115        0,
2116        0,
2117        0,
2118        MatLoad_SeqBAIJ,
2119 /*85*/ 0,
2120        0,
2121        0,
2122        0,
2123        0,
2124 /*90*/ 0,
2125        0,
2126        0,
2127        0,
2128        0,
2129 /*95*/ 0,
2130        0,
2131        0,
2132        0,
2133        0,
2134 /*100*/0,
2135        0,
2136        0,
2137        0,
2138        0,
2139 /*105*/0,
2140        MatRealPart_SeqBAIJ,
2141        MatImaginaryPart_SeqBAIJ,
2142        0,
2143        0,
2144 /*110*/0,
2145        0,
2146        0,
2147        0,
2148        MatMissingDiagonal_SeqBAIJ
2149 /*115*/
2150 };
2151 
2152 EXTERN_C_BEGIN
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2155 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2156 {
2157   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2158   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   if (aij->nonew != 1) {
2163     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2164   }
2165 
2166   /* allocate space for values if not already there */
2167   if (!aij->saved_values) {
2168     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2169   }
2170 
2171   /* copy values over */
2172   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2173   PetscFunctionReturn(0);
2174 }
2175 EXTERN_C_END
2176 
2177 EXTERN_C_BEGIN
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2180 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2181 {
2182   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2183   PetscErrorCode ierr;
2184   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2185 
2186   PetscFunctionBegin;
2187   if (aij->nonew != 1) {
2188     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2189   }
2190   if (!aij->saved_values) {
2191     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2192   }
2193 
2194   /* copy values over */
2195   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2196   PetscFunctionReturn(0);
2197 }
2198 EXTERN_C_END
2199 
2200 EXTERN_C_BEGIN
2201 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2202 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2203 EXTERN_C_END
2204 
2205 EXTERN_C_BEGIN
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2208 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2209 {
2210   Mat_SeqBAIJ    *b;
2211   PetscErrorCode ierr;
2212   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2213   PetscTruth     flg,skipallocation = PETSC_FALSE;
2214 
2215   PetscFunctionBegin;
2216 
2217   if (nz == MAT_SKIP_ALLOCATION) {
2218     skipallocation = PETSC_TRUE;
2219     nz             = 0;
2220   }
2221 
2222   ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2223     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2224   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2225 
2226   if (nnz && newbs != bs) {
2227     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2228   }
2229   bs   = newbs;
2230 
2231   B->rmap.bs = B->cmap.bs = bs;
2232   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2233   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2234 
2235   B->preallocated = PETSC_TRUE;
2236 
2237   mbs  = B->rmap.n/bs;
2238   nbs  = B->cmap.n/bs;
2239   bs2  = bs*bs;
2240 
2241   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2242     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2243   }
2244 
2245   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2246   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2247   if (nnz) {
2248     for (i=0; i<mbs; i++) {
2249       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2250       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2251     }
2252   }
2253 
2254   b       = (Mat_SeqBAIJ*)B->data;
2255   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2256     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2257   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2258 
2259   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2260   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2261   if (!flg) {
2262     switch (bs) {
2263     case 1:
2264       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2265       B->ops->mult            = MatMult_SeqBAIJ_1;
2266       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2267       break;
2268     case 2:
2269       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2270       B->ops->mult            = MatMult_SeqBAIJ_2;
2271       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2272       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2273       break;
2274     case 3:
2275       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2276       B->ops->mult            = MatMult_SeqBAIJ_3;
2277       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2278       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2279       break;
2280     case 4:
2281       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2282       B->ops->mult            = MatMult_SeqBAIJ_4;
2283       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2284       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2285       break;
2286     case 5:
2287       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2288       B->ops->mult            = MatMult_SeqBAIJ_5;
2289       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2290       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2291       break;
2292     case 6:
2293       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2294       B->ops->mult            = MatMult_SeqBAIJ_6;
2295       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2296       break;
2297     case 7:
2298       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2299       B->ops->mult            = MatMult_SeqBAIJ_7;
2300       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2301       break;
2302     default:
2303       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2304       B->ops->mult            = MatMult_SeqBAIJ_N;
2305       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2306       break;
2307     }
2308   }
2309   B->rmap.bs      = bs;
2310   b->mbs     = mbs;
2311   b->nbs     = nbs;
2312   if (!skipallocation) {
2313     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2314     /* b->ilen will count nonzeros in each block row so far. */
2315     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2316     if (!nnz) {
2317       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2318       else if (nz <= 0)        nz = 1;
2319       for (i=0; i<mbs; i++) b->imax[i] = nz;
2320       nz = nz*mbs;
2321     } else {
2322       nz = 0;
2323       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2324     }
2325 
2326     /* allocate the matrix space */
2327     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2328     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2329     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2330     b->singlemalloc = PETSC_TRUE;
2331 
2332     b->i[0] = 0;
2333     for (i=1; i<mbs+1; i++) {
2334       b->i[i] = b->i[i-1] + b->imax[i-1];
2335     }
2336     b->free_a     = PETSC_TRUE;
2337     b->free_ij    = PETSC_TRUE;
2338   } else {
2339     b->free_a     = PETSC_FALSE;
2340     b->free_ij    = PETSC_FALSE;
2341   }
2342 
2343   B->rmap.bs          = bs;
2344   b->bs2              = bs2;
2345   b->mbs              = mbs;
2346   b->nz               = 0;
2347   b->maxnz            = nz*bs2;
2348   B->info.nz_unneeded = (PetscReal)b->maxnz;
2349   PetscFunctionReturn(0);
2350 }
2351 EXTERN_C_END
2352 
2353 /*MC
2354    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2355    block sparse compressed row format.
2356 
2357    Options Database Keys:
2358 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2359 
2360   Level: beginner
2361 
2362 .seealso: MatCreateSeqBAIJ()
2363 M*/
2364 
2365 EXTERN_C_BEGIN
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatCreate_SeqBAIJ"
2368 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2369 {
2370   PetscErrorCode ierr;
2371   PetscMPIInt    size;
2372   Mat_SeqBAIJ    *b;
2373 
2374   PetscFunctionBegin;
2375   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2376   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2377 
2378   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2379   B->data = (void*)b;
2380   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2381   B->factor           = 0;
2382   B->mapping          = 0;
2383   b->row              = 0;
2384   b->col              = 0;
2385   b->icol             = 0;
2386   b->reallocs         = 0;
2387   b->saved_values     = 0;
2388 #if defined(PETSC_USE_MAT_SINGLE)
2389   b->setvalueslen     = 0;
2390   b->setvaluescopy    = PETSC_NULL;
2391 #endif
2392 
2393   b->roworiented      = PETSC_TRUE;
2394   b->nonew            = 0;
2395   b->diag             = 0;
2396   b->solve_work       = 0;
2397   b->mult_work        = 0;
2398   B->spptr            = 0;
2399   B->info.nz_unneeded = (PetscReal)b->maxnz;
2400   b->keepzeroedrows   = PETSC_FALSE;
2401   b->xtoy              = 0;
2402   b->XtoY              = 0;
2403   b->compressedrow.use     = PETSC_FALSE;
2404   b->compressedrow.nrows   = 0;
2405   b->compressedrow.i       = PETSC_NULL;
2406   b->compressedrow.rindex  = PETSC_NULL;
2407   b->compressedrow.checked = PETSC_FALSE;
2408   B->same_nonzero          = PETSC_FALSE;
2409 
2410   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2411                                      "MatInvertBlockDiagonal_SeqBAIJ",
2412                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2414                                      "MatStoreValues_SeqBAIJ",
2415                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2416   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2417                                      "MatRetrieveValues_SeqBAIJ",
2418                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2419   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2420                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2421                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2422   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2423                                      "MatConvert_SeqBAIJ_SeqAIJ",
2424                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2425   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2426                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2427                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2428   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2429                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2430                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2431   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 EXTERN_C_END
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2438 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2439 {
2440   Mat            C;
2441   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2442   PetscErrorCode ierr;
2443   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2444 
2445   PetscFunctionBegin;
2446   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2447 
2448   *B = 0;
2449   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2450   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2451   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2452   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2453   c    = (Mat_SeqBAIJ*)C->data;
2454 
2455   C->rmap.N   = A->rmap.N;
2456   C->cmap.N   = A->cmap.N;
2457   C->rmap.bs  = A->rmap.bs;
2458   c->bs2 = a->bs2;
2459   c->mbs = a->mbs;
2460   c->nbs = a->nbs;
2461 
2462   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2463   for (i=0; i<mbs; i++) {
2464     c->imax[i] = a->imax[i];
2465     c->ilen[i] = a->ilen[i];
2466   }
2467 
2468   /* allocate the matrix space */
2469   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2470   c->singlemalloc = PETSC_TRUE;
2471   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2472   if (mbs > 0) {
2473     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2474     if (cpvalues == MAT_COPY_VALUES) {
2475       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2476     } else {
2477       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2478     }
2479   }
2480   c->roworiented = a->roworiented;
2481   c->nonew       = a->nonew;
2482 
2483   if (a->diag) {
2484     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2485     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2486     for (i=0; i<mbs; i++) {
2487       c->diag[i] = a->diag[i];
2488     }
2489   } else c->diag        = 0;
2490   c->nz                 = a->nz;
2491   c->maxnz              = a->maxnz;
2492   c->solve_work         = 0;
2493   c->mult_work          = 0;
2494   c->free_a             = PETSC_TRUE;
2495   c->free_ij            = PETSC_TRUE;
2496   C->preallocated       = PETSC_TRUE;
2497   C->assembled          = PETSC_TRUE;
2498 
2499   c->compressedrow.use     = a->compressedrow.use;
2500   c->compressedrow.nrows   = a->compressedrow.nrows;
2501   c->compressedrow.checked = a->compressedrow.checked;
2502   if ( a->compressedrow.checked && a->compressedrow.use){
2503     i = a->compressedrow.nrows;
2504     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2505     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2506     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2507     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2508   } else {
2509     c->compressedrow.use    = PETSC_FALSE;
2510     c->compressedrow.i      = PETSC_NULL;
2511     c->compressedrow.rindex = PETSC_NULL;
2512   }
2513   C->same_nonzero = A->same_nonzero;
2514   *B = C;
2515   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatLoad_SeqBAIJ"
2521 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2522 {
2523   Mat_SeqBAIJ    *a;
2524   Mat            B;
2525   PetscErrorCode ierr;
2526   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2527   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2528   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2529   PetscInt       *masked,nmask,tmp,bs2,ishift;
2530   PetscMPIInt    size;
2531   int            fd;
2532   PetscScalar    *aa;
2533   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2534 
2535   PetscFunctionBegin;
2536   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2537     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2538   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2539   bs2  = bs*bs;
2540 
2541   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2542   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2543   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2544   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2545   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2546   M = header[1]; N = header[2]; nz = header[3];
2547 
2548   if (header[3] < 0) {
2549     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2550   }
2551 
2552   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2553 
2554   /*
2555      This code adds extra rows to make sure the number of rows is
2556     divisible by the blocksize
2557   */
2558   mbs        = M/bs;
2559   extra_rows = bs - M + bs*(mbs);
2560   if (extra_rows == bs) extra_rows = 0;
2561   else                  mbs++;
2562   if (extra_rows) {
2563     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2564   }
2565 
2566   /* read in row lengths */
2567   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2568   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2569   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2570 
2571   /* read in column indices */
2572   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2573   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2574   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2575 
2576   /* loop over row lengths determining block row lengths */
2577   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2578   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2579   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2580   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2581   masked   = mask + mbs;
2582   rowcount = 0; nzcount = 0;
2583   for (i=0; i<mbs; i++) {
2584     nmask = 0;
2585     for (j=0; j<bs; j++) {
2586       kmax = rowlengths[rowcount];
2587       for (k=0; k<kmax; k++) {
2588         tmp = jj[nzcount++]/bs;
2589         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2590       }
2591       rowcount++;
2592     }
2593     browlengths[i] += nmask;
2594     /* zero out the mask elements we set */
2595     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2596   }
2597 
2598   /* create our matrix */
2599   ierr = MatCreate(comm,&B);
2600   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2601   ierr = MatSetType(B,type);CHKERRQ(ierr);
2602   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2603   a = (Mat_SeqBAIJ*)B->data;
2604 
2605   /* set matrix "i" values */
2606   a->i[0] = 0;
2607   for (i=1; i<= mbs; i++) {
2608     a->i[i]      = a->i[i-1] + browlengths[i-1];
2609     a->ilen[i-1] = browlengths[i-1];
2610   }
2611   a->nz         = 0;
2612   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2613 
2614   /* read in nonzero values */
2615   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2616   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2617   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2618 
2619   /* set "a" and "j" values into matrix */
2620   nzcount = 0; jcount = 0;
2621   for (i=0; i<mbs; i++) {
2622     nzcountb = nzcount;
2623     nmask    = 0;
2624     for (j=0; j<bs; j++) {
2625       kmax = rowlengths[i*bs+j];
2626       for (k=0; k<kmax; k++) {
2627         tmp = jj[nzcount++]/bs;
2628 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2629       }
2630     }
2631     /* sort the masked values */
2632     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2633 
2634     /* set "j" values into matrix */
2635     maskcount = 1;
2636     for (j=0; j<nmask; j++) {
2637       a->j[jcount++]  = masked[j];
2638       mask[masked[j]] = maskcount++;
2639     }
2640     /* set "a" values into matrix */
2641     ishift = bs2*a->i[i];
2642     for (j=0; j<bs; j++) {
2643       kmax = rowlengths[i*bs+j];
2644       for (k=0; k<kmax; k++) {
2645         tmp       = jj[nzcountb]/bs ;
2646         block     = mask[tmp] - 1;
2647         point     = jj[nzcountb] - bs*tmp;
2648         idx       = ishift + bs2*block + j + bs*point;
2649         a->a[idx] = (MatScalar)aa[nzcountb++];
2650       }
2651     }
2652     /* zero out the mask elements we set */
2653     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2654   }
2655   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2656 
2657   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2658   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2659   ierr = PetscFree(aa);CHKERRQ(ierr);
2660   ierr = PetscFree(jj);CHKERRQ(ierr);
2661   ierr = PetscFree(mask);CHKERRQ(ierr);
2662 
2663   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2664   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2665   ierr = MatView_Private(B);CHKERRQ(ierr);
2666 
2667   *A = B;
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 #undef __FUNCT__
2672 #define __FUNCT__ "MatCreateSeqBAIJ"
2673 /*@C
2674    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2675    compressed row) format.  For good matrix assembly performance the
2676    user should preallocate the matrix storage by setting the parameter nz
2677    (or the array nnz).  By setting these parameters accurately, performance
2678    during matrix assembly can be increased by more than a factor of 50.
2679 
2680    Collective on MPI_Comm
2681 
2682    Input Parameters:
2683 +  comm - MPI communicator, set to PETSC_COMM_SELF
2684 .  bs - size of block
2685 .  m - number of rows
2686 .  n - number of columns
2687 .  nz - number of nonzero blocks  per block row (same for all rows)
2688 -  nnz - array containing the number of nonzero blocks in the various block rows
2689          (possibly different for each block row) or PETSC_NULL
2690 
2691    Output Parameter:
2692 .  A - the matrix
2693 
2694    Options Database Keys:
2695 .   -mat_no_unroll - uses code that does not unroll the loops in the
2696                      block calculations (much slower)
2697 .    -mat_block_size - size of the blocks to use
2698 
2699    Level: intermediate
2700 
2701    Notes:
2702    The number of rows and columns must be divisible by blocksize.
2703 
2704    If the nnz parameter is given then the nz parameter is ignored
2705 
2706    A nonzero block is any block that as 1 or more nonzeros in it
2707 
2708    The block AIJ format is fully compatible with standard Fortran 77
2709    storage.  That is, the stored row and column indices can begin at
2710    either one (as in Fortran) or zero.  See the users' manual for details.
2711 
2712    Specify the preallocated storage with either nz or nnz (not both).
2713    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2714    allocation.  For additional details, see the users manual chapter on
2715    matrices.
2716 
2717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2718 @*/
2719 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2720 {
2721   PetscErrorCode ierr;
2722 
2723   PetscFunctionBegin;
2724   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2725   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2726   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2727   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2728   PetscFunctionReturn(0);
2729 }
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2733 /*@C
2734    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2735    per row in the matrix. For good matrix assembly performance the
2736    user should preallocate the matrix storage by setting the parameter nz
2737    (or the array nnz).  By setting these parameters accurately, performance
2738    during matrix assembly can be increased by more than a factor of 50.
2739 
2740    Collective on MPI_Comm
2741 
2742    Input Parameters:
2743 +  A - the matrix
2744 .  bs - size of block
2745 .  nz - number of block nonzeros per block row (same for all rows)
2746 -  nnz - array containing the number of block nonzeros in the various block rows
2747          (possibly different for each block row) or PETSC_NULL
2748 
2749    Options Database Keys:
2750 .   -mat_no_unroll - uses code that does not unroll the loops in the
2751                      block calculations (much slower)
2752 .    -mat_block_size - size of the blocks to use
2753 
2754    Level: intermediate
2755 
2756    Notes:
2757    If the nnz parameter is given then the nz parameter is ignored
2758 
2759    The block AIJ format is fully compatible with standard Fortran 77
2760    storage.  That is, the stored row and column indices can begin at
2761    either one (as in Fortran) or zero.  See the users' manual for details.
2762 
2763    Specify the preallocated storage with either nz or nnz (not both).
2764    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2765    allocation.  For additional details, see the users manual chapter on
2766    matrices.
2767 
2768 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2769 @*/
2770 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2771 {
2772   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2773 
2774   PetscFunctionBegin;
2775   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2776   if (f) {
2777     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2778   }
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2784 /*@
2785      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2786               (upper triangular entries in CSR format) provided by the user.
2787 
2788      Collective on MPI_Comm
2789 
2790    Input Parameters:
2791 +  comm - must be an MPI communicator of size 1
2792 .  bs - size of block
2793 .  m - number of rows
2794 .  n - number of columns
2795 .  i - row indices
2796 .  j - column indices
2797 -  a - matrix values
2798 
2799    Output Parameter:
2800 .  mat - the matrix
2801 
2802    Level: intermediate
2803 
2804    Notes:
2805        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2806     once the matrix is destroyed
2807 
2808        You cannot set new nonzero locations into this matrix, that will generate an error.
2809 
2810        The i and j indices are 0 based
2811 
2812 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2813 
2814 @*/
2815 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2816 {
2817   PetscErrorCode ierr;
2818   PetscInt       ii;
2819   Mat_SeqBAIJ    *baij;
2820 
2821   PetscFunctionBegin;
2822   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2823   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2824 
2825   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2826   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2827   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2828   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2829   baij = (Mat_SeqBAIJ*)(*mat)->data;
2830   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2831 
2832   baij->i = i;
2833   baij->j = j;
2834   baij->a = a;
2835   baij->singlemalloc = PETSC_FALSE;
2836   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2837   baij->free_a       = PETSC_FALSE;
2838   baij->free_ij       = PETSC_FALSE;
2839 
2840   for (ii=0; ii<m; ii++) {
2841     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2842 #if defined(PETSC_USE_DEBUG)
2843     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2844 #endif
2845   }
2846 #if defined(PETSC_USE_DEBUG)
2847   for (ii=0; ii<baij->i[m]; ii++) {
2848     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2849     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2850   }
2851 #endif
2852 
2853   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2854   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2855   PetscFunctionReturn(0);
2856 }
2857