xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4e0d8c253c5baa1c48327ddb2b78bf7a43f58497)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
785   PetscInt     *tia, *tja;
786 
787   PetscFunctionBegin;
788   *nn = n;
789   if (!ia) PetscFunctionReturn(0);
790   if (symmetric) {
791     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
792   } else {
793     tia = a->i; tja = a->j;
794   }
795 
796   if (!blockcompressed) {
797     /* malloc & create the natural set of indices */
798     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
799     for (i=0; i<n+1; i++) {
800       for (j=0; j<bs; j++) {
801         *ia[i*bs+j] = tia[i]*bs+j;
802       }
803     }
804     for (i=0; i<nz; i++) {
805       for (j=0; j<bs; j++) {
806         *ja[i*bs+j] = tia[i]*bs+j;
807       }
808     }
809     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
810       ierr = PetscFree(tia);CHKERRQ(ierr);
811       ierr = PetscFree(tja);CHKERRQ(ierr);
812     }
813   } else {
814     *ia = tia;
815     *ja = tja;
816   }
817   if (oshift == 1) {
818     for (i=0; i<nz; i++) (*ja)[i]++;
819     for (i=0; i<n+1; i++) (*ia)[i]++;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
826 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
827 {
828   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
829   PetscErrorCode ierr;
830   PetscInt       i,n = a->mbs,nz = a->i[n];
831 
832   PetscFunctionBegin;
833   if (!ia) PetscFunctionReturn(0);
834   if (!blockcompressed) {
835     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
836   }else if (symmetric) {
837     ierr = PetscFree(*ia);CHKERRQ(ierr);
838     ierr = PetscFree(*ja);CHKERRQ(ierr);
839   } else if (oshift == 1) { /* blockcompressed */
840     for (i=0; i<nz; i++) a->j[i]--;
841     for (i=0; i<n+1; i++) a->i[i]--;
842   }
843   PetscFunctionReturn(0);
844 }
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDestroy_SeqBAIJ"
848 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
849 {
850   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
851   PetscErrorCode ierr;
852 
853   PetscFunctionBegin;
854 #if defined(PETSC_USE_LOG)
855   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
856 #endif
857   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
858   if (a->row) {
859     ierr = ISDestroy(a->row);CHKERRQ(ierr);
860   }
861   if (a->col) {
862     ierr = ISDestroy(a->col);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(a->diag);CHKERRQ(ierr);
865   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
866   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
867   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
868   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
869   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
870   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
871 #if defined(PETSC_USE_MAT_SINGLE)
872   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
873 #endif
874   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
875   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
876 
877   ierr = PetscFree(a);CHKERRQ(ierr);
878 
879   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
880   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
881   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
882   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
883   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
884   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
885   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
886   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatSetOption_SeqBAIJ"
892 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
893 {
894   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   switch (op) {
899   case MAT_ROW_ORIENTED:
900     a->roworiented    = flg;
901     break;
902   case MAT_COLUMNS_SORTED:
903     a->sorted         = flg;
904     break;
905   case MAT_KEEP_ZEROED_ROWS:
906     a->keepzeroedrows = flg;
907     break;
908   case MAT_NO_NEW_NONZERO_LOCATIONS:
909     a->nonew          = (flg ? 1 : 0);
910     break;
911   case MAT_NEW_NONZERO_LOCATION_ERR:
912     a->nonew          = (flg ? -1 : 0);
913     break;
914   case MAT_NEW_NONZERO_ALLOCATION_ERR:
915     a->nonew          = (flg ? -2 : 0);
916     break;
917   case MAT_ROWS_SORTED:
918   case MAT_NEW_DIAGONALS:
919   case MAT_IGNORE_OFF_PROC_ENTRIES:
920   case MAT_USE_HASH_TABLE:
921     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
922     break;
923   case MAT_SYMMETRIC:
924   case MAT_STRUCTURALLY_SYMMETRIC:
925   case MAT_HERMITIAN:
926   case MAT_SYMMETRY_ETERNAL:
927     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
928     break;
929   default:
930     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatGetRow_SeqBAIJ"
937 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
938 {
939   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
940   PetscErrorCode ierr;
941   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
942   MatScalar      *aa,*aa_i;
943   PetscScalar    *v_i;
944 
945   PetscFunctionBegin;
946   bs  = A->rmap.bs;
947   ai  = a->i;
948   aj  = a->j;
949   aa  = a->a;
950   bs2 = a->bs2;
951 
952   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
953 
954   bn  = row/bs;   /* Block number */
955   bp  = row % bs; /* Block Position */
956   M   = ai[bn+1] - ai[bn];
957   *nz = bs*M;
958 
959   if (v) {
960     *v = 0;
961     if (*nz) {
962       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
963       for (i=0; i<M; i++) { /* for each block in the block row */
964         v_i  = *v + i*bs;
965         aa_i = aa + bs2*(ai[bn] + i);
966         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
967       }
968     }
969   }
970 
971   if (idx) {
972     *idx = 0;
973     if (*nz) {
974       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
975       for (i=0; i<M; i++) { /* for each block in the block row */
976         idx_i = *idx + i*bs;
977         itmp  = bs*aj[ai[bn] + i];
978         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
979       }
980     }
981   }
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
987 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
988 {
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
993   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatTranspose_SeqBAIJ"
999 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1000 {
1001   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1002   Mat            C;
1003   PetscErrorCode ierr;
1004   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1005   PetscInt       *rows,*cols,bs2=a->bs2;
1006   PetscScalar    *array;
1007 
1008   PetscFunctionBegin;
1009   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1010   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1011   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1012 
1013 #if defined(PETSC_USE_MAT_SINGLE)
1014   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1015   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1016 #else
1017   array = a->a;
1018 #endif
1019 
1020   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1021   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1022   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1023   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1024   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1025   ierr = PetscFree(col);CHKERRQ(ierr);
1026   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1027   cols = rows + bs;
1028   for (i=0; i<mbs; i++) {
1029     cols[0] = i*bs;
1030     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1031     len = ai[i+1] - ai[i];
1032     for (j=0; j<len; j++) {
1033       rows[0] = (*aj++)*bs;
1034       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1035       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1036       array += bs2;
1037     }
1038   }
1039   ierr = PetscFree(rows);CHKERRQ(ierr);
1040 #if defined(PETSC_USE_MAT_SINGLE)
1041   ierr = PetscFree(array);
1042 #endif
1043 
1044   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1045   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1046 
1047   if (B) {
1048     *B = C;
1049   } else {
1050     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1057 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1058 {
1059   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1060   PetscErrorCode ierr;
1061   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1062   int            fd;
1063   PetscScalar    *aa;
1064   FILE           *file;
1065 
1066   PetscFunctionBegin;
1067   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1068   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1069   col_lens[0] = MAT_FILE_COOKIE;
1070 
1071   col_lens[1] = A->rmap.N;
1072   col_lens[2] = A->cmap.n;
1073   col_lens[3] = a->nz*bs2;
1074 
1075   /* store lengths of each row and write (including header) to file */
1076   count = 0;
1077   for (i=0; i<a->mbs; i++) {
1078     for (j=0; j<bs; j++) {
1079       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1080     }
1081   }
1082   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1083   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1084 
1085   /* store column indices (zero start index) */
1086   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1087   count = 0;
1088   for (i=0; i<a->mbs; i++) {
1089     for (j=0; j<bs; j++) {
1090       for (k=a->i[i]; k<a->i[i+1]; k++) {
1091         for (l=0; l<bs; l++) {
1092           jj[count++] = bs*a->j[k] + l;
1093         }
1094       }
1095     }
1096   }
1097   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1098   ierr = PetscFree(jj);CHKERRQ(ierr);
1099 
1100   /* store nonzero values */
1101   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1102   count = 0;
1103   for (i=0; i<a->mbs; i++) {
1104     for (j=0; j<bs; j++) {
1105       for (k=a->i[i]; k<a->i[i+1]; k++) {
1106         for (l=0; l<bs; l++) {
1107           aa[count++] = a->a[bs2*k + l*bs + j];
1108         }
1109       }
1110     }
1111   }
1112   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1113   ierr = PetscFree(aa);CHKERRQ(ierr);
1114 
1115   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1116   if (file) {
1117     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1118   }
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1124 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1125 {
1126   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1127   PetscErrorCode    ierr;
1128   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1129   PetscViewerFormat format;
1130 
1131   PetscFunctionBegin;
1132   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1133   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1134     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1135   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1136     Mat aij;
1137     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1138     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1139     ierr = MatDestroy(aij);CHKERRQ(ierr);
1140   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1141      PetscFunctionReturn(0);
1142   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1143     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1144     for (i=0; i<a->mbs; i++) {
1145       for (j=0; j<bs; j++) {
1146         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1147         for (k=a->i[i]; k<a->i[i+1]; k++) {
1148           for (l=0; l<bs; l++) {
1149 #if defined(PETSC_USE_COMPLEX)
1150             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1151               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1152                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1153             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1154               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1155                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1156             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1157               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1158             }
1159 #else
1160             if (a->a[bs2*k + l*bs + j] != 0.0) {
1161               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1162             }
1163 #endif
1164           }
1165         }
1166         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1167       }
1168     }
1169     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1170   } else {
1171     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1172     for (i=0; i<a->mbs; i++) {
1173       for (j=0; j<bs; j++) {
1174         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1175         for (k=a->i[i]; k<a->i[i+1]; k++) {
1176           for (l=0; l<bs; l++) {
1177 #if defined(PETSC_USE_COMPLEX)
1178             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1179               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1180                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1181             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1182               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1183                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1184             } else {
1185               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1186             }
1187 #else
1188             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1189 #endif
1190           }
1191         }
1192         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1193       }
1194     }
1195     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1196   }
1197   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1203 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1204 {
1205   Mat            A = (Mat) Aa;
1206   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1207   PetscErrorCode ierr;
1208   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1209   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1210   MatScalar      *aa;
1211   PetscViewer    viewer;
1212 
1213   PetscFunctionBegin;
1214 
1215   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1216   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1217 
1218   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1219 
1220   /* loop over matrix elements drawing boxes */
1221   color = PETSC_DRAW_BLUE;
1222   for (i=0,row=0; i<mbs; i++,row+=bs) {
1223     for (j=a->i[i]; j<a->i[i+1]; j++) {
1224       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1225       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1226       aa = a->a + j*bs2;
1227       for (k=0; k<bs; k++) {
1228         for (l=0; l<bs; l++) {
1229           if (PetscRealPart(*aa++) >=  0.) continue;
1230           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1231         }
1232       }
1233     }
1234   }
1235   color = PETSC_DRAW_CYAN;
1236   for (i=0,row=0; i<mbs; i++,row+=bs) {
1237     for (j=a->i[i]; j<a->i[i+1]; j++) {
1238       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1239       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1240       aa = a->a + j*bs2;
1241       for (k=0; k<bs; k++) {
1242         for (l=0; l<bs; l++) {
1243           if (PetscRealPart(*aa++) != 0.) continue;
1244           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1245         }
1246       }
1247     }
1248   }
1249 
1250   color = PETSC_DRAW_RED;
1251   for (i=0,row=0; i<mbs; i++,row+=bs) {
1252     for (j=a->i[i]; j<a->i[i+1]; j++) {
1253       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1255       aa = a->a + j*bs2;
1256       for (k=0; k<bs; k++) {
1257         for (l=0; l<bs; l++) {
1258           if (PetscRealPart(*aa++) <= 0.) continue;
1259           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1260         }
1261       }
1262     }
1263   }
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1269 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1270 {
1271   PetscErrorCode ierr;
1272   PetscReal      xl,yl,xr,yr,w,h;
1273   PetscDraw      draw;
1274   PetscTruth     isnull;
1275 
1276   PetscFunctionBegin;
1277 
1278   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1279   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1280 
1281   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1282   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1283   xr += w;    yr += h;  xl = -w;     yl = -h;
1284   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1285   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1286   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatView_SeqBAIJ"
1292 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1293 {
1294   PetscErrorCode ierr;
1295   PetscTruth     iascii,isbinary,isdraw;
1296 
1297   PetscFunctionBegin;
1298   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1299   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1300   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1301   if (iascii){
1302     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1303   } else if (isbinary) {
1304     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1305   } else if (isdraw) {
1306     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1307   } else {
1308     Mat B;
1309     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1310     ierr = MatView(B,viewer);CHKERRQ(ierr);
1311     ierr = MatDestroy(B);CHKERRQ(ierr);
1312   }
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1319 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1320 {
1321   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1322   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1323   PetscInt    *ai = a->i,*ailen = a->ilen;
1324   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1325   MatScalar   *ap,*aa = a->a;
1326 
1327   PetscFunctionBegin;
1328   for (k=0; k<m; k++) { /* loop over rows */
1329     row  = im[k]; brow = row/bs;
1330     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1331     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1332     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1333     nrow = ailen[brow];
1334     for (l=0; l<n; l++) { /* loop over columns */
1335       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1336       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1337       col  = in[l] ;
1338       bcol = col/bs;
1339       cidx = col%bs;
1340       ridx = row%bs;
1341       high = nrow;
1342       low  = 0; /* assume unsorted */
1343       while (high-low > 5) {
1344         t = (low+high)/2;
1345         if (rp[t] > bcol) high = t;
1346         else             low  = t;
1347       }
1348       for (i=low; i<high; i++) {
1349         if (rp[i] > bcol) break;
1350         if (rp[i] == bcol) {
1351           *v++ = ap[bs2*i+bs*cidx+ridx];
1352           goto finished;
1353         }
1354       }
1355       *v++ = 0.0;
1356       finished:;
1357     }
1358   }
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #if defined(PETSC_USE_MAT_SINGLE)
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1365 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1366 {
1367   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1368   PetscErrorCode ierr;
1369   PetscInt       i,N = m*n*b->bs2;
1370   MatScalar      *vsingle;
1371 
1372   PetscFunctionBegin;
1373   if (N > b->setvalueslen) {
1374     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1375     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1376     b->setvalueslen  = N;
1377   }
1378   vsingle = b->setvaluescopy;
1379   for (i=0; i<N; i++) {
1380     vsingle[i] = v[i];
1381   }
1382   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 #endif
1386 
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1390 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1391 {
1392   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1393   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1394   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1395   PetscErrorCode  ierr;
1396   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1397   PetscTruth      roworiented=a->roworiented;
1398   const MatScalar *value = v;
1399   MatScalar       *ap,*aa = a->a,*bap;
1400 
1401   PetscFunctionBegin;
1402   if (roworiented) {
1403     stepval = (n-1)*bs;
1404   } else {
1405     stepval = (m-1)*bs;
1406   }
1407   for (k=0; k<m; k++) { /* loop over added rows */
1408     row  = im[k];
1409     if (row < 0) continue;
1410 #if defined(PETSC_USE_DEBUG)
1411     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1412 #endif
1413     rp   = aj + ai[row];
1414     ap   = aa + bs2*ai[row];
1415     rmax = imax[row];
1416     nrow = ailen[row];
1417     low  = 0;
1418     high = nrow;
1419     for (l=0; l<n; l++) { /* loop over added columns */
1420       if (in[l] < 0) continue;
1421 #if defined(PETSC_USE_DEBUG)
1422       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1423 #endif
1424       col = in[l];
1425       if (roworiented) {
1426         value = v + k*(stepval+bs)*bs + l*bs;
1427       } else {
1428         value = v + l*(stepval+bs)*bs + k*bs;
1429       }
1430       if (col <= lastcol) low = 0; else high = nrow;
1431       lastcol = col;
1432       while (high-low > 7) {
1433         t = (low+high)/2;
1434         if (rp[t] > col) high = t;
1435         else             low  = t;
1436       }
1437       for (i=low; i<high; i++) {
1438         if (rp[i] > col) break;
1439         if (rp[i] == col) {
1440           bap  = ap +  bs2*i;
1441           if (roworiented) {
1442             if (is == ADD_VALUES) {
1443               for (ii=0; ii<bs; ii++,value+=stepval) {
1444                 for (jj=ii; jj<bs2; jj+=bs) {
1445                   bap[jj] += *value++;
1446                 }
1447               }
1448             } else {
1449               for (ii=0; ii<bs; ii++,value+=stepval) {
1450                 for (jj=ii; jj<bs2; jj+=bs) {
1451                   bap[jj] = *value++;
1452                 }
1453               }
1454             }
1455           } else {
1456             if (is == ADD_VALUES) {
1457               for (ii=0; ii<bs; ii++,value+=stepval) {
1458                 for (jj=0; jj<bs; jj++) {
1459                   *bap++ += *value++;
1460                 }
1461               }
1462             } else {
1463               for (ii=0; ii<bs; ii++,value+=stepval) {
1464                 for (jj=0; jj<bs; jj++) {
1465                   *bap++  = *value++;
1466                 }
1467               }
1468             }
1469           }
1470           goto noinsert2;
1471         }
1472       }
1473       if (nonew == 1) goto noinsert2;
1474       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1475       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1476       N = nrow++ - 1; high++;
1477       /* shift up all the later entries in this row */
1478       for (ii=N; ii>=i; ii--) {
1479         rp[ii+1] = rp[ii];
1480         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1481       }
1482       if (N >= i) {
1483         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1484       }
1485       rp[i] = col;
1486       bap   = ap +  bs2*i;
1487       if (roworiented) {
1488         for (ii=0; ii<bs; ii++,value+=stepval) {
1489           for (jj=ii; jj<bs2; jj+=bs) {
1490             bap[jj] = *value++;
1491           }
1492         }
1493       } else {
1494         for (ii=0; ii<bs; ii++,value+=stepval) {
1495           for (jj=0; jj<bs; jj++) {
1496             *bap++  = *value++;
1497           }
1498         }
1499       }
1500       noinsert2:;
1501       low = i;
1502     }
1503     ailen[row] = nrow;
1504   }
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1510 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1511 {
1512   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1513   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1514   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1515   PetscErrorCode ierr;
1516   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1517   MatScalar      *aa = a->a,*ap;
1518   PetscReal      ratio=0.6;
1519 
1520   PetscFunctionBegin;
1521   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1522 
1523   if (m) rmax = ailen[0];
1524   for (i=1; i<mbs; i++) {
1525     /* move each row back by the amount of empty slots (fshift) before it*/
1526     fshift += imax[i-1] - ailen[i-1];
1527     rmax   = PetscMax(rmax,ailen[i]);
1528     if (fshift) {
1529       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1530       N = ailen[i];
1531       for (j=0; j<N; j++) {
1532         ip[j-fshift] = ip[j];
1533         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1534       }
1535     }
1536     ai[i] = ai[i-1] + ailen[i-1];
1537   }
1538   if (mbs) {
1539     fshift += imax[mbs-1] - ailen[mbs-1];
1540     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1541   }
1542   /* reset ilen and imax for each row */
1543   for (i=0; i<mbs; i++) {
1544     ailen[i] = imax[i] = ai[i+1] - ai[i];
1545   }
1546   a->nz = ai[mbs];
1547 
1548   /* diagonals may have moved, so kill the diagonal pointers */
1549   a->idiagvalid = PETSC_FALSE;
1550   if (fshift && a->diag) {
1551     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1552     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1553     a->diag = 0;
1554   }
1555   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1556   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1557   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1558   a->reallocs          = 0;
1559   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1560 
1561   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1562   if (a->compressedrow.use){
1563     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1564   }
1565 
1566   A->same_nonzero = PETSC_TRUE;
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 /*
1571    This function returns an array of flags which indicate the locations of contiguous
1572    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1573    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1574    Assume: sizes should be long enough to hold all the values.
1575 */
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1578 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1579 {
1580   PetscInt   i,j,k,row;
1581   PetscTruth flg;
1582 
1583   PetscFunctionBegin;
1584   for (i=0,j=0; i<n; j++) {
1585     row = idx[i];
1586     if (row%bs!=0) { /* Not the begining of a block */
1587       sizes[j] = 1;
1588       i++;
1589     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1590       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1591       i++;
1592     } else { /* Begining of the block, so check if the complete block exists */
1593       flg = PETSC_TRUE;
1594       for (k=1; k<bs; k++) {
1595         if (row+k != idx[i+k]) { /* break in the block */
1596           flg = PETSC_FALSE;
1597           break;
1598         }
1599       }
1600       if (flg) { /* No break in the bs */
1601         sizes[j] = bs;
1602         i+= bs;
1603       } else {
1604         sizes[j] = 1;
1605         i++;
1606       }
1607     }
1608   }
1609   *bs_max = j;
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1615 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1616 {
1617   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1618   PetscErrorCode ierr;
1619   PetscInt       i,j,k,count,*rows;
1620   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1621   PetscScalar    zero = 0.0;
1622   MatScalar      *aa;
1623 
1624   PetscFunctionBegin;
1625   /* Make a copy of the IS and  sort it */
1626   /* allocate memory for rows,sizes */
1627   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1628   sizes = rows + is_n;
1629 
1630   /* copy IS values to rows, and sort them */
1631   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1632   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1633   if (baij->keepzeroedrows) {
1634     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1635     bs_max = is_n;
1636     A->same_nonzero = PETSC_TRUE;
1637   } else {
1638     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1639     A->same_nonzero = PETSC_FALSE;
1640   }
1641 
1642   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1643     row   = rows[j];
1644     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1645     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1646     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1647     if (sizes[i] == bs && !baij->keepzeroedrows) {
1648       if (diag != 0.0) {
1649         if (baij->ilen[row/bs] > 0) {
1650           baij->ilen[row/bs]       = 1;
1651           baij->j[baij->i[row/bs]] = row/bs;
1652           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1653         }
1654         /* Now insert all the diagonal values for this bs */
1655         for (k=0; k<bs; k++) {
1656           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1657         }
1658       } else { /* (diag == 0.0) */
1659         baij->ilen[row/bs] = 0;
1660       } /* end (diag == 0.0) */
1661     } else { /* (sizes[i] != bs) */
1662 #if defined (PETSC_USE_DEBUG)
1663       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1664 #endif
1665       for (k=0; k<count; k++) {
1666         aa[0] =  zero;
1667         aa    += bs;
1668       }
1669       if (diag != 0.0) {
1670         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1671       }
1672     }
1673   }
1674 
1675   ierr = PetscFree(rows);CHKERRQ(ierr);
1676   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1682 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1683 {
1684   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1685   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1686   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1687   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1688   PetscErrorCode ierr;
1689   PetscInt       ridx,cidx,bs2=a->bs2;
1690   PetscTruth     roworiented=a->roworiented;
1691   MatScalar      *ap,value,*aa=a->a,*bap;
1692 
1693   PetscFunctionBegin;
1694   for (k=0; k<m; k++) { /* loop over added rows */
1695     row  = im[k];
1696     brow = row/bs;
1697     if (row < 0) continue;
1698 #if defined(PETSC_USE_DEBUG)
1699     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1700 #endif
1701     rp   = aj + ai[brow];
1702     ap   = aa + bs2*ai[brow];
1703     rmax = imax[brow];
1704     nrow = ailen[brow];
1705     low  = 0;
1706     high = nrow;
1707     for (l=0; l<n; l++) { /* loop over added columns */
1708       if (in[l] < 0) continue;
1709 #if defined(PETSC_USE_DEBUG)
1710       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1711 #endif
1712       col = in[l]; bcol = col/bs;
1713       ridx = row % bs; cidx = col % bs;
1714       if (roworiented) {
1715         value = v[l + k*n];
1716       } else {
1717         value = v[k + l*m];
1718       }
1719       if (col <= lastcol) low = 0; else high = nrow;
1720       lastcol = col;
1721       while (high-low > 7) {
1722         t = (low+high)/2;
1723         if (rp[t] > bcol) high = t;
1724         else              low  = t;
1725       }
1726       for (i=low; i<high; i++) {
1727         if (rp[i] > bcol) break;
1728         if (rp[i] == bcol) {
1729           bap  = ap +  bs2*i + bs*cidx + ridx;
1730           if (is == ADD_VALUES) *bap += value;
1731           else                  *bap  = value;
1732           goto noinsert1;
1733         }
1734       }
1735       if (nonew == 1) goto noinsert1;
1736       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1737       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1738       N = nrow++ - 1; high++;
1739       /* shift up all the later entries in this row */
1740       for (ii=N; ii>=i; ii--) {
1741         rp[ii+1] = rp[ii];
1742         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1743       }
1744       if (N>=i) {
1745         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1746       }
1747       rp[i]                      = bcol;
1748       ap[bs2*i + bs*cidx + ridx] = value;
1749       a->nz++;
1750       noinsert1:;
1751       low = i;
1752     }
1753     ailen[brow] = nrow;
1754   }
1755   A->same_nonzero = PETSC_FALSE;
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1762 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1763 {
1764   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1765   Mat            outA;
1766   PetscErrorCode ierr;
1767   PetscTruth     row_identity,col_identity;
1768 
1769   PetscFunctionBegin;
1770   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1771   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1772   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1773   if (!row_identity || !col_identity) {
1774     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1775   }
1776 
1777   outA          = inA;
1778   inA->factor   = FACTOR_LU;
1779 
1780   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1781 
1782   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1783   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1784   a->row = row;
1785   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1786   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1787   a->col = col;
1788 
1789   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1790   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1791   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1792 
1793   /*
1794       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1795       for ILU(0) factorization with natural ordering
1796   */
1797   if (inA->rmap.bs < 8) {
1798     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1799   } else {
1800     if (!a->solve_work) {
1801       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1802       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1803     }
1804   }
1805 
1806   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1807 
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 EXTERN_C_BEGIN
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1814 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1815 {
1816   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1817   PetscInt    i,nz,nbs;
1818 
1819   PetscFunctionBegin;
1820   nz  = baij->maxnz/baij->bs2;
1821   nbs = baij->nbs;
1822   for (i=0; i<nz; i++) {
1823     baij->j[i] = indices[i];
1824   }
1825   baij->nz = nz;
1826   for (i=0; i<nbs; i++) {
1827     baij->ilen[i] = baij->imax[i];
1828   }
1829 
1830   PetscFunctionReturn(0);
1831 }
1832 EXTERN_C_END
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1836 /*@
1837     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1838        in the matrix.
1839 
1840   Input Parameters:
1841 +  mat - the SeqBAIJ matrix
1842 -  indices - the column indices
1843 
1844   Level: advanced
1845 
1846   Notes:
1847     This can be called if you have precomputed the nonzero structure of the
1848   matrix and want to provide it to the matrix object to improve the performance
1849   of the MatSetValues() operation.
1850 
1851     You MUST have set the correct numbers of nonzeros per row in the call to
1852   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1853 
1854     MUST be called before any calls to MatSetValues();
1855 
1856 @*/
1857 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1858 {
1859   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1860 
1861   PetscFunctionBegin;
1862   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1863   PetscValidPointer(indices,2);
1864   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1865   if (f) {
1866     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1867   } else {
1868     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1869   }
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1875 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1876 {
1877   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1878   PetscErrorCode ierr;
1879   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1880   PetscReal      atmp;
1881   PetscScalar    *x,zero = 0.0;
1882   MatScalar      *aa;
1883   PetscInt       ncols,brow,krow,kcol;
1884 
1885   PetscFunctionBegin;
1886   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1887   bs   = A->rmap.bs;
1888   aa   = a->a;
1889   ai   = a->i;
1890   aj   = a->j;
1891   mbs  = a->mbs;
1892 
1893   ierr = VecSet(v,zero);CHKERRQ(ierr);
1894   if (idx) {
1895     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1896   }
1897   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1898   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1899   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1900   for (i=0; i<mbs; i++) {
1901     ncols = ai[1] - ai[0]; ai++;
1902     brow  = bs*i;
1903     for (j=0; j<ncols; j++){
1904       for (kcol=0; kcol<bs; kcol++){
1905         for (krow=0; krow<bs; krow++){
1906           atmp = PetscAbsScalar(*aa);aa++;
1907           row = brow + krow;    /* row index */
1908           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1909           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1910         }
1911       }
1912       aj++;
1913     }
1914   }
1915   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatCopy_SeqBAIJ"
1921 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1922 {
1923   PetscErrorCode ierr;
1924 
1925   PetscFunctionBegin;
1926   /* If the two matrices have the same copy implementation, use fast copy. */
1927   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1928     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1929     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1930 
1931     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1932       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1933     }
1934     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1935   } else {
1936     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1937   }
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1943 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1944 {
1945   PetscErrorCode ierr;
1946 
1947   PetscFunctionBegin;
1948   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1954 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1955 {
1956   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1957   PetscFunctionBegin;
1958   *array = a->a;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1964 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1965 {
1966   PetscFunctionBegin;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #include "petscblaslapack.h"
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1973 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1974 {
1975   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1976   PetscErrorCode ierr;
1977   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1978   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1979 
1980   PetscFunctionBegin;
1981   if (str == SAME_NONZERO_PATTERN) {
1982     PetscScalar alpha = a;
1983     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1984   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1985     if (y->xtoy && y->XtoY != X) {
1986       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1987       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1988     }
1989     if (!y->xtoy) { /* get xtoy */
1990       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1991       y->XtoY = X;
1992     }
1993     bs2 = bs*bs;
1994     for (i=0; i<x->nz; i++) {
1995       j = 0;
1996       while (j < bs2){
1997         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1998         j++;
1999       }
2000     }
2001     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2002   } else {
2003     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2004   }
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 #undef __FUNCT__
2009 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2010 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2011 {
2012   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2013   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2014   PetscScalar    *aa = a->a;
2015 
2016   PetscFunctionBegin;
2017   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2023 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2024 {
2025   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2026   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2027   PetscScalar    *aa = a->a;
2028 
2029   PetscFunctionBegin;
2030   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 
2035 /* -------------------------------------------------------------------*/
2036 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2037        MatGetRow_SeqBAIJ,
2038        MatRestoreRow_SeqBAIJ,
2039        MatMult_SeqBAIJ_N,
2040 /* 4*/ MatMultAdd_SeqBAIJ_N,
2041        MatMultTranspose_SeqBAIJ,
2042        MatMultTransposeAdd_SeqBAIJ,
2043        MatSolve_SeqBAIJ_N,
2044        0,
2045        0,
2046 /*10*/ 0,
2047        MatLUFactor_SeqBAIJ,
2048        0,
2049        0,
2050        MatTranspose_SeqBAIJ,
2051 /*15*/ MatGetInfo_SeqBAIJ,
2052        MatEqual_SeqBAIJ,
2053        MatGetDiagonal_SeqBAIJ,
2054        MatDiagonalScale_SeqBAIJ,
2055        MatNorm_SeqBAIJ,
2056 /*20*/ 0,
2057        MatAssemblyEnd_SeqBAIJ,
2058        0,
2059        MatSetOption_SeqBAIJ,
2060        MatZeroEntries_SeqBAIJ,
2061 /*25*/ MatZeroRows_SeqBAIJ,
2062        MatLUFactorSymbolic_SeqBAIJ,
2063        MatLUFactorNumeric_SeqBAIJ_N,
2064        MatCholeskyFactorSymbolic_SeqBAIJ,
2065        MatCholeskyFactorNumeric_SeqBAIJ_N,
2066 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2067        MatILUFactorSymbolic_SeqBAIJ,
2068        MatICCFactorSymbolic_SeqBAIJ,
2069        MatGetArray_SeqBAIJ,
2070        MatRestoreArray_SeqBAIJ,
2071 /*35*/ MatDuplicate_SeqBAIJ,
2072        0,
2073        0,
2074        MatILUFactor_SeqBAIJ,
2075        0,
2076 /*40*/ MatAXPY_SeqBAIJ,
2077        MatGetSubMatrices_SeqBAIJ,
2078        MatIncreaseOverlap_SeqBAIJ,
2079        MatGetValues_SeqBAIJ,
2080        MatCopy_SeqBAIJ,
2081 /*45*/ 0,
2082        MatScale_SeqBAIJ,
2083        0,
2084        0,
2085        0,
2086 /*50*/ 0,
2087        MatGetRowIJ_SeqBAIJ,
2088        MatRestoreRowIJ_SeqBAIJ,
2089        0,
2090        0,
2091 /*55*/ 0,
2092        0,
2093        0,
2094        0,
2095        MatSetValuesBlocked_SeqBAIJ,
2096 /*60*/ MatGetSubMatrix_SeqBAIJ,
2097        MatDestroy_SeqBAIJ,
2098        MatView_SeqBAIJ,
2099        0,
2100        0,
2101 /*65*/ 0,
2102        0,
2103        0,
2104        0,
2105        0,
2106 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2107        MatConvert_Basic,
2108        0,
2109        0,
2110        0,
2111 /*75*/ 0,
2112        0,
2113        0,
2114        0,
2115        0,
2116 /*80*/ 0,
2117        0,
2118        0,
2119        0,
2120        MatLoad_SeqBAIJ,
2121 /*85*/ 0,
2122        0,
2123        0,
2124        0,
2125        0,
2126 /*90*/ 0,
2127        0,
2128        0,
2129        0,
2130        0,
2131 /*95*/ 0,
2132        0,
2133        0,
2134        0,
2135        0,
2136 /*100*/0,
2137        0,
2138        0,
2139        0,
2140        0,
2141 /*105*/0,
2142        MatRealPart_SeqBAIJ,
2143        MatImaginaryPart_SeqBAIJ
2144 };
2145 
2146 EXTERN_C_BEGIN
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2149 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2150 {
2151   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2152   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2153   PetscErrorCode ierr;
2154 
2155   PetscFunctionBegin;
2156   if (aij->nonew != 1) {
2157     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);first");
2158   }
2159 
2160   /* allocate space for values if not already there */
2161   if (!aij->saved_values) {
2162     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2163   }
2164 
2165   /* copy values over */
2166   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 EXTERN_C_END
2170 
2171 EXTERN_C_BEGIN
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2174 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2175 {
2176   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2177   PetscErrorCode ierr;
2178   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2179 
2180   PetscFunctionBegin;
2181   if (aij->nonew != 1) {
2182     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);first");
2183   }
2184   if (!aij->saved_values) {
2185     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2186   }
2187 
2188   /* copy values over */
2189   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 EXTERN_C_END
2193 
2194 EXTERN_C_BEGIN
2195 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2196 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2197 EXTERN_C_END
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2203 {
2204   Mat_SeqBAIJ    *b;
2205   PetscErrorCode ierr;
2206   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2207   PetscTruth     flg,skipallocation = PETSC_FALSE;
2208 
2209   PetscFunctionBegin;
2210 
2211   if (nz == MAT_SKIP_ALLOCATION) {
2212     skipallocation = PETSC_TRUE;
2213     nz             = 0;
2214   }
2215 
2216   ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2217     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2218   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2219 
2220   if (nnz && newbs != bs) {
2221     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2222   }
2223   bs   = newbs;
2224 
2225   B->rmap.bs = B->cmap.bs = bs;
2226   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2227   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2228 
2229   B->preallocated = PETSC_TRUE;
2230 
2231   mbs  = B->rmap.n/bs;
2232   nbs  = B->cmap.n/bs;
2233   bs2  = bs*bs;
2234 
2235   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2236     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2237   }
2238 
2239   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2240   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2241   if (nnz) {
2242     for (i=0; i<mbs; i++) {
2243       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2244       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2245     }
2246   }
2247 
2248   b       = (Mat_SeqBAIJ*)B->data;
2249   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2250     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2251   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2252 
2253   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2254   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2255   if (!flg) {
2256     switch (bs) {
2257     case 1:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2259       B->ops->mult            = MatMult_SeqBAIJ_1;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2261       break;
2262     case 2:
2263       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2264       B->ops->mult            = MatMult_SeqBAIJ_2;
2265       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2266       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2267       break;
2268     case 3:
2269       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2270       B->ops->mult            = MatMult_SeqBAIJ_3;
2271       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2272       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2273       break;
2274     case 4:
2275       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2276       B->ops->mult            = MatMult_SeqBAIJ_4;
2277       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2278       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2279       break;
2280     case 5:
2281       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2282       B->ops->mult            = MatMult_SeqBAIJ_5;
2283       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2284       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2285       break;
2286     case 6:
2287       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2288       B->ops->mult            = MatMult_SeqBAIJ_6;
2289       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2290       break;
2291     case 7:
2292       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2293       B->ops->mult            = MatMult_SeqBAIJ_7;
2294       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2295       break;
2296     default:
2297       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2298       B->ops->mult            = MatMult_SeqBAIJ_N;
2299       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2300       break;
2301     }
2302   }
2303   B->rmap.bs      = bs;
2304   b->mbs     = mbs;
2305   b->nbs     = nbs;
2306   if (!skipallocation) {
2307     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2308     /* b->ilen will count nonzeros in each block row so far. */
2309     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2310     if (!nnz) {
2311       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2312       else if (nz <= 0)        nz = 1;
2313       for (i=0; i<mbs; i++) b->imax[i] = nz;
2314       nz = nz*mbs;
2315     } else {
2316       nz = 0;
2317       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2318     }
2319 
2320     /* allocate the matrix space */
2321     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2322     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2323     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2324     b->singlemalloc = PETSC_TRUE;
2325 
2326     b->i[0] = 0;
2327     for (i=1; i<mbs+1; i++) {
2328       b->i[i] = b->i[i-1] + b->imax[i-1];
2329     }
2330     b->free_a     = PETSC_TRUE;
2331     b->free_ij    = PETSC_TRUE;
2332   } else {
2333     b->free_a     = PETSC_FALSE;
2334     b->free_ij    = PETSC_FALSE;
2335   }
2336 
2337   B->rmap.bs          = bs;
2338   b->bs2              = bs2;
2339   b->mbs              = mbs;
2340   b->nz               = 0;
2341   b->maxnz            = nz*bs2;
2342   B->info.nz_unneeded = (PetscReal)b->maxnz;
2343   PetscFunctionReturn(0);
2344 }
2345 EXTERN_C_END
2346 
2347 /*MC
2348    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2349    block sparse compressed row format.
2350 
2351    Options Database Keys:
2352 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2353 
2354   Level: beginner
2355 
2356 .seealso: MatCreateSeqBAIJ()
2357 M*/
2358 
2359 EXTERN_C_BEGIN
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatCreate_SeqBAIJ"
2362 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2363 {
2364   PetscErrorCode ierr;
2365   PetscMPIInt    size;
2366   Mat_SeqBAIJ    *b;
2367 
2368   PetscFunctionBegin;
2369   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2370   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2371 
2372   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2373   B->data = (void*)b;
2374   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2375   B->factor           = 0;
2376   B->mapping          = 0;
2377   b->row              = 0;
2378   b->col              = 0;
2379   b->icol             = 0;
2380   b->reallocs         = 0;
2381   b->saved_values     = 0;
2382 #if defined(PETSC_USE_MAT_SINGLE)
2383   b->setvalueslen     = 0;
2384   b->setvaluescopy    = PETSC_NULL;
2385 #endif
2386 
2387   b->sorted           = PETSC_FALSE;
2388   b->roworiented      = PETSC_TRUE;
2389   b->nonew            = 0;
2390   b->diag             = 0;
2391   b->solve_work       = 0;
2392   b->mult_work        = 0;
2393   B->spptr            = 0;
2394   B->info.nz_unneeded = (PetscReal)b->maxnz;
2395   b->keepzeroedrows   = PETSC_FALSE;
2396   b->xtoy              = 0;
2397   b->XtoY              = 0;
2398   b->compressedrow.use     = PETSC_FALSE;
2399   b->compressedrow.nrows   = 0;
2400   b->compressedrow.i       = PETSC_NULL;
2401   b->compressedrow.rindex  = PETSC_NULL;
2402   b->compressedrow.checked = PETSC_FALSE;
2403   B->same_nonzero          = PETSC_FALSE;
2404 
2405   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2406                                      "MatInvertBlockDiagonal_SeqBAIJ",
2407                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2409                                      "MatStoreValues_SeqBAIJ",
2410                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2412                                      "MatRetrieveValues_SeqBAIJ",
2413                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2414   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2415                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2416                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2417   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2418                                      "MatConvert_SeqBAIJ_SeqAIJ",
2419                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2420   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2421                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2422                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2423   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2424                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2425                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2426   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 EXTERN_C_END
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2433 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2434 {
2435   Mat            C;
2436   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2437   PetscErrorCode ierr;
2438   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2439 
2440   PetscFunctionBegin;
2441   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2442 
2443   *B = 0;
2444   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2445   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2446   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2447   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2448   c    = (Mat_SeqBAIJ*)C->data;
2449 
2450   C->rmap.N   = A->rmap.N;
2451   C->cmap.N   = A->cmap.N;
2452   C->rmap.bs  = A->rmap.bs;
2453   c->bs2 = a->bs2;
2454   c->mbs = a->mbs;
2455   c->nbs = a->nbs;
2456 
2457   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2458   for (i=0; i<mbs; i++) {
2459     c->imax[i] = a->imax[i];
2460     c->ilen[i] = a->ilen[i];
2461   }
2462 
2463   /* allocate the matrix space */
2464   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2465   c->singlemalloc = PETSC_TRUE;
2466   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2467   if (mbs > 0) {
2468     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2469     if (cpvalues == MAT_COPY_VALUES) {
2470       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2471     } else {
2472       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2473     }
2474   }
2475   c->sorted      = a->sorted;
2476   c->roworiented = a->roworiented;
2477   c->nonew       = a->nonew;
2478 
2479   if (a->diag) {
2480     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2481     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2482     for (i=0; i<mbs; i++) {
2483       c->diag[i] = a->diag[i];
2484     }
2485   } else c->diag        = 0;
2486   c->nz                 = a->nz;
2487   c->maxnz              = a->maxnz;
2488   c->solve_work         = 0;
2489   c->mult_work          = 0;
2490   c->free_a             = PETSC_TRUE;
2491   c->free_ij            = PETSC_TRUE;
2492   C->preallocated       = PETSC_TRUE;
2493   C->assembled          = PETSC_TRUE;
2494 
2495   c->compressedrow.use     = a->compressedrow.use;
2496   c->compressedrow.nrows   = a->compressedrow.nrows;
2497   c->compressedrow.checked = a->compressedrow.checked;
2498   if ( a->compressedrow.checked && a->compressedrow.use){
2499     i = a->compressedrow.nrows;
2500     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2501     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2502     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2503     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2504   } else {
2505     c->compressedrow.use    = PETSC_FALSE;
2506     c->compressedrow.i      = PETSC_NULL;
2507     c->compressedrow.rindex = PETSC_NULL;
2508   }
2509   C->same_nonzero = A->same_nonzero;
2510   *B = C;
2511   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 #undef __FUNCT__
2516 #define __FUNCT__ "MatLoad_SeqBAIJ"
2517 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2518 {
2519   Mat_SeqBAIJ    *a;
2520   Mat            B;
2521   PetscErrorCode ierr;
2522   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2523   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2524   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2525   PetscInt       *masked,nmask,tmp,bs2,ishift;
2526   PetscMPIInt    size;
2527   int            fd;
2528   PetscScalar    *aa;
2529   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2533     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2534   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2535   bs2  = bs*bs;
2536 
2537   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2538   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2539   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2540   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2541   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2542   M = header[1]; N = header[2]; nz = header[3];
2543 
2544   if (header[3] < 0) {
2545     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2546   }
2547 
2548   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2549 
2550   /*
2551      This code adds extra rows to make sure the number of rows is
2552     divisible by the blocksize
2553   */
2554   mbs        = M/bs;
2555   extra_rows = bs - M + bs*(mbs);
2556   if (extra_rows == bs) extra_rows = 0;
2557   else                  mbs++;
2558   if (extra_rows) {
2559     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2560   }
2561 
2562   /* read in row lengths */
2563   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2564   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2565   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2566 
2567   /* read in column indices */
2568   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2569   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2570   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2571 
2572   /* loop over row lengths determining block row lengths */
2573   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2574   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2575   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2576   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2577   masked   = mask + mbs;
2578   rowcount = 0; nzcount = 0;
2579   for (i=0; i<mbs; i++) {
2580     nmask = 0;
2581     for (j=0; j<bs; j++) {
2582       kmax = rowlengths[rowcount];
2583       for (k=0; k<kmax; k++) {
2584         tmp = jj[nzcount++]/bs;
2585         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2586       }
2587       rowcount++;
2588     }
2589     browlengths[i] += nmask;
2590     /* zero out the mask elements we set */
2591     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2592   }
2593 
2594   /* create our matrix */
2595   ierr = MatCreate(comm,&B);
2596   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2597   ierr = MatSetType(B,type);CHKERRQ(ierr);
2598   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2599   a = (Mat_SeqBAIJ*)B->data;
2600 
2601   /* set matrix "i" values */
2602   a->i[0] = 0;
2603   for (i=1; i<= mbs; i++) {
2604     a->i[i]      = a->i[i-1] + browlengths[i-1];
2605     a->ilen[i-1] = browlengths[i-1];
2606   }
2607   a->nz         = 0;
2608   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2609 
2610   /* read in nonzero values */
2611   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2612   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2613   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2614 
2615   /* set "a" and "j" values into matrix */
2616   nzcount = 0; jcount = 0;
2617   for (i=0; i<mbs; i++) {
2618     nzcountb = nzcount;
2619     nmask    = 0;
2620     for (j=0; j<bs; j++) {
2621       kmax = rowlengths[i*bs+j];
2622       for (k=0; k<kmax; k++) {
2623         tmp = jj[nzcount++]/bs;
2624 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2625       }
2626     }
2627     /* sort the masked values */
2628     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2629 
2630     /* set "j" values into matrix */
2631     maskcount = 1;
2632     for (j=0; j<nmask; j++) {
2633       a->j[jcount++]  = masked[j];
2634       mask[masked[j]] = maskcount++;
2635     }
2636     /* set "a" values into matrix */
2637     ishift = bs2*a->i[i];
2638     for (j=0; j<bs; j++) {
2639       kmax = rowlengths[i*bs+j];
2640       for (k=0; k<kmax; k++) {
2641         tmp       = jj[nzcountb]/bs ;
2642         block     = mask[tmp] - 1;
2643         point     = jj[nzcountb] - bs*tmp;
2644         idx       = ishift + bs2*block + j + bs*point;
2645         a->a[idx] = (MatScalar)aa[nzcountb++];
2646       }
2647     }
2648     /* zero out the mask elements we set */
2649     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2650   }
2651   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2652 
2653   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2654   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2655   ierr = PetscFree(aa);CHKERRQ(ierr);
2656   ierr = PetscFree(jj);CHKERRQ(ierr);
2657   ierr = PetscFree(mask);CHKERRQ(ierr);
2658 
2659   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2660   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2661   ierr = MatView_Private(B);CHKERRQ(ierr);
2662 
2663   *A = B;
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatCreateSeqBAIJ"
2669 /*@C
2670    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2671    compressed row) format.  For good matrix assembly performance the
2672    user should preallocate the matrix storage by setting the parameter nz
2673    (or the array nnz).  By setting these parameters accurately, performance
2674    during matrix assembly can be increased by more than a factor of 50.
2675 
2676    Collective on MPI_Comm
2677 
2678    Input Parameters:
2679 +  comm - MPI communicator, set to PETSC_COMM_SELF
2680 .  bs - size of block
2681 .  m - number of rows
2682 .  n - number of columns
2683 .  nz - number of nonzero blocks  per block row (same for all rows)
2684 -  nnz - array containing the number of nonzero blocks in the various block rows
2685          (possibly different for each block row) or PETSC_NULL
2686 
2687    Output Parameter:
2688 .  A - the matrix
2689 
2690    Options Database Keys:
2691 .   -mat_no_unroll - uses code that does not unroll the loops in the
2692                      block calculations (much slower)
2693 .    -mat_block_size - size of the blocks to use
2694 
2695    Level: intermediate
2696 
2697    Notes:
2698    The number of rows and columns must be divisible by blocksize.
2699 
2700    If the nnz parameter is given then the nz parameter is ignored
2701 
2702    A nonzero block is any block that as 1 or more nonzeros in it
2703 
2704    The block AIJ format is fully compatible with standard Fortran 77
2705    storage.  That is, the stored row and column indices can begin at
2706    either one (as in Fortran) or zero.  See the users' manual for details.
2707 
2708    Specify the preallocated storage with either nz or nnz (not both).
2709    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2710    allocation.  For additional details, see the users manual chapter on
2711    matrices.
2712 
2713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2714 @*/
2715 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2716 {
2717   PetscErrorCode ierr;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2721   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2722   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2723   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2729 /*@C
2730    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2731    per row in the matrix. For good matrix assembly performance the
2732    user should preallocate the matrix storage by setting the parameter nz
2733    (or the array nnz).  By setting these parameters accurately, performance
2734    during matrix assembly can be increased by more than a factor of 50.
2735 
2736    Collective on MPI_Comm
2737 
2738    Input Parameters:
2739 +  A - the matrix
2740 .  bs - size of block
2741 .  nz - number of block nonzeros per block row (same for all rows)
2742 -  nnz - array containing the number of block nonzeros in the various block rows
2743          (possibly different for each block row) or PETSC_NULL
2744 
2745    Options Database Keys:
2746 .   -mat_no_unroll - uses code that does not unroll the loops in the
2747                      block calculations (much slower)
2748 .    -mat_block_size - size of the blocks to use
2749 
2750    Level: intermediate
2751 
2752    Notes:
2753    If the nnz parameter is given then the nz parameter is ignored
2754 
2755    The block AIJ format is fully compatible with standard Fortran 77
2756    storage.  That is, the stored row and column indices can begin at
2757    either one (as in Fortran) or zero.  See the users' manual for details.
2758 
2759    Specify the preallocated storage with either nz or nnz (not both).
2760    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2761    allocation.  For additional details, see the users manual chapter on
2762    matrices.
2763 
2764 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2765 @*/
2766 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2767 {
2768   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2769 
2770   PetscFunctionBegin;
2771   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2772   if (f) {
2773     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2774   }
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2780 /*@
2781      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2782               (upper triangular entries in CSR format) provided by the user.
2783 
2784      Collective on MPI_Comm
2785 
2786    Input Parameters:
2787 +  comm - must be an MPI communicator of size 1
2788 .  bs - size of block
2789 .  m - number of rows
2790 .  n - number of columns
2791 .  i - row indices
2792 .  j - column indices
2793 -  a - matrix values
2794 
2795    Output Parameter:
2796 .  mat - the matrix
2797 
2798    Level: intermediate
2799 
2800    Notes:
2801        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2802     once the matrix is destroyed
2803 
2804        You cannot set new nonzero locations into this matrix, that will generate an error.
2805 
2806        The i and j indices are 0 based
2807 
2808 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2809 
2810 @*/
2811 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2812 {
2813   PetscErrorCode ierr;
2814   PetscInt       ii;
2815   Mat_SeqBAIJ    *baij;
2816 
2817   PetscFunctionBegin;
2818   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2819   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2820 
2821   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2822   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2823   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2824   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2825   baij = (Mat_SeqBAIJ*)(*mat)->data;
2826   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2827 
2828   baij->i = i;
2829   baij->j = j;
2830   baij->a = a;
2831   baij->singlemalloc = PETSC_FALSE;
2832   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2833   baij->free_a       = PETSC_FALSE;
2834   baij->free_ij       = PETSC_FALSE;
2835 
2836   for (ii=0; ii<m; ii++) {
2837     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2838 #if defined(PETSC_USE_DEBUG)
2839     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2840 #endif
2841   }
2842 #if defined(PETSC_USE_DEBUG)
2843   for (ii=0; ii<baij->i[m]; ii++) {
2844     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2845     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2846   }
2847 #endif
2848 
2849   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2850   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2851   PetscFunctionReturn(0);
2852 }
2853