xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 28adb3f739167aae2a3fdf9bf501dbd0150de1de)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     default:
129       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139 {
140   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141   PetscScalar        *x,x1,s1;
142   const PetscScalar  *b;
143   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144   PetscErrorCode     ierr;
145   PetscInt           m = a->mbs,i,i2,nz,idx;
146   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
147 
148   PetscFunctionBegin;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
150   its = its*lits;
151   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
156 
157   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
158 
159   diag  = a->diag;
160   idiag = a->idiag;
161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
163 
164   if (flag & SOR_ZERO_INITIAL_GUESS) {
165     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166       x[0] = b[0]*idiag[0];
167       i2     = 1;
168       idiag += 1;
169       for (i=1; i<m; i++) {
170         v     = aa + ai[i];
171         vi    = aj + ai[i];
172         nz    = diag[i] - ai[i];
173         s1    = b[i2];
174         while (nz--) {
175           idx  = (*vi++);
176           x1   = x[idx];
177           s1  -= v[0]*x1;
178           v   += 1;
179         }
180         x[i2]   = idiag[0]*s1;
181         idiag   += 1;
182         i2      += 1;
183       }
184       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
185       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
186     }
187     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
188         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
189       i2    = 0;
190       mdiag = a->idiag+a->mbs;
191       for (i=0; i<m; i++) {
192         x1      = x[i2];
193         x[i2]   = mdiag[0]*x1;
194         mdiag  += 1;
195         i2     += 1;
196       }
197       ierr = PetscLogFlops(m);CHKERRQ(ierr);
198     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
199       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
200     }
201     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
202       idiag   = a->idiag+a->mbs - 1;
203       i2      = m - 1;
204       x1      = x[i2];
205       x[i2]   = idiag[0]*x1;
206       idiag -= 1;
207       i2    -= 1;
208       for (i=m-2; i>=0; i--) {
209         v     = aa + (diag[i]+1);
210         vi    = aj + diag[i] + 1;
211         nz    = ai[i+1] - diag[i] - 1;
212         s1    = x[i2];
213         while (nz--) {
214           idx  = (*vi++);
215           x1   = x[idx];
216           s1  -= v[0]*x1;
217           v   += 1;
218         }
219         x[i2]   = idiag[0]*s1;
220         idiag   -= 1;
221         i2      -= 1;
222       }
223       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
224     }
225   } else {
226     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
227   }
228   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
229   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
236 {
237   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
238   PetscScalar        *x,x1,x2,s1,s2;
239   const PetscScalar  *b;
240   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
241   PetscErrorCode     ierr;
242   PetscInt           m = a->mbs,i,i2,nz,idx;
243   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
244 
245   PetscFunctionBegin;
246   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
247   its = its*lits;
248   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
249   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
250   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
251   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
252   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
253 
254   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
255 
256   diag  = a->diag;
257   idiag = a->idiag;
258   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
259   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
260 
261   if (flag & SOR_ZERO_INITIAL_GUESS) {
262     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
263       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
264       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
265       i2     = 2;
266       idiag += 4;
267       for (i=1; i<m; i++) {
268 	v     = aa + 4*ai[i];
269 	vi    = aj + ai[i];
270 	nz    = diag[i] - ai[i];
271 	s1    = b[i2]; s2 = b[i2+1];
272 	while (nz--) {
273 	  idx  = 2*(*vi++);
274 	  x1   = x[idx]; x2 = x[1+idx];
275 	  s1  -= v[0]*x1 + v[2]*x2;
276 	  s2  -= v[1]*x1 + v[3]*x2;
277 	  v   += 4;
278 	}
279 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
280 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
281         idiag   += 4;
282         i2      += 2;
283       }
284       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
285       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
286     }
287     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
288         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
289       i2    = 0;
290       mdiag = a->idiag+4*a->mbs;
291       for (i=0; i<m; i++) {
292         x1      = x[i2]; x2 = x[i2+1];
293         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
294         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
295         mdiag  += 4;
296         i2     += 2;
297       }
298       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
299     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
300       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
301     }
302     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
303       idiag   = a->idiag+4*a->mbs - 4;
304       i2      = 2*m - 2;
305       x1      = x[i2]; x2 = x[i2+1];
306       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
307       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
308       idiag -= 4;
309       i2    -= 2;
310       for (i=m-2; i>=0; i--) {
311 	v     = aa + 4*(diag[i]+1);
312 	vi    = aj + diag[i] + 1;
313 	nz    = ai[i+1] - diag[i] - 1;
314 	s1    = x[i2]; s2 = x[i2+1];
315 	while (nz--) {
316 	  idx  = 2*(*vi++);
317 	  x1   = x[idx]; x2 = x[1+idx];
318 	  s1  -= v[0]*x1 + v[2]*x2;
319 	  s2  -= v[1]*x1 + v[3]*x2;
320 	  v   += 4;
321 	}
322 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
323 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
324         idiag   -= 4;
325         i2      -= 2;
326       }
327       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
328     }
329   } else {
330     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
331   }
332   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
333   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
340 {
341   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
342   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
343   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
344   const PetscScalar  *b;
345   PetscErrorCode     ierr;
346   PetscInt           m = a->mbs,i,i2,nz,idx;
347   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
348 
349   PetscFunctionBegin;
350   its = its*lits;
351   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
352   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
353   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
354   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
355   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
356   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
357 
358   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
359 
360   diag  = a->diag;
361   idiag = a->idiag;
362   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
363   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
364 
365   if (flag & SOR_ZERO_INITIAL_GUESS) {
366     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
367       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
368       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
369       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
370       i2     = 3;
371       idiag += 9;
372       for (i=1; i<m; i++) {
373         v     = aa + 9*ai[i];
374         vi    = aj + ai[i];
375         nz    = diag[i] - ai[i];
376         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
377         while (nz--) {
378           idx  = 3*(*vi++);
379           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
380           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
381           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
382           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
383           v   += 9;
384         }
385         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
386         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
387         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
388         idiag   += 9;
389         i2      += 3;
390       }
391       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
392       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
393     }
394     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
395         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
396       i2    = 0;
397       mdiag = a->idiag+9*a->mbs;
398       for (i=0; i<m; i++) {
399         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
400         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
401         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
402         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
403         mdiag  += 9;
404         i2     += 3;
405       }
406       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
407     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
408       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
409     }
410     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
411       idiag   = a->idiag+9*a->mbs - 9;
412       i2      = 3*m - 3;
413       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
414       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
415       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
416       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
417       idiag -= 9;
418       i2    -= 3;
419       for (i=m-2; i>=0; i--) {
420         v     = aa + 9*(diag[i]+1);
421         vi    = aj + diag[i] + 1;
422         nz    = ai[i+1] - diag[i] - 1;
423         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
424         while (nz--) {
425           idx  = 3*(*vi++);
426           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
427           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
428           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
429           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
430           v   += 9;
431         }
432         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
433         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
434         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
435         idiag   -= 9;
436         i2      -= 3;
437       }
438       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
439     }
440   } else {
441     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
442   }
443   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
451 {
452   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
453   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
454   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
455   const PetscScalar  *b;
456   PetscErrorCode     ierr;
457   PetscInt           m = a->mbs,i,i2,nz,idx;
458   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
459 
460   PetscFunctionBegin;
461   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
462   its = its*lits;
463   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
464   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
465   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
466   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
467   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
468 
469   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
470 
471   diag  = a->diag;
472   idiag = a->idiag;
473   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
474   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
475 
476   if (flag & SOR_ZERO_INITIAL_GUESS) {
477     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
478       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
479       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
480       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
481       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
482       i2     = 4;
483       idiag += 16;
484       for (i=1; i<m; i++) {
485 	v     = aa + 16*ai[i];
486 	vi    = aj + ai[i];
487 	nz    = diag[i] - ai[i];
488 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
489 	while (nz--) {
490 	  idx  = 4*(*vi++);
491 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
492 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
493 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
494 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
495 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
496 	  v   += 16;
497 	}
498 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
499 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
500 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
501 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
502         idiag   += 16;
503         i2      += 4;
504       }
505       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
507     }
508     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510       i2    = 0;
511       mdiag = a->idiag+16*a->mbs;
512       for (i=0; i<m; i++) {
513         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
515         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
516         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
517         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
518         mdiag  += 16;
519         i2     += 4;
520       }
521       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
522     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
523       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
524     }
525     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
526       idiag   = a->idiag+16*a->mbs - 16;
527       i2      = 4*m - 4;
528       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
529       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
530       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
531       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
532       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
533       idiag -= 16;
534       i2    -= 4;
535       for (i=m-2; i>=0; i--) {
536 	v     = aa + 16*(diag[i]+1);
537 	vi    = aj + diag[i] + 1;
538 	nz    = ai[i+1] - diag[i] - 1;
539 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
540 	while (nz--) {
541 	  idx  = 4*(*vi++);
542 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
543 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
544 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
545 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
546 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
547 	  v   += 16;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
550 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
551 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
552 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
553         idiag   -= 16;
554         i2      -= 4;
555       }
556       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
569 {
570   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
571   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
572   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
573   const PetscScalar  *b;
574   PetscErrorCode     ierr;
575   PetscInt           m = a->mbs,i,i2,nz,idx;
576   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
577 
578   PetscFunctionBegin;
579   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
580   its = its*lits;
581   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
582   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
583   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
584   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
585   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
586 
587   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
588 
589   diag  = a->diag;
590   idiag = a->idiag;
591   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
592   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
593 
594   if (flag & SOR_ZERO_INITIAL_GUESS) {
595     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
596       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
597       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
598       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
599       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
600       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
601       i2     = 5;
602       idiag += 25;
603       for (i=1; i<m; i++) {
604 	v     = aa + 25*ai[i];
605 	vi    = aj + ai[i];
606 	nz    = diag[i] - ai[i];
607 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
608 	while (nz--) {
609 	  idx  = 5*(*vi++);
610 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
611 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
612 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
613 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
614 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
615 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
616 	  v   += 25;
617 	}
618 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
619 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
620 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
621 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
622 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
623         idiag   += 25;
624         i2      += 5;
625       }
626       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
627       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
628     }
629     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
630         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
631       i2    = 0;
632       mdiag = a->idiag+25*a->mbs;
633       for (i=0; i<m; i++) {
634         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
635         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
636         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
637         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
638         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
639         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
640         mdiag  += 25;
641         i2     += 5;
642       }
643       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
644     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
645       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
646     }
647     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
648       idiag   = a->idiag+25*a->mbs - 25;
649       i2      = 5*m - 5;
650       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
651       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
652       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
653       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
654       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
655       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
656       idiag -= 25;
657       i2    -= 5;
658       for (i=m-2; i>=0; i--) {
659 	v     = aa + 25*(diag[i]+1);
660 	vi    = aj + diag[i] + 1;
661 	nz    = ai[i+1] - diag[i] - 1;
662 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
663 	while (nz--) {
664 	  idx  = 5*(*vi++);
665 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
666 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
667 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
668 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
669 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
670 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
671 	  v   += 25;
672 	}
673 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
674 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
675 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
676 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
677 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
678         idiag   -= 25;
679         i2      -= 5;
680       }
681       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
682     }
683   } else {
684     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
685   }
686   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
687   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
694 {
695   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
696   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
697   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
698   const PetscScalar  *b;
699   PetscErrorCode     ierr;
700   PetscInt           m = a->mbs,i,i2,nz,idx;
701   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
702 
703   PetscFunctionBegin;
704   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
705   its = its*lits;
706   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
707   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
708   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
709   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
710   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
711 
712   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
713 
714   diag  = a->diag;
715   idiag = a->idiag;
716   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
717   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
718 
719   if (flag & SOR_ZERO_INITIAL_GUESS) {
720     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
721       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
722       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
723       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
724       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
725       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
726       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
727       i2     = 6;
728       idiag += 36;
729       for (i=1; i<m; i++) {
730         v     = aa + 36*ai[i];
731         vi    = aj + ai[i];
732         nz    = diag[i] - ai[i];
733         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
734         while (nz--) {
735           idx  = 6*(*vi++);
736           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
737           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
738           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
739           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
740           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
741           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
742           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
743           v   += 36;
744         }
745         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
746         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
747         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
748         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
749         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
750         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
751         idiag   += 36;
752         i2      += 6;
753       }
754       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
755       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
756     }
757     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
758         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
759       i2    = 0;
760       mdiag = a->idiag+36*a->mbs;
761       for (i=0; i<m; i++) {
762         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
763         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
764         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
765         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
766         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
767         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
768         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
769         mdiag  += 36;
770         i2     += 6;
771       }
772       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
773     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
774       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
775     }
776     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
777       idiag   = a->idiag+36*a->mbs - 36;
778       i2      = 6*m - 6;
779       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
780       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
781       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
782       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
783       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
784       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
785       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
786       idiag -= 36;
787       i2    -= 6;
788       for (i=m-2; i>=0; i--) {
789         v     = aa + 36*(diag[i]+1);
790         vi    = aj + diag[i] + 1;
791         nz    = ai[i+1] - diag[i] - 1;
792         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
793         while (nz--) {
794           idx  = 6*(*vi++);
795           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
796           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
797           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
798           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
799           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
800           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
801           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
802           v   += 36;
803         }
804         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
805         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
806         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
807         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
808         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
809         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
810         idiag   -= 36;
811         i2      -= 6;
812       }
813       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
814     }
815   } else {
816     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
817   }
818   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
819   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
826 {
827   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
828   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
829   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
830   const PetscScalar  *b;
831   PetscErrorCode     ierr;
832   PetscInt           m = a->mbs,i,i2,nz,idx;
833   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
834 
835   PetscFunctionBegin;
836   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
837   its = its*lits;
838   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
839   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
840   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
841   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
842   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
843 
844   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
845 
846   diag  = a->diag;
847   idiag = a->idiag;
848   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
849   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
850 
851   if (flag & SOR_ZERO_INITIAL_GUESS) {
852     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
853       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
854       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
855       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
856       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
857       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
858       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
859       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
860       i2     = 7;
861       idiag += 49;
862       for (i=1; i<m; i++) {
863         v     = aa + 49*ai[i];
864         vi    = aj + ai[i];
865         nz    = diag[i] - ai[i];
866         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
867         while (nz--) {
868           idx  = 7*(*vi++);
869           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
870           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
871           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
872           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
873           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
874           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
875           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
876           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
877           v   += 49;
878         }
879         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
880         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
881         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
882         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
883         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
884         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
885         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
886         idiag   += 49;
887         i2      += 7;
888       }
889       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
890       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
891     }
892     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
893         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
894       i2    = 0;
895       mdiag = a->idiag+49*a->mbs;
896       for (i=0; i<m; i++) {
897         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
898         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
899         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
900         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
901         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
902         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
903         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
904         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
905         mdiag  += 36;
906         i2     += 6;
907       }
908       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
909     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
910       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
911     }
912     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
913       idiag   = a->idiag+49*a->mbs - 49;
914       i2      = 7*m - 7;
915       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
916       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
917       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
918       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
919       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
920       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
921       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
922       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
923       idiag -= 49;
924       i2    -= 7;
925       for (i=m-2; i>=0; i--) {
926         v     = aa + 49*(diag[i]+1);
927         vi    = aj + diag[i] + 1;
928         nz    = ai[i+1] - diag[i] - 1;
929         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
930         while (nz--) {
931           idx  = 7*(*vi++);
932           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
933           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
934           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
935           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
936           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
937           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
938           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
939           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
940           v   += 49;
941         }
942         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
943         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
944         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
945         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
946         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
947         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
948         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
949         idiag   -= 49;
950         i2      -= 7;
951       }
952       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
953     }
954   } else {
955     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
956   }
957   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
958   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 /*
963     Special version for direct calls from Fortran (Used in PETSc-fun3d)
964 */
965 #if defined(PETSC_HAVE_FORTRAN_CAPS)
966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
968 #define matsetvaluesblocked4_ matsetvaluesblocked4
969 #endif
970 
971 EXTERN_C_BEGIN
972 #undef __FUNCT__
973 #define __FUNCT__ "matsetvaluesblocked4_"
974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
975 {
976   Mat               A = *AA;
977   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
979   PetscInt          *ai=a->i,*ailen=a->ilen;
980   PetscInt          *aj=a->j,stepval,lastcol = -1;
981   const PetscScalar *value = v;
982   MatScalar         *ap,*aa = a->a,*bap;
983 
984   PetscFunctionBegin;
985   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
986   stepval = (n-1)*4;
987   for (k=0; k<m; k++) { /* loop over added rows */
988     row  = im[k];
989     rp   = aj + ai[row];
990     ap   = aa + 16*ai[row];
991     nrow = ailen[row];
992     low  = 0;
993     high = nrow;
994     for (l=0; l<n; l++) { /* loop over added columns */
995       col = in[l];
996       if (col <= lastcol) low = 0; else high = nrow;
997       lastcol = col;
998       value = v + k*(stepval+4 + l)*4;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > col) high = t;
1002         else             low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > col) break;
1006         if (rp[i] == col) {
1007           bap  = ap +  16*i;
1008           for (ii=0; ii<4; ii++,value+=stepval) {
1009             for (jj=ii; jj<16; jj+=4) {
1010               bap[jj] += *value++;
1011             }
1012           }
1013           goto noinsert2;
1014         }
1015       }
1016       N = nrow++ - 1;
1017       high++; /* added new column index thus must search to one higher than before */
1018       /* shift up all the later entries in this row */
1019       for (ii=N; ii>=i; ii--) {
1020         rp[ii+1] = rp[ii];
1021         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1022       }
1023       if (N >= i) {
1024         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1025       }
1026       rp[i] = col;
1027       bap   = ap +  16*i;
1028       for (ii=0; ii<4; ii++,value+=stepval) {
1029         for (jj=ii; jj<16; jj+=4) {
1030           bap[jj] = *value++;
1031         }
1032       }
1033       noinsert2:;
1034       low = i;
1035     }
1036     ailen[row] = nrow;
1037   }
1038   PetscFunctionReturnVoid();
1039 }
1040 EXTERN_C_END
1041 
1042 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1043 #define matsetvalues4_ MATSETVALUES4
1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1045 #define matsetvalues4_ matsetvalues4
1046 #endif
1047 
1048 EXTERN_C_BEGIN
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatSetValues4_"
1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1052 {
1053   Mat         A = *AA;
1054   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1055   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1056   PetscInt    *ai=a->i,*ailen=a->ilen;
1057   PetscInt    *aj=a->j,brow,bcol;
1058   PetscInt    ridx,cidx,lastcol = -1;
1059   MatScalar   *ap,value,*aa=a->a,*bap;
1060 
1061   PetscFunctionBegin;
1062   for (k=0; k<m; k++) { /* loop over added rows */
1063     row  = im[k]; brow = row/4;
1064     rp   = aj + ai[brow];
1065     ap   = aa + 16*ai[brow];
1066     nrow = ailen[brow];
1067     low  = 0;
1068     high = nrow;
1069     for (l=0; l<n; l++) { /* loop over added columns */
1070       col = in[l]; bcol = col/4;
1071       ridx = row % 4; cidx = col % 4;
1072       value = v[l + k*n];
1073       if (col <= lastcol) low = 0; else high = nrow;
1074       lastcol = col;
1075       while (high-low > 7) {
1076         t = (low+high)/2;
1077         if (rp[t] > bcol) high = t;
1078         else              low  = t;
1079       }
1080       for (i=low; i<high; i++) {
1081         if (rp[i] > bcol) break;
1082         if (rp[i] == bcol) {
1083           bap  = ap +  16*i + 4*cidx + ridx;
1084           *bap += value;
1085           goto noinsert1;
1086         }
1087       }
1088       N = nrow++ - 1;
1089       high++; /* added new column thus must search to one higher than before */
1090       /* shift up all the later entries in this row */
1091       for (ii=N; ii>=i; ii--) {
1092         rp[ii+1] = rp[ii];
1093         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1094       }
1095       if (N>=i) {
1096         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1097       }
1098       rp[i]                    = bcol;
1099       ap[16*i + 4*cidx + ridx] = value;
1100       noinsert1:;
1101       low = i;
1102     }
1103     ailen[brow] = nrow;
1104   }
1105   PetscFunctionReturnVoid();
1106 }
1107 EXTERN_C_END
1108 
1109 /*
1110      Checks for missing diagonals
1111 */
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1115 {
1116   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1117   PetscErrorCode ierr;
1118   PetscInt       *diag,*jj = a->j,i;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1122   *missing = PETSC_FALSE;
1123   if (A->rmap->n > 0 && !jj) {
1124     *missing  = PETSC_TRUE;
1125     if (d) *d = 0;
1126     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1127   } else {
1128     diag     = a->diag;
1129     for (i=0; i<a->mbs; i++) {
1130       if (jj[diag[i]] != i) {
1131         *missing  = PETSC_TRUE;
1132         if (d) *d = i;
1133         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1134       }
1135     }
1136   }
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1143 {
1144   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscInt       i,j,m = a->mbs;
1147 
1148   PetscFunctionBegin;
1149   if (!a->diag) {
1150     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1151   }
1152   for (i=0; i<m; i++) {
1153     a->diag[i] = a->i[i+1];
1154     for (j=a->i[i]; j<a->i[i+1]; j++) {
1155       if (a->j[j] == i) {
1156         a->diag[i] = j;
1157         break;
1158       }
1159     }
1160   }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 
1165 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1169 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1170 {
1171   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1172   PetscErrorCode ierr;
1173   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1174   PetscInt       *tia, *tja;
1175 
1176   PetscFunctionBegin;
1177   *nn = n;
1178   if (!ia) PetscFunctionReturn(0);
1179   if (symmetric) {
1180     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1181   } else {
1182     tia = a->i; tja = a->j;
1183   }
1184 
1185   if (!blockcompressed && bs > 1) {
1186     (*nn) *= bs;
1187     nbs    = bs;
1188     /* malloc & create the natural set of indices */
1189     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1190     if (n) {
1191       (*ia)[0] = 0;
1192       for (j=1; j<bs; j++) {
1193         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1194       }
1195     }
1196 
1197     for (i=1; i<n; i++) {
1198       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1199       for (j=1; j<bs; j++) {
1200         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1201       }
1202     }
1203     if (n) {
1204       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1205     }
1206 
1207     if (ja) {
1208       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1209       cnt = 0;
1210       for (i=0; i<n; i++) {
1211         for (j=0; j<bs; j++) {
1212           for (k=tia[i]; k<tia[i+1]; k++) {
1213             for (l=0; l<bs; l++) {
1214               (*ja)[cnt++] = bs*tja[k] + l;
1215 	    }
1216           }
1217         }
1218       }
1219     }
1220 
1221     n     *= bs;
1222     nz *= bs*bs;
1223     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1224       ierr = PetscFree(tia);CHKERRQ(ierr);
1225       ierr = PetscFree(tja);CHKERRQ(ierr);
1226     }
1227   } else if (oshift == 1) {
1228     PetscInt nz = a->i[A->rmap->n/bs];
1229     /* malloc space and  add 1 to i and j indices */
1230     ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1231     for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1232     if (ja) {
1233       ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1234       for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1235     }
1236   } else {
1237     *ia = tia;
1238     if (ja) *ja = tja;
1239   }
1240 
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1246 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1247 {
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   if (!ia) PetscFunctionReturn(0);
1252   if (!blockcompressed && A->rmap->bs > 1) {
1253     ierr = PetscFree(*ia);CHKERRQ(ierr);
1254     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1255   } else if (symmetric || oshift == 1) {
1256     ierr = PetscFree(*ia);CHKERRQ(ierr);
1257     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1264 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1265 {
1266   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270 #if defined(PETSC_USE_LOG)
1271   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1272 #endif
1273   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1274   if (a->row) {
1275     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1276   }
1277   if (a->col) {
1278     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1279   }
1280   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1281   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1282   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1283   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1284   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1285   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1286   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1287   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1288   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1289 
1290   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1291   ierr = PetscFree(a);CHKERRQ(ierr);
1292 
1293   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1307 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311 
1312   PetscFunctionBegin;
1313   switch (op) {
1314   case MAT_ROW_ORIENTED:
1315     a->roworiented    = flg;
1316     break;
1317   case MAT_KEEP_NONZERO_PATTERN:
1318     a->keepnonzeropattern = flg;
1319     break;
1320   case MAT_NEW_NONZERO_LOCATIONS:
1321     a->nonew          = (flg ? 0 : 1);
1322     break;
1323   case MAT_NEW_NONZERO_LOCATION_ERR:
1324     a->nonew          = (flg ? -1 : 0);
1325     break;
1326   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1327     a->nonew          = (flg ? -2 : 0);
1328     break;
1329   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1330     a->nounused       = (flg ? -1 : 0);
1331     break;
1332   case MAT_NEW_DIAGONALS:
1333   case MAT_IGNORE_OFF_PROC_ENTRIES:
1334   case MAT_USE_HASH_TABLE:
1335     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1336     break;
1337   case MAT_SYMMETRIC:
1338   case MAT_STRUCTURALLY_SYMMETRIC:
1339   case MAT_HERMITIAN:
1340   case MAT_SYMMETRY_ETERNAL:
1341     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1342     break;
1343   default:
1344     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1351 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1352 {
1353   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1354   PetscErrorCode ierr;
1355   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1356   MatScalar      *aa,*aa_i;
1357   PetscScalar    *v_i;
1358 
1359   PetscFunctionBegin;
1360   bs  = A->rmap->bs;
1361   ai  = a->i;
1362   aj  = a->j;
1363   aa  = a->a;
1364   bs2 = a->bs2;
1365 
1366   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1367 
1368   bn  = row/bs;   /* Block number */
1369   bp  = row % bs; /* Block Position */
1370   M   = ai[bn+1] - ai[bn];
1371   *nz = bs*M;
1372 
1373   if (v) {
1374     *v = 0;
1375     if (*nz) {
1376       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1377       for (i=0; i<M; i++) { /* for each block in the block row */
1378         v_i  = *v + i*bs;
1379         aa_i = aa + bs2*(ai[bn] + i);
1380         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1381       }
1382     }
1383   }
1384 
1385   if (idx) {
1386     *idx = 0;
1387     if (*nz) {
1388       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1389       for (i=0; i<M; i++) { /* for each block in the block row */
1390         idx_i = *idx + i*bs;
1391         itmp  = bs*aj[ai[bn] + i];
1392         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1393       }
1394     }
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1401 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1402 {
1403   PetscErrorCode ierr;
1404 
1405   PetscFunctionBegin;
1406   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1407   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1413 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1414 {
1415   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1416   Mat            C;
1417   PetscErrorCode ierr;
1418   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1419   PetscInt       *rows,*cols,bs2=a->bs2;
1420   MatScalar      *array;
1421 
1422   PetscFunctionBegin;
1423   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1424   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1425     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1426     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1427 
1428     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1429     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1430     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1431     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1432     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1433     ierr = PetscFree(col);CHKERRQ(ierr);
1434   } else {
1435     C = *B;
1436   }
1437 
1438   array = a->a;
1439   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1440   cols = rows + bs;
1441   for (i=0; i<mbs; i++) {
1442     cols[0] = i*bs;
1443     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1444     len = ai[i+1] - ai[i];
1445     for (j=0; j<len; j++) {
1446       rows[0] = (*aj++)*bs;
1447       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1448       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1449       array += bs2;
1450     }
1451   }
1452   ierr = PetscFree(rows);CHKERRQ(ierr);
1453 
1454   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1456 
1457   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1458     *B = C;
1459   } else {
1460     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1461   }
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1467 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1468 {
1469   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1470   PetscErrorCode ierr;
1471   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1472   int            fd;
1473   PetscScalar    *aa;
1474   FILE           *file;
1475 
1476   PetscFunctionBegin;
1477   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1478   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1479   col_lens[0] = MAT_FILE_COOKIE;
1480 
1481   col_lens[1] = A->rmap->N;
1482   col_lens[2] = A->cmap->n;
1483   col_lens[3] = a->nz*bs2;
1484 
1485   /* store lengths of each row and write (including header) to file */
1486   count = 0;
1487   for (i=0; i<a->mbs; i++) {
1488     for (j=0; j<bs; j++) {
1489       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1490     }
1491   }
1492   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1493   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1494 
1495   /* store column indices (zero start index) */
1496   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1497   count = 0;
1498   for (i=0; i<a->mbs; i++) {
1499     for (j=0; j<bs; j++) {
1500       for (k=a->i[i]; k<a->i[i+1]; k++) {
1501         for (l=0; l<bs; l++) {
1502           jj[count++] = bs*a->j[k] + l;
1503         }
1504       }
1505     }
1506   }
1507   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1508   ierr = PetscFree(jj);CHKERRQ(ierr);
1509 
1510   /* store nonzero values */
1511   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1512   count = 0;
1513   for (i=0; i<a->mbs; i++) {
1514     for (j=0; j<bs; j++) {
1515       for (k=a->i[i]; k<a->i[i+1]; k++) {
1516         for (l=0; l<bs; l++) {
1517           aa[count++] = a->a[bs2*k + l*bs + j];
1518         }
1519       }
1520     }
1521   }
1522   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1523   ierr = PetscFree(aa);CHKERRQ(ierr);
1524 
1525   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1526   if (file) {
1527     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1534 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1535 {
1536   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1537   PetscErrorCode    ierr;
1538   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1539   PetscViewerFormat format;
1540 
1541   PetscFunctionBegin;
1542   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1543   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1544     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1545   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1546     Mat aij;
1547     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1548     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1549     ierr = MatDestroy(aij);CHKERRQ(ierr);
1550   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1551      PetscFunctionReturn(0);
1552   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1553     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1554     for (i=0; i<a->mbs; i++) {
1555       for (j=0; j<bs; j++) {
1556         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1557         for (k=a->i[i]; k<a->i[i+1]; k++) {
1558           for (l=0; l<bs; l++) {
1559 #if defined(PETSC_USE_COMPLEX)
1560             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1561               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1562                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1563             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1565                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             }
1569 #else
1570             if (a->a[bs2*k + l*bs + j] != 0.0) {
1571               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1572             }
1573 #endif
1574           }
1575         }
1576         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1577       }
1578     }
1579     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1580   } else {
1581     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1582     for (i=0; i<a->mbs; i++) {
1583       for (j=0; j<bs; j++) {
1584         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1585         for (k=a->i[i]; k<a->i[i+1]; k++) {
1586           for (l=0; l<bs; l++) {
1587 #if defined(PETSC_USE_COMPLEX)
1588             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1589               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1590                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1591             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1592               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1593                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1594             } else {
1595               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1596             }
1597 #else
1598             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1599 #endif
1600           }
1601         }
1602         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1603       }
1604     }
1605     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1606   }
1607   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 #undef __FUNCT__
1612 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1613 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1614 {
1615   Mat            A = (Mat) Aa;
1616   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1617   PetscErrorCode ierr;
1618   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1619   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1620   MatScalar      *aa;
1621   PetscViewer    viewer;
1622 
1623   PetscFunctionBegin;
1624 
1625   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1626   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1627 
1628   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1629 
1630   /* loop over matrix elements drawing boxes */
1631   color = PETSC_DRAW_BLUE;
1632   for (i=0,row=0; i<mbs; i++,row+=bs) {
1633     for (j=a->i[i]; j<a->i[i+1]; j++) {
1634       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1635       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1636       aa = a->a + j*bs2;
1637       for (k=0; k<bs; k++) {
1638         for (l=0; l<bs; l++) {
1639           if (PetscRealPart(*aa++) >=  0.) continue;
1640           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1641         }
1642       }
1643     }
1644   }
1645   color = PETSC_DRAW_CYAN;
1646   for (i=0,row=0; i<mbs; i++,row+=bs) {
1647     for (j=a->i[i]; j<a->i[i+1]; j++) {
1648       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1649       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1650       aa = a->a + j*bs2;
1651       for (k=0; k<bs; k++) {
1652         for (l=0; l<bs; l++) {
1653           if (PetscRealPart(*aa++) != 0.) continue;
1654           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1655         }
1656       }
1657     }
1658   }
1659 
1660   color = PETSC_DRAW_RED;
1661   for (i=0,row=0; i<mbs; i++,row+=bs) {
1662     for (j=a->i[i]; j<a->i[i+1]; j++) {
1663       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1664       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1665       aa = a->a + j*bs2;
1666       for (k=0; k<bs; k++) {
1667         for (l=0; l<bs; l++) {
1668           if (PetscRealPart(*aa++) <= 0.) continue;
1669           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1670         }
1671       }
1672     }
1673   }
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1679 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1680 {
1681   PetscErrorCode ierr;
1682   PetscReal      xl,yl,xr,yr,w,h;
1683   PetscDraw      draw;
1684   PetscTruth     isnull;
1685 
1686   PetscFunctionBegin;
1687 
1688   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1689   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1690 
1691   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1692   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1693   xr += w;    yr += h;  xl = -w;     yl = -h;
1694   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1695   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1696   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatView_SeqBAIJ"
1702 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1703 {
1704   PetscErrorCode ierr;
1705   PetscTruth     iascii,isbinary,isdraw;
1706 
1707   PetscFunctionBegin;
1708   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1709   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1710   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1711   if (iascii){
1712     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1713   } else if (isbinary) {
1714     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1715   } else if (isdraw) {
1716     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1717   } else {
1718     Mat B;
1719     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1720     ierr = MatView(B,viewer);CHKERRQ(ierr);
1721     ierr = MatDestroy(B);CHKERRQ(ierr);
1722   }
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1729 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1730 {
1731   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1732   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1733   PetscInt    *ai = a->i,*ailen = a->ilen;
1734   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1735   MatScalar   *ap,*aa = a->a;
1736 
1737   PetscFunctionBegin;
1738   for (k=0; k<m; k++) { /* loop over rows */
1739     row  = im[k]; brow = row/bs;
1740     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1741     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1742     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1743     nrow = ailen[brow];
1744     for (l=0; l<n; l++) { /* loop over columns */
1745       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1746       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1747       col  = in[l] ;
1748       bcol = col/bs;
1749       cidx = col%bs;
1750       ridx = row%bs;
1751       high = nrow;
1752       low  = 0; /* assume unsorted */
1753       while (high-low > 5) {
1754         t = (low+high)/2;
1755         if (rp[t] > bcol) high = t;
1756         else             low  = t;
1757       }
1758       for (i=low; i<high; i++) {
1759         if (rp[i] > bcol) break;
1760         if (rp[i] == bcol) {
1761           *v++ = ap[bs2*i+bs*cidx+ridx];
1762           goto finished;
1763         }
1764       }
1765       *v++ = 0.0;
1766       finished:;
1767     }
1768   }
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #define CHUNKSIZE 10
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1775 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1776 {
1777   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1778   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1779   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1780   PetscErrorCode    ierr;
1781   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1782   PetscTruth        roworiented=a->roworiented;
1783   const PetscScalar *value = v;
1784   MatScalar         *ap,*aa = a->a,*bap;
1785 
1786   PetscFunctionBegin;
1787   if (roworiented) {
1788     stepval = (n-1)*bs;
1789   } else {
1790     stepval = (m-1)*bs;
1791   }
1792   for (k=0; k<m; k++) { /* loop over added rows */
1793     row  = im[k];
1794     if (row < 0) continue;
1795 #if defined(PETSC_USE_DEBUG)
1796     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1797 #endif
1798     rp   = aj + ai[row];
1799     ap   = aa + bs2*ai[row];
1800     rmax = imax[row];
1801     nrow = ailen[row];
1802     low  = 0;
1803     high = nrow;
1804     for (l=0; l<n; l++) { /* loop over added columns */
1805       if (in[l] < 0) continue;
1806 #if defined(PETSC_USE_DEBUG)
1807       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1808 #endif
1809       col = in[l];
1810       if (roworiented) {
1811         value = v + k*(stepval+bs)*bs + l*bs;
1812       } else {
1813         value = v + l*(stepval+bs)*bs + k*bs;
1814       }
1815       if (col <= lastcol) low = 0; else high = nrow;
1816       lastcol = col;
1817       while (high-low > 7) {
1818         t = (low+high)/2;
1819         if (rp[t] > col) high = t;
1820         else             low  = t;
1821       }
1822       for (i=low; i<high; i++) {
1823         if (rp[i] > col) break;
1824         if (rp[i] == col) {
1825           bap  = ap +  bs2*i;
1826           if (roworiented) {
1827             if (is == ADD_VALUES) {
1828               for (ii=0; ii<bs; ii++,value+=stepval) {
1829                 for (jj=ii; jj<bs2; jj+=bs) {
1830                   bap[jj] += *value++;
1831                 }
1832               }
1833             } else {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] = *value++;
1837                 }
1838               }
1839             }
1840           } else {
1841             if (is == ADD_VALUES) {
1842               for (ii=0; ii<bs; ii++,value+=stepval) {
1843                 for (jj=0; jj<bs; jj++) {
1844                   *bap++ += *value++;
1845                 }
1846               }
1847             } else {
1848               for (ii=0; ii<bs; ii++,value+=stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   *bap++  = *value++;
1851                 }
1852               }
1853             }
1854           }
1855           goto noinsert2;
1856         }
1857       }
1858       if (nonew == 1) goto noinsert2;
1859       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1860       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1861       N = nrow++ - 1; high++;
1862       /* shift up all the later entries in this row */
1863       for (ii=N; ii>=i; ii--) {
1864         rp[ii+1] = rp[ii];
1865         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1866       }
1867       if (N >= i) {
1868         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1869       }
1870       rp[i] = col;
1871       bap   = ap +  bs2*i;
1872       if (roworiented) {
1873         for (ii=0; ii<bs; ii++,value+=stepval) {
1874           for (jj=ii; jj<bs2; jj+=bs) {
1875             bap[jj] = *value++;
1876           }
1877         }
1878       } else {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=0; jj<bs; jj++) {
1881             *bap++  = *value++;
1882           }
1883         }
1884       }
1885       noinsert2:;
1886       low = i;
1887     }
1888     ailen[row] = nrow;
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1895 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1896 {
1897   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1898   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1899   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1900   PetscErrorCode ierr;
1901   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1902   MatScalar      *aa = a->a,*ap;
1903   PetscReal      ratio=0.6;
1904 
1905   PetscFunctionBegin;
1906   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1907 
1908   if (m) rmax = ailen[0];
1909   for (i=1; i<mbs; i++) {
1910     /* move each row back by the amount of empty slots (fshift) before it*/
1911     fshift += imax[i-1] - ailen[i-1];
1912     rmax   = PetscMax(rmax,ailen[i]);
1913     if (fshift) {
1914       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1915       N = ailen[i];
1916       for (j=0; j<N; j++) {
1917         ip[j-fshift] = ip[j];
1918         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1919       }
1920     }
1921     ai[i] = ai[i-1] + ailen[i-1];
1922   }
1923   if (mbs) {
1924     fshift += imax[mbs-1] - ailen[mbs-1];
1925     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1926   }
1927   /* reset ilen and imax for each row */
1928   for (i=0; i<mbs; i++) {
1929     ailen[i] = imax[i] = ai[i+1] - ai[i];
1930   }
1931   a->nz = ai[mbs];
1932 
1933   /* diagonals may have moved, so kill the diagonal pointers */
1934   a->idiagvalid = PETSC_FALSE;
1935   if (fshift && a->diag) {
1936     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1937     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1938     a->diag = 0;
1939   }
1940   if (fshift && a->nounused == -1) {
1941     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1942   }
1943   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1944   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1945   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1946   a->reallocs          = 0;
1947   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1948 
1949   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1950   if (a->compressedrow.use){
1951     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1952   }
1953 
1954   A->same_nonzero = PETSC_TRUE;
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 /*
1959    This function returns an array of flags which indicate the locations of contiguous
1960    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1961    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1962    Assume: sizes should be long enough to hold all the values.
1963 */
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1966 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1967 {
1968   PetscInt   i,j,k,row;
1969   PetscTruth flg;
1970 
1971   PetscFunctionBegin;
1972   for (i=0,j=0; i<n; j++) {
1973     row = idx[i];
1974     if (row%bs!=0) { /* Not the begining of a block */
1975       sizes[j] = 1;
1976       i++;
1977     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1978       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1979       i++;
1980     } else { /* Begining of the block, so check if the complete block exists */
1981       flg = PETSC_TRUE;
1982       for (k=1; k<bs; k++) {
1983         if (row+k != idx[i+k]) { /* break in the block */
1984           flg = PETSC_FALSE;
1985           break;
1986         }
1987       }
1988       if (flg) { /* No break in the bs */
1989         sizes[j] = bs;
1990         i+= bs;
1991       } else {
1992         sizes[j] = 1;
1993         i++;
1994       }
1995     }
1996   }
1997   *bs_max = j;
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2003 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2004 {
2005   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2006   PetscErrorCode ierr;
2007   PetscInt       i,j,k,count,*rows;
2008   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2009   PetscScalar    zero = 0.0;
2010   MatScalar      *aa;
2011 
2012   PetscFunctionBegin;
2013   /* Make a copy of the IS and  sort it */
2014   /* allocate memory for rows,sizes */
2015   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2016   sizes = rows + is_n;
2017 
2018   /* copy IS values to rows, and sort them */
2019   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2020   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2021   if (baij->keepnonzeropattern) {
2022     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2023     bs_max = is_n;
2024     A->same_nonzero = PETSC_TRUE;
2025   } else {
2026     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2027     A->same_nonzero = PETSC_FALSE;
2028   }
2029 
2030   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2031     row   = rows[j];
2032     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2033     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2034     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2035     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2036       if (diag != 0.0) {
2037         if (baij->ilen[row/bs] > 0) {
2038           baij->ilen[row/bs]       = 1;
2039           baij->j[baij->i[row/bs]] = row/bs;
2040           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2041         }
2042         /* Now insert all the diagonal values for this bs */
2043         for (k=0; k<bs; k++) {
2044           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2045         }
2046       } else { /* (diag == 0.0) */
2047         baij->ilen[row/bs] = 0;
2048       } /* end (diag == 0.0) */
2049     } else { /* (sizes[i] != bs) */
2050 #if defined (PETSC_USE_DEBUG)
2051       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2052 #endif
2053       for (k=0; k<count; k++) {
2054         aa[0] =  zero;
2055         aa    += bs;
2056       }
2057       if (diag != 0.0) {
2058         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2059       }
2060     }
2061   }
2062 
2063   ierr = PetscFree(rows);CHKERRQ(ierr);
2064   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2070 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2071 {
2072   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2073   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2074   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2075   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2076   PetscErrorCode ierr;
2077   PetscInt       ridx,cidx,bs2=a->bs2;
2078   PetscTruth     roworiented=a->roworiented;
2079   MatScalar      *ap,value,*aa=a->a,*bap;
2080 
2081   PetscFunctionBegin;
2082   for (k=0; k<m; k++) { /* loop over added rows */
2083     row  = im[k];
2084     brow = row/bs;
2085     if (row < 0) continue;
2086 #if defined(PETSC_USE_DEBUG)
2087     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2088 #endif
2089     rp   = aj + ai[brow];
2090     ap   = aa + bs2*ai[brow];
2091     rmax = imax[brow];
2092     nrow = ailen[brow];
2093     low  = 0;
2094     high = nrow;
2095     for (l=0; l<n; l++) { /* loop over added columns */
2096       if (in[l] < 0) continue;
2097 #if defined(PETSC_USE_DEBUG)
2098       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2099 #endif
2100       col = in[l]; bcol = col/bs;
2101       ridx = row % bs; cidx = col % bs;
2102       if (roworiented) {
2103         value = v[l + k*n];
2104       } else {
2105         value = v[k + l*m];
2106       }
2107       if (col <= lastcol) low = 0; else high = nrow;
2108       lastcol = col;
2109       while (high-low > 7) {
2110         t = (low+high)/2;
2111         if (rp[t] > bcol) high = t;
2112         else              low  = t;
2113       }
2114       for (i=low; i<high; i++) {
2115         if (rp[i] > bcol) break;
2116         if (rp[i] == bcol) {
2117           bap  = ap +  bs2*i + bs*cidx + ridx;
2118           if (is == ADD_VALUES) *bap += value;
2119           else                  *bap  = value;
2120           goto noinsert1;
2121         }
2122       }
2123       if (nonew == 1) goto noinsert1;
2124       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2125       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2126       N = nrow++ - 1; high++;
2127       /* shift up all the later entries in this row */
2128       for (ii=N; ii>=i; ii--) {
2129         rp[ii+1] = rp[ii];
2130         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2131       }
2132       if (N>=i) {
2133         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2134       }
2135       rp[i]                      = bcol;
2136       ap[bs2*i + bs*cidx + ridx] = value;
2137       a->nz++;
2138       noinsert1:;
2139       low = i;
2140     }
2141     ailen[brow] = nrow;
2142   }
2143   A->same_nonzero = PETSC_FALSE;
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2148 
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2151 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2152 {
2153   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2154   Mat            outA;
2155   PetscErrorCode ierr;
2156   PetscTruth     row_identity,col_identity;
2157 
2158   PetscFunctionBegin;
2159   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2160   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2161   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2162   if (!row_identity || !col_identity) {
2163     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2164   }
2165 
2166   outA          = inA;
2167   inA->factor   = MAT_FACTOR_LU;
2168 
2169   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2170 
2171   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2172   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2173   a->row = row;
2174   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2175   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2176   a->col = col;
2177 
2178   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2179   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2180   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2181 
2182   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2183   if (!a->solve_work) {
2184     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2185     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2186   }
2187   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2188 
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 EXTERN_C_BEGIN
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2195 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2196 {
2197   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2198   PetscInt    i,nz,mbs;
2199 
2200   PetscFunctionBegin;
2201   nz  = baij->maxnz/baij->bs2;
2202   mbs = baij->mbs;
2203   for (i=0; i<nz; i++) {
2204     baij->j[i] = indices[i];
2205   }
2206   baij->nz = nz;
2207   for (i=0; i<mbs; i++) {
2208     baij->ilen[i] = baij->imax[i];
2209   }
2210   PetscFunctionReturn(0);
2211 }
2212 EXTERN_C_END
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2216 /*@
2217     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2218        in the matrix.
2219 
2220   Input Parameters:
2221 +  mat - the SeqBAIJ matrix
2222 -  indices - the column indices
2223 
2224   Level: advanced
2225 
2226   Notes:
2227     This can be called if you have precomputed the nonzero structure of the
2228   matrix and want to provide it to the matrix object to improve the performance
2229   of the MatSetValues() operation.
2230 
2231     You MUST have set the correct numbers of nonzeros per row in the call to
2232   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2233 
2234     MUST be called before any calls to MatSetValues();
2235 
2236 @*/
2237 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2238 {
2239   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2240 
2241   PetscFunctionBegin;
2242   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2243   PetscValidPointer(indices,2);
2244   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2245   if (f) {
2246     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2247   } else {
2248     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2249   }
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2255 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2256 {
2257   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2258   PetscErrorCode ierr;
2259   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2260   PetscReal      atmp;
2261   PetscScalar    *x,zero = 0.0;
2262   MatScalar      *aa;
2263   PetscInt       ncols,brow,krow,kcol;
2264 
2265   PetscFunctionBegin;
2266   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2267   bs   = A->rmap->bs;
2268   aa   = a->a;
2269   ai   = a->i;
2270   aj   = a->j;
2271   mbs  = a->mbs;
2272 
2273   ierr = VecSet(v,zero);CHKERRQ(ierr);
2274   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2275   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2276   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2277   for (i=0; i<mbs; i++) {
2278     ncols = ai[1] - ai[0]; ai++;
2279     brow  = bs*i;
2280     for (j=0; j<ncols; j++){
2281       for (kcol=0; kcol<bs; kcol++){
2282         for (krow=0; krow<bs; krow++){
2283           atmp = PetscAbsScalar(*aa);aa++;
2284           row = brow + krow;    /* row index */
2285           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2286           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2287         }
2288       }
2289       aj++;
2290     }
2291   }
2292   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatCopy_SeqBAIJ"
2298 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2299 {
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   /* If the two matrices have the same copy implementation, use fast copy. */
2304   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2305     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2306     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2307 
2308     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2309       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2310     }
2311     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2312   } else {
2313     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2314   }
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2320 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2321 {
2322   PetscErrorCode ierr;
2323 
2324   PetscFunctionBegin;
2325   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2331 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2332 {
2333   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2334   PetscFunctionBegin;
2335   *array = a->a;
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2341 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2342 {
2343   PetscFunctionBegin;
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #include "petscblaslapack.h"
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2350 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2351 {
2352   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2353   PetscErrorCode ierr;
2354   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2355   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2356 
2357   PetscFunctionBegin;
2358   if (str == SAME_NONZERO_PATTERN) {
2359     PetscScalar alpha = a;
2360     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2361   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2362     if (y->xtoy && y->XtoY != X) {
2363       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2364       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2365     }
2366     if (!y->xtoy) { /* get xtoy */
2367       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2368       y->XtoY = X;
2369       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2370     }
2371     bs2 = bs*bs;
2372     for (i=0; i<x->nz; i++) {
2373       j = 0;
2374       while (j < bs2){
2375         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2376         j++;
2377       }
2378     }
2379     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2380   } else {
2381     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2382   }
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2388 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2389 {
2390   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2391   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2392   MatScalar      *aa = a->a;
2393 
2394   PetscFunctionBegin;
2395   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2401 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2402 {
2403   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2404   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2405   MatScalar      *aa = a->a;
2406 
2407   PetscFunctionBegin;
2408   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2416 /*
2417     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2418 */
2419 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2420 {
2421   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2422   PetscErrorCode ierr;
2423   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2424   PetscInt       nz = a->i[m],row,*jj,mr,col;
2425 
2426   PetscFunctionBegin;
2427   *nn = n;
2428   if (!ia) PetscFunctionReturn(0);
2429   if (symmetric) {
2430     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2431   } else {
2432     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2433     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2434     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2435     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2436     jj = a->j;
2437     for (i=0; i<nz; i++) {
2438       collengths[jj[i]]++;
2439     }
2440     cia[0] = oshift;
2441     for (i=0; i<n; i++) {
2442       cia[i+1] = cia[i] + collengths[i];
2443     }
2444     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2445     jj   = a->j;
2446     for (row=0; row<m; row++) {
2447       mr = a->i[row+1] - a->i[row];
2448       for (i=0; i<mr; i++) {
2449         col = *jj++;
2450         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2451       }
2452     }
2453     ierr = PetscFree(collengths);CHKERRQ(ierr);
2454     *ia = cia; *ja = cja;
2455   }
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2461 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2462 {
2463   PetscErrorCode ierr;
2464 
2465   PetscFunctionBegin;
2466   if (!ia) PetscFunctionReturn(0);
2467   ierr = PetscFree(*ia);CHKERRQ(ierr);
2468   ierr = PetscFree(*ja);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2474 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2475 {
2476   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2477   PetscErrorCode ierr;
2478   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2479   PetscScalar    dx,*y,*xx,*w3_array;
2480   PetscScalar    *vscale_array;
2481   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2482   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2483   void           *fctx = coloring->fctx;
2484   PetscTruth     flg = PETSC_FALSE;
2485   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2486   Vec            x1_tmp;
2487 
2488   PetscFunctionBegin;
2489   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2490   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2491   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2492   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2493 
2494   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2495   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2496   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2497   if (flg) {
2498     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2499   } else {
2500     PetscTruth assembled;
2501     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2502     if (assembled) {
2503       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2504     }
2505   }
2506 
2507   x1_tmp = x1;
2508   if (!coloring->vscale){
2509     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2510   }
2511 
2512   /*
2513     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2514     coloring->F for the coarser grids from the finest
2515   */
2516   if (coloring->F) {
2517     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2518     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2519     if (m1 != m2) {
2520       coloring->F = 0;
2521       }
2522     }
2523 
2524   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2525     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2526   }
2527   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2528 
2529   /* Set w1 = F(x1) */
2530   if (coloring->F) {
2531     w1          = coloring->F; /* use already computed value of function */
2532     coloring->F = 0;
2533   } else {
2534     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2535     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2536     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2537   }
2538 
2539   if (!coloring->w3) {
2540     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2541     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2542   }
2543   w3 = coloring->w3;
2544 
2545     CHKMEMQ;
2546     /* Compute all the local scale factors, including ghost points */
2547   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2548   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2549   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2550   if (ctype == IS_COLORING_GHOSTED){
2551     col_start = 0; col_end = N;
2552   } else if (ctype == IS_COLORING_GLOBAL){
2553     xx = xx - start;
2554     vscale_array = vscale_array - start;
2555     col_start = start; col_end = N + start;
2556   }    CHKMEMQ;
2557   for (col=col_start; col<col_end; col++){
2558     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2559     if (coloring->htype[0] == 'w') {
2560       dx = 1.0 + unorm;
2561     } else {
2562       dx  = xx[col];
2563     }
2564     if (dx == 0.0) dx = 1.0;
2565 #if !defined(PETSC_USE_COMPLEX)
2566     if (dx < umin && dx >= 0.0)      dx = umin;
2567     else if (dx < 0.0 && dx > -umin) dx = -umin;
2568 #else
2569     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2570     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2571 #endif
2572     dx               *= epsilon;
2573     vscale_array[col] = 1.0/dx;
2574   }     CHKMEMQ;
2575   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2576   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2577   if (ctype == IS_COLORING_GLOBAL){
2578     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2579     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2580   }
2581         CHKMEMQ;
2582   if (coloring->vscaleforrow) {
2583     vscaleforrow = coloring->vscaleforrow;
2584   } else {
2585     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2586   }
2587 
2588 
2589   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2590   /*
2591     Loop over each color
2592   */
2593   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2594   for (k=0; k<coloring->ncolors; k++) {
2595     coloring->currentcolor = k;
2596     for (i=0; i<bs; i++) {
2597       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2598       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2599       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2600       /*
2601 	Loop over each column associated with color
2602 	adding the perturbation to the vector w3.
2603       */
2604       for (l=0; l<coloring->ncolumns[k]; l++) {
2605 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2606 	if (coloring->htype[0] == 'w') {
2607 	  dx = 1.0 + unorm;
2608 	} else {
2609 	  dx  = xx[col];
2610 	}
2611 	if (dx == 0.0) dx = 1.0;
2612 #if !defined(PETSC_USE_COMPLEX)
2613 	if (dx < umin && dx >= 0.0)      dx = umin;
2614 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2615 #else
2616 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2617 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2618 #endif
2619 	dx            *= epsilon;
2620 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2621 	w3_array[col] += dx;
2622       }
2623       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2624       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2625 
2626       /*
2627 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2628 	w2 = F(x1 + dx) - F(x1)
2629       */
2630       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2631       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2632       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2633       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2634 
2635       /*
2636 	Loop over rows of vector, putting results into Jacobian matrix
2637       */
2638       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2639       for (l=0; l<coloring->nrows[k]; l++) {
2640 	row    = bs*coloring->rows[k][l];             /* local row index */
2641 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2642         for (j=0; j<bs; j++) {
2643   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2644           srows[j]  = row + start + j;
2645         }
2646 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2647       }
2648       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2649     }
2650   } /* endof for each color */
2651   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2652   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2653   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2654   ierr = PetscFree(srows);CHKERRQ(ierr);
2655 
2656   coloring->currentcolor = -1;
2657   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2658   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2659   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 /* -------------------------------------------------------------------*/
2664 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2665        MatGetRow_SeqBAIJ,
2666        MatRestoreRow_SeqBAIJ,
2667        MatMult_SeqBAIJ_N,
2668 /* 4*/ MatMultAdd_SeqBAIJ_N,
2669        MatMultTranspose_SeqBAIJ,
2670        MatMultTransposeAdd_SeqBAIJ,
2671        0,
2672        0,
2673        0,
2674 /*10*/ 0,
2675        MatLUFactor_SeqBAIJ,
2676        0,
2677        0,
2678        MatTranspose_SeqBAIJ,
2679 /*15*/ MatGetInfo_SeqBAIJ,
2680        MatEqual_SeqBAIJ,
2681        MatGetDiagonal_SeqBAIJ,
2682        MatDiagonalScale_SeqBAIJ,
2683        MatNorm_SeqBAIJ,
2684 /*20*/ 0,
2685        MatAssemblyEnd_SeqBAIJ,
2686        MatSetOption_SeqBAIJ,
2687        MatZeroEntries_SeqBAIJ,
2688 /*24*/ MatZeroRows_SeqBAIJ,
2689        0,
2690        0,
2691        0,
2692        0,
2693 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2694        0,
2695        0,
2696        MatGetArray_SeqBAIJ,
2697        MatRestoreArray_SeqBAIJ,
2698 /*34*/ MatDuplicate_SeqBAIJ,
2699        0,
2700        0,
2701        MatILUFactor_SeqBAIJ,
2702        0,
2703 /*39*/ MatAXPY_SeqBAIJ,
2704        MatGetSubMatrices_SeqBAIJ,
2705        MatIncreaseOverlap_SeqBAIJ,
2706        MatGetValues_SeqBAIJ,
2707        MatCopy_SeqBAIJ,
2708 /*44*/ 0,
2709        MatScale_SeqBAIJ,
2710        0,
2711        0,
2712        MatILUDTFactor_SeqBAIJ,
2713 /*49*/ 0,
2714        MatGetRowIJ_SeqBAIJ,
2715        MatRestoreRowIJ_SeqBAIJ,
2716        MatGetColumnIJ_SeqBAIJ,
2717        MatRestoreColumnIJ_SeqBAIJ,
2718 /*54*/ MatFDColoringCreate_SeqAIJ,
2719        0,
2720        0,
2721        0,
2722        MatSetValuesBlocked_SeqBAIJ,
2723 /*59*/ MatGetSubMatrix_SeqBAIJ,
2724        MatDestroy_SeqBAIJ,
2725        MatView_SeqBAIJ,
2726        0,
2727        0,
2728 /*64*/ 0,
2729        0,
2730        0,
2731        0,
2732        0,
2733 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2734        0,
2735        MatConvert_Basic,
2736        0,
2737        0,
2738 /*74*/ 0,
2739        MatFDColoringApply_BAIJ,
2740        0,
2741        0,
2742        0,
2743 /*79*/ 0,
2744        0,
2745        0,
2746        0,
2747        MatLoad_SeqBAIJ,
2748 /*84*/ 0,
2749        0,
2750        0,
2751        0,
2752        0,
2753 /*89*/ 0,
2754        0,
2755        0,
2756        0,
2757        0,
2758 /*94*/ 0,
2759        0,
2760        0,
2761        0,
2762        0,
2763 /*99*/0,
2764        0,
2765        0,
2766        0,
2767        0,
2768 /*104*/0,
2769        MatRealPart_SeqBAIJ,
2770        MatImaginaryPart_SeqBAIJ,
2771        0,
2772        0,
2773 /*109*/0,
2774        0,
2775        0,
2776        0,
2777        MatMissingDiagonal_SeqBAIJ
2778 /*114*/
2779 };
2780 
2781 EXTERN_C_BEGIN
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2784 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2785 {
2786   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2787   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2788   PetscErrorCode ierr;
2789 
2790   PetscFunctionBegin;
2791   if (aij->nonew != 1) {
2792     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2793   }
2794 
2795   /* allocate space for values if not already there */
2796   if (!aij->saved_values) {
2797     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2798   }
2799 
2800   /* copy values over */
2801   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2802   PetscFunctionReturn(0);
2803 }
2804 EXTERN_C_END
2805 
2806 EXTERN_C_BEGIN
2807 #undef __FUNCT__
2808 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2809 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2810 {
2811   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2812   PetscErrorCode ierr;
2813   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2814 
2815   PetscFunctionBegin;
2816   if (aij->nonew != 1) {
2817     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2818   }
2819   if (!aij->saved_values) {
2820     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2821   }
2822 
2823   /* copy values over */
2824   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 EXTERN_C_END
2828 
2829 EXTERN_C_BEGIN
2830 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2831 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2832 EXTERN_C_END
2833 
2834 EXTERN_C_BEGIN
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2837 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2838 {
2839   Mat_SeqBAIJ    *b;
2840   PetscErrorCode ierr;
2841   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2842   PetscTruth     flg,skipallocation = PETSC_FALSE;
2843 
2844   PetscFunctionBegin;
2845 
2846   if (nz == MAT_SKIP_ALLOCATION) {
2847     skipallocation = PETSC_TRUE;
2848     nz             = 0;
2849   }
2850 
2851   if (bs < 0) {
2852     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2853       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2854     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2855     bs   = PetscAbs(bs);
2856   }
2857   if (nnz && newbs != bs) {
2858     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2859   }
2860   bs   = newbs;
2861 
2862   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2863   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2864   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2865   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2866 
2867   B->preallocated = PETSC_TRUE;
2868 
2869   mbs  = B->rmap->n/bs;
2870   nbs  = B->cmap->n/bs;
2871   bs2  = bs*bs;
2872 
2873   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2874     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2875   }
2876 
2877   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2878   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2879   if (nnz) {
2880     for (i=0; i<mbs; i++) {
2881       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2882       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2883     }
2884   }
2885 
2886   b       = (Mat_SeqBAIJ*)B->data;
2887   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2888     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2889   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2890 
2891   if (!flg) {
2892     switch (bs) {
2893     case 1:
2894       B->ops->mult            = MatMult_SeqBAIJ_1;
2895       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2896       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2897       break;
2898     case 2:
2899       B->ops->mult            = MatMult_SeqBAIJ_2;
2900       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2901       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2902       break;
2903     case 3:
2904       B->ops->mult            = MatMult_SeqBAIJ_3;
2905       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2906       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2907       break;
2908     case 4:
2909       B->ops->mult            = MatMult_SeqBAIJ_4;
2910       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2911       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2912       break;
2913     case 5:
2914       B->ops->mult            = MatMult_SeqBAIJ_5;
2915       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2916       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2917       break;
2918     case 6:
2919       B->ops->mult            = MatMult_SeqBAIJ_6;
2920       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2921       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2922       break;
2923     case 7:
2924       B->ops->mult            = MatMult_SeqBAIJ_7;
2925       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2926       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2927       break;
2928     default:
2929       B->ops->mult            = MatMult_SeqBAIJ_N;
2930       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2931       break;
2932     }
2933   }
2934   B->rmap->bs      = bs;
2935   b->mbs     = mbs;
2936   b->nbs     = nbs;
2937   if (!skipallocation) {
2938     if (!b->imax) {
2939       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2940     }
2941     /* b->ilen will count nonzeros in each block row so far. */
2942     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2943     if (!nnz) {
2944       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2945       else if (nz <= 0)        nz = 1;
2946       for (i=0; i<mbs; i++) b->imax[i] = nz;
2947       nz = nz*mbs;
2948     } else {
2949       nz = 0;
2950       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2951     }
2952 
2953     /* allocate the matrix space */
2954     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2955     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2956     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2957     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2958     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2959     b->singlemalloc = PETSC_TRUE;
2960     b->i[0] = 0;
2961     for (i=1; i<mbs+1; i++) {
2962       b->i[i] = b->i[i-1] + b->imax[i-1];
2963     }
2964     b->free_a     = PETSC_TRUE;
2965     b->free_ij    = PETSC_TRUE;
2966   } else {
2967     b->free_a     = PETSC_FALSE;
2968     b->free_ij    = PETSC_FALSE;
2969   }
2970 
2971   B->rmap->bs          = bs;
2972   b->bs2              = bs2;
2973   b->mbs              = mbs;
2974   b->nz               = 0;
2975   b->maxnz            = nz*bs2;
2976   B->info.nz_unneeded = (PetscReal)b->maxnz;
2977   PetscFunctionReturn(0);
2978 }
2979 EXTERN_C_END
2980 
2981 EXTERN_C_BEGIN
2982 #undef __FUNCT__
2983 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2984 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2985 {
2986   PetscInt       i,m,nz,nz_max=0,*nnz;
2987   PetscScalar    *values=0;
2988   PetscErrorCode ierr;
2989 
2990   PetscFunctionBegin;
2991 
2992   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2993 
2994   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2995   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2996   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2997   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2998   m = B->rmap->n/bs;
2999 
3000   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3001   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3002   for(i=0; i<m; i++) {
3003     nz = ii[i+1]- ii[i];
3004     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3005     nz_max = PetscMax(nz_max, nz);
3006     nnz[i] = nz;
3007   }
3008   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3009   ierr = PetscFree(nnz);CHKERRQ(ierr);
3010 
3011   values = (PetscScalar*)V;
3012   if (!values) {
3013     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3014     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3015   }
3016   for (i=0; i<m; i++) {
3017     PetscInt          ncols  = ii[i+1] - ii[i];
3018     const PetscInt    *icols = jj + ii[i];
3019     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3020     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3021   }
3022   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3023   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025 
3026   PetscFunctionReturn(0);
3027 }
3028 EXTERN_C_END
3029 
3030 
3031 EXTERN_C_BEGIN
3032 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3033 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3034 EXTERN_C_END
3035 
3036 /*MC
3037    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3038    block sparse compressed row format.
3039 
3040    Options Database Keys:
3041 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3042 
3043   Level: beginner
3044 
3045 .seealso: MatCreateSeqBAIJ()
3046 M*/
3047 
3048 
3049 EXTERN_C_BEGIN
3050 #undef __FUNCT__
3051 #define __FUNCT__ "MatCreate_SeqBAIJ"
3052 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3053 {
3054   PetscErrorCode ierr;
3055   PetscMPIInt    size;
3056   Mat_SeqBAIJ    *b;
3057 
3058   PetscFunctionBegin;
3059   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3060   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3061 
3062   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3063   B->data = (void*)b;
3064   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3065   B->mapping               = 0;
3066   b->row                   = 0;
3067   b->col                   = 0;
3068   b->icol                  = 0;
3069   b->reallocs              = 0;
3070   b->saved_values          = 0;
3071 
3072   b->roworiented           = PETSC_TRUE;
3073   b->nonew                 = 0;
3074   b->diag                  = 0;
3075   b->solve_work            = 0;
3076   b->mult_work             = 0;
3077   B->spptr                 = 0;
3078   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3079   b->keepnonzeropattern    = PETSC_FALSE;
3080   b->xtoy                  = 0;
3081   b->XtoY                  = 0;
3082   b->compressedrow.use     = PETSC_FALSE;
3083   b->compressedrow.nrows   = 0;
3084   b->compressedrow.i       = PETSC_NULL;
3085   b->compressedrow.rindex  = PETSC_NULL;
3086   b->compressedrow.checked = PETSC_FALSE;
3087   B->same_nonzero          = PETSC_FALSE;
3088 
3089   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
3090                                      "MatGetFactorAvailable_seqbaij_petsc",
3091                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
3093                                      "MatGetFactor_seqbaij_petsc",
3094                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3096                                      "MatInvertBlockDiagonal_SeqBAIJ",
3097                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3098   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3099                                      "MatStoreValues_SeqBAIJ",
3100                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3101   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3102                                      "MatRetrieveValues_SeqBAIJ",
3103                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3105                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3106                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3108                                      "MatConvert_SeqBAIJ_SeqAIJ",
3109                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3111                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3112                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3113   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3114                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3115                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3116   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3117                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3118                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3119   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3120   PetscFunctionReturn(0);
3121 }
3122 EXTERN_C_END
3123 
3124 #undef __FUNCT__
3125 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3126 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
3127 {
3128   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3129   PetscErrorCode ierr;
3130   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3131 
3132   PetscFunctionBegin;
3133   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3134 
3135   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3136   for (i=0; i<mbs; i++) {
3137     c->imax[i] = a->imax[i];
3138     c->ilen[i] = a->ilen[i];
3139   }
3140 
3141   /* allocate the matrix space */
3142   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3143   c->singlemalloc = PETSC_TRUE;
3144   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3145   if (mbs > 0) {
3146     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3147     if (cpvalues == MAT_COPY_VALUES) {
3148       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3149     } else {
3150       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3151     }
3152   }
3153   c->roworiented = a->roworiented;
3154   c->nonew       = a->nonew;
3155   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
3156   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
3157   c->bs2         = a->bs2;
3158   c->mbs         = a->mbs;
3159   c->nbs         = a->nbs;
3160 
3161   if (a->diag) {
3162     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3163     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3164     for (i=0; i<mbs; i++) {
3165       c->diag[i] = a->diag[i];
3166     }
3167   } else c->diag        = 0;
3168   c->nz                 = a->nz;
3169   c->maxnz              = a->maxnz;
3170   c->solve_work         = 0;
3171   c->mult_work          = 0;
3172   c->free_a             = PETSC_TRUE;
3173   c->free_ij            = PETSC_TRUE;
3174   C->preallocated       = PETSC_TRUE;
3175   C->assembled          = PETSC_TRUE;
3176 
3177   c->compressedrow.use     = a->compressedrow.use;
3178   c->compressedrow.nrows   = a->compressedrow.nrows;
3179   c->compressedrow.checked = a->compressedrow.checked;
3180   if ( a->compressedrow.checked && a->compressedrow.use){
3181     i = a->compressedrow.nrows;
3182     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
3183     c->compressedrow.rindex = c->compressedrow.i + i + 1;
3184     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3185     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3186   } else {
3187     c->compressedrow.use    = PETSC_FALSE;
3188     c->compressedrow.i      = PETSC_NULL;
3189     c->compressedrow.rindex = PETSC_NULL;
3190   }
3191   C->same_nonzero = A->same_nonzero;
3192   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3193   PetscFunctionReturn(0);
3194 }
3195 
3196 #undef __FUNCT__
3197 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3198 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3199 {
3200     PetscErrorCode ierr;
3201 
3202   PetscFunctionBegin;
3203   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3204   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3205   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3206   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
3207   PetscFunctionReturn(0);
3208 }
3209 
3210 #undef __FUNCT__
3211 #define __FUNCT__ "MatLoad_SeqBAIJ"
3212 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3213 {
3214   Mat_SeqBAIJ    *a;
3215   Mat            B;
3216   PetscErrorCode ierr;
3217   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3218   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3219   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3220   PetscInt       *masked,nmask,tmp,bs2,ishift;
3221   PetscMPIInt    size;
3222   int            fd;
3223   PetscScalar    *aa;
3224   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3225 
3226   PetscFunctionBegin;
3227   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3228     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3229   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3230   bs2  = bs*bs;
3231 
3232   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3233   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3234   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3235   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3236   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3237   M = header[1]; N = header[2]; nz = header[3];
3238 
3239   if (header[3] < 0) {
3240     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3241   }
3242 
3243   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3244 
3245   /*
3246      This code adds extra rows to make sure the number of rows is
3247     divisible by the blocksize
3248   */
3249   mbs        = M/bs;
3250   extra_rows = bs - M + bs*(mbs);
3251   if (extra_rows == bs) extra_rows = 0;
3252   else                  mbs++;
3253   if (extra_rows) {
3254     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3255   }
3256 
3257   /* read in row lengths */
3258   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3259   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3260   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3261 
3262   /* read in column indices */
3263   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3264   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3265   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3266 
3267   /* loop over row lengths determining block row lengths */
3268   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3269   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3270   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3271   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3272   masked   = mask + mbs;
3273   rowcount = 0; nzcount = 0;
3274   for (i=0; i<mbs; i++) {
3275     nmask = 0;
3276     for (j=0; j<bs; j++) {
3277       kmax = rowlengths[rowcount];
3278       for (k=0; k<kmax; k++) {
3279         tmp = jj[nzcount++]/bs;
3280         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3281       }
3282       rowcount++;
3283     }
3284     browlengths[i] += nmask;
3285     /* zero out the mask elements we set */
3286     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3287   }
3288 
3289   /* create our matrix */
3290   ierr = MatCreate(comm,&B);
3291   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3292   ierr = MatSetType(B,type);CHKERRQ(ierr);
3293   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3294   a = (Mat_SeqBAIJ*)B->data;
3295 
3296   /* set matrix "i" values */
3297   a->i[0] = 0;
3298   for (i=1; i<= mbs; i++) {
3299     a->i[i]      = a->i[i-1] + browlengths[i-1];
3300     a->ilen[i-1] = browlengths[i-1];
3301   }
3302   a->nz         = 0;
3303   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3304 
3305   /* read in nonzero values */
3306   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3307   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3308   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3309 
3310   /* set "a" and "j" values into matrix */
3311   nzcount = 0; jcount = 0;
3312   for (i=0; i<mbs; i++) {
3313     nzcountb = nzcount;
3314     nmask    = 0;
3315     for (j=0; j<bs; j++) {
3316       kmax = rowlengths[i*bs+j];
3317       for (k=0; k<kmax; k++) {
3318         tmp = jj[nzcount++]/bs;
3319 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3320       }
3321     }
3322     /* sort the masked values */
3323     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3324 
3325     /* set "j" values into matrix */
3326     maskcount = 1;
3327     for (j=0; j<nmask; j++) {
3328       a->j[jcount++]  = masked[j];
3329       mask[masked[j]] = maskcount++;
3330     }
3331     /* set "a" values into matrix */
3332     ishift = bs2*a->i[i];
3333     for (j=0; j<bs; j++) {
3334       kmax = rowlengths[i*bs+j];
3335       for (k=0; k<kmax; k++) {
3336         tmp       = jj[nzcountb]/bs ;
3337         block     = mask[tmp] - 1;
3338         point     = jj[nzcountb] - bs*tmp;
3339         idx       = ishift + bs2*block + j + bs*point;
3340         a->a[idx] = (MatScalar)aa[nzcountb++];
3341       }
3342     }
3343     /* zero out the mask elements we set */
3344     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3345   }
3346   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3347 
3348   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3349   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3350   ierr = PetscFree(aa);CHKERRQ(ierr);
3351   ierr = PetscFree(jj);CHKERRQ(ierr);
3352   ierr = PetscFree(mask);CHKERRQ(ierr);
3353 
3354   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3355   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3356   ierr = MatView_Private(B);CHKERRQ(ierr);
3357 
3358   *A = B;
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 #undef __FUNCT__
3363 #define __FUNCT__ "MatCreateSeqBAIJ"
3364 /*@C
3365    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3366    compressed row) format.  For good matrix assembly performance the
3367    user should preallocate the matrix storage by setting the parameter nz
3368    (or the array nnz).  By setting these parameters accurately, performance
3369    during matrix assembly can be increased by more than a factor of 50.
3370 
3371    Collective on MPI_Comm
3372 
3373    Input Parameters:
3374 +  comm - MPI communicator, set to PETSC_COMM_SELF
3375 .  bs - size of block
3376 .  m - number of rows
3377 .  n - number of columns
3378 .  nz - number of nonzero blocks  per block row (same for all rows)
3379 -  nnz - array containing the number of nonzero blocks in the various block rows
3380          (possibly different for each block row) or PETSC_NULL
3381 
3382    Output Parameter:
3383 .  A - the matrix
3384 
3385    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3386    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3387    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3388 
3389    Options Database Keys:
3390 .   -mat_no_unroll - uses code that does not unroll the loops in the
3391                      block calculations (much slower)
3392 .    -mat_block_size - size of the blocks to use
3393 
3394    Level: intermediate
3395 
3396    Notes:
3397    The number of rows and columns must be divisible by blocksize.
3398 
3399    If the nnz parameter is given then the nz parameter is ignored
3400 
3401    A nonzero block is any block that as 1 or more nonzeros in it
3402 
3403    The block AIJ format is fully compatible with standard Fortran 77
3404    storage.  That is, the stored row and column indices can begin at
3405    either one (as in Fortran) or zero.  See the users' manual for details.
3406 
3407    Specify the preallocated storage with either nz or nnz (not both).
3408    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3409    allocation.  For additional details, see the users manual chapter on
3410    matrices.
3411 
3412 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3413 @*/
3414 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3415 {
3416   PetscErrorCode ierr;
3417 
3418   PetscFunctionBegin;
3419   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3420   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3421   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3422   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3423   PetscFunctionReturn(0);
3424 }
3425 
3426 #undef __FUNCT__
3427 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3428 /*@C
3429    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3430    per row in the matrix. For good matrix assembly performance the
3431    user should preallocate the matrix storage by setting the parameter nz
3432    (or the array nnz).  By setting these parameters accurately, performance
3433    during matrix assembly can be increased by more than a factor of 50.
3434 
3435    Collective on MPI_Comm
3436 
3437    Input Parameters:
3438 +  A - the matrix
3439 .  bs - size of block
3440 .  nz - number of block nonzeros per block row (same for all rows)
3441 -  nnz - array containing the number of block nonzeros in the various block rows
3442          (possibly different for each block row) or PETSC_NULL
3443 
3444    Options Database Keys:
3445 .   -mat_no_unroll - uses code that does not unroll the loops in the
3446                      block calculations (much slower)
3447 .    -mat_block_size - size of the blocks to use
3448 
3449    Level: intermediate
3450 
3451    Notes:
3452    If the nnz parameter is given then the nz parameter is ignored
3453 
3454    You can call MatGetInfo() to get information on how effective the preallocation was;
3455    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3456    You can also run with the option -info and look for messages with the string
3457    malloc in them to see if additional memory allocation was needed.
3458 
3459    The block AIJ format is fully compatible with standard Fortran 77
3460    storage.  That is, the stored row and column indices can begin at
3461    either one (as in Fortran) or zero.  See the users' manual for details.
3462 
3463    Specify the preallocated storage with either nz or nnz (not both).
3464    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3465    allocation.  For additional details, see the users manual chapter on
3466    matrices.
3467 
3468 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3469 @*/
3470 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3471 {
3472   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3473 
3474   PetscFunctionBegin;
3475   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3476   if (f) {
3477     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3478   }
3479   PetscFunctionReturn(0);
3480 }
3481 
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3484 /*@C
3485    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3486    (the default sequential PETSc format).
3487 
3488    Collective on MPI_Comm
3489 
3490    Input Parameters:
3491 +  A - the matrix
3492 .  i - the indices into j for the start of each local row (starts with zero)
3493 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3494 -  v - optional values in the matrix
3495 
3496    Level: developer
3497 
3498 .keywords: matrix, aij, compressed row, sparse
3499 
3500 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3501 @*/
3502 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3503 {
3504   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3505 
3506   PetscFunctionBegin;
3507   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3508   if (f) {
3509     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3510   }
3511   PetscFunctionReturn(0);
3512 }
3513 
3514 
3515 #undef __FUNCT__
3516 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3517 /*@
3518      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3519               (upper triangular entries in CSR format) provided by the user.
3520 
3521      Collective on MPI_Comm
3522 
3523    Input Parameters:
3524 +  comm - must be an MPI communicator of size 1
3525 .  bs - size of block
3526 .  m - number of rows
3527 .  n - number of columns
3528 .  i - row indices
3529 .  j - column indices
3530 -  a - matrix values
3531 
3532    Output Parameter:
3533 .  mat - the matrix
3534 
3535    Level: intermediate
3536 
3537    Notes:
3538        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3539     once the matrix is destroyed
3540 
3541        You cannot set new nonzero locations into this matrix, that will generate an error.
3542 
3543        The i and j indices are 0 based
3544 
3545 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3546 
3547 @*/
3548 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3549 {
3550   PetscErrorCode ierr;
3551   PetscInt       ii;
3552   Mat_SeqBAIJ    *baij;
3553 
3554   PetscFunctionBegin;
3555   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3556   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3557 
3558   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3559   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3560   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3561   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3562   baij = (Mat_SeqBAIJ*)(*mat)->data;
3563   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3564 
3565   baij->i = i;
3566   baij->j = j;
3567   baij->a = a;
3568   baij->singlemalloc = PETSC_FALSE;
3569   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3570   baij->free_a       = PETSC_FALSE;
3571   baij->free_ij       = PETSC_FALSE;
3572 
3573   for (ii=0; ii<m; ii++) {
3574     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3575 #if defined(PETSC_USE_DEBUG)
3576     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3577 #endif
3578   }
3579 #if defined(PETSC_USE_DEBUG)
3580   for (ii=0; ii<baij->i[m]; ii++) {
3581     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3582     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3583   }
3584 #endif
3585 
3586   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3587   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3588   PetscFunctionReturn(0);
3589 }
3590