xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 70dcbbb9ec575d8ef59c8591bf18c9c2e6e303fc)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     default:
129       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139 {
140   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141   PetscScalar        *x,x1,s1;
142   const PetscScalar  *b;
143   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144   PetscErrorCode     ierr;
145   PetscInt           m = a->mbs,i,i2,nz,idx;
146   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
147 
148   PetscFunctionBegin;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
150   its = its*lits;
151   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
156 
157   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
158 
159   diag  = a->diag;
160   idiag = a->idiag;
161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
163 
164   if (flag & SOR_ZERO_INITIAL_GUESS) {
165     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166       x[0] = b[0]*idiag[0];
167       i2     = 1;
168       idiag += 1;
169       for (i=1; i<m; i++) {
170         v     = aa + ai[i];
171         vi    = aj + ai[i];
172         nz    = diag[i] - ai[i];
173         s1    = b[i2];
174         while (nz--) {
175           idx  = (*vi++);
176           x1   = x[idx];
177           s1  -= v[0]*x1;
178           v   += 1;
179         }
180         x[i2]   = idiag[0]*s1;
181         idiag   += 1;
182         i2      += 1;
183       }
184       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
185       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
186     }
187     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
188         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
189       i2    = 0;
190       mdiag = a->idiag+a->mbs;
191       for (i=0; i<m; i++) {
192         x1      = x[i2];
193         x[i2]   = mdiag[0]*x1;
194         mdiag  += 1;
195         i2     += 1;
196       }
197       ierr = PetscLogFlops(m);CHKERRQ(ierr);
198     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
199       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
200     }
201     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
202       idiag   = a->idiag+a->mbs - 1;
203       i2      = m - 1;
204       x1      = x[i2];
205       x[i2]   = idiag[0]*x1;
206       idiag -= 1;
207       i2    -= 1;
208       for (i=m-2; i>=0; i--) {
209         v     = aa + (diag[i]+1);
210         vi    = aj + diag[i] + 1;
211         nz    = ai[i+1] - diag[i] - 1;
212         s1    = x[i2];
213         while (nz--) {
214           idx  = (*vi++);
215           x1   = x[idx];
216           s1  -= v[0]*x1;
217           v   += 1;
218         }
219         x[i2]   = idiag[0]*s1;
220         idiag   -= 1;
221         i2      -= 1;
222       }
223       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
224     }
225   } else {
226     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
227   }
228   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
229   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
236 {
237   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
238   PetscScalar        *x,x1,x2,s1,s2;
239   const PetscScalar  *b;
240   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
241   PetscErrorCode     ierr;
242   PetscInt           m = a->mbs,i,i2,nz,idx;
243   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
244 
245   PetscFunctionBegin;
246   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
247   its = its*lits;
248   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
249   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
250   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
251   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
252   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
253 
254   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
255 
256   diag  = a->diag;
257   idiag = a->idiag;
258   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
259   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
260 
261   if (flag & SOR_ZERO_INITIAL_GUESS) {
262     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
263       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
264       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
265       i2     = 2;
266       idiag += 4;
267       for (i=1; i<m; i++) {
268 	v     = aa + 4*ai[i];
269 	vi    = aj + ai[i];
270 	nz    = diag[i] - ai[i];
271 	s1    = b[i2]; s2 = b[i2+1];
272 	while (nz--) {
273 	  idx  = 2*(*vi++);
274 	  x1   = x[idx]; x2 = x[1+idx];
275 	  s1  -= v[0]*x1 + v[2]*x2;
276 	  s2  -= v[1]*x1 + v[3]*x2;
277 	  v   += 4;
278 	}
279 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
280 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
281         idiag   += 4;
282         i2      += 2;
283       }
284       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
285       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
286     }
287     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
288         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
289       i2    = 0;
290       mdiag = a->idiag+4*a->mbs;
291       for (i=0; i<m; i++) {
292         x1      = x[i2]; x2 = x[i2+1];
293         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
294         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
295         mdiag  += 4;
296         i2     += 2;
297       }
298       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
299     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
300       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
301     }
302     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
303       idiag   = a->idiag+4*a->mbs - 4;
304       i2      = 2*m - 2;
305       x1      = x[i2]; x2 = x[i2+1];
306       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
307       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
308       idiag -= 4;
309       i2    -= 2;
310       for (i=m-2; i>=0; i--) {
311 	v     = aa + 4*(diag[i]+1);
312 	vi    = aj + diag[i] + 1;
313 	nz    = ai[i+1] - diag[i] - 1;
314 	s1    = x[i2]; s2 = x[i2+1];
315 	while (nz--) {
316 	  idx  = 2*(*vi++);
317 	  x1   = x[idx]; x2 = x[1+idx];
318 	  s1  -= v[0]*x1 + v[2]*x2;
319 	  s2  -= v[1]*x1 + v[3]*x2;
320 	  v   += 4;
321 	}
322 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
323 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
324         idiag   -= 4;
325         i2      -= 2;
326       }
327       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
328     }
329   } else {
330     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
331   }
332   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
333   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
340 {
341   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
342   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
343   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
344   const PetscScalar  *b;
345   PetscErrorCode     ierr;
346   PetscInt           m = a->mbs,i,i2,nz,idx;
347   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
348 
349   PetscFunctionBegin;
350   its = its*lits;
351   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
352   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
353   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
354   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
355   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
356   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
357 
358   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
359 
360   diag  = a->diag;
361   idiag = a->idiag;
362   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
363   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
364 
365   if (flag & SOR_ZERO_INITIAL_GUESS) {
366     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
367       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
368       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
369       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
370       i2     = 3;
371       idiag += 9;
372       for (i=1; i<m; i++) {
373         v     = aa + 9*ai[i];
374         vi    = aj + ai[i];
375         nz    = diag[i] - ai[i];
376         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
377         while (nz--) {
378           idx  = 3*(*vi++);
379           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
380           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
381           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
382           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
383           v   += 9;
384         }
385         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
386         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
387         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
388         idiag   += 9;
389         i2      += 3;
390       }
391       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
392       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
393     }
394     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
395         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
396       i2    = 0;
397       mdiag = a->idiag+9*a->mbs;
398       for (i=0; i<m; i++) {
399         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
400         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
401         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
402         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
403         mdiag  += 9;
404         i2     += 3;
405       }
406       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
407     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
408       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
409     }
410     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
411       idiag   = a->idiag+9*a->mbs - 9;
412       i2      = 3*m - 3;
413       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
414       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
415       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
416       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
417       idiag -= 9;
418       i2    -= 3;
419       for (i=m-2; i>=0; i--) {
420         v     = aa + 9*(diag[i]+1);
421         vi    = aj + diag[i] + 1;
422         nz    = ai[i+1] - diag[i] - 1;
423         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
424         while (nz--) {
425           idx  = 3*(*vi++);
426           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
427           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
428           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
429           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
430           v   += 9;
431         }
432         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
433         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
434         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
435         idiag   -= 9;
436         i2      -= 3;
437       }
438       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
439     }
440   } else {
441     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
442   }
443   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
451 {
452   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
453   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
454   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
455   const PetscScalar  *b;
456   PetscErrorCode     ierr;
457   PetscInt           m = a->mbs,i,i2,nz,idx;
458   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
459 
460   PetscFunctionBegin;
461   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
462   its = its*lits;
463   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
464   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
465   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
466   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
467   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
468 
469   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
470 
471   diag  = a->diag;
472   idiag = a->idiag;
473   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
474   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
475 
476   if (flag & SOR_ZERO_INITIAL_GUESS) {
477     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
478       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
479       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
480       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
481       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
482       i2     = 4;
483       idiag += 16;
484       for (i=1; i<m; i++) {
485 	v     = aa + 16*ai[i];
486 	vi    = aj + ai[i];
487 	nz    = diag[i] - ai[i];
488 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
489 	while (nz--) {
490 	  idx  = 4*(*vi++);
491 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
492 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
493 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
494 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
495 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
496 	  v   += 16;
497 	}
498 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
499 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
500 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
501 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
502         idiag   += 16;
503         i2      += 4;
504       }
505       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
507     }
508     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510       i2    = 0;
511       mdiag = a->idiag+16*a->mbs;
512       for (i=0; i<m; i++) {
513         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
515         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
516         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
517         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
518         mdiag  += 16;
519         i2     += 4;
520       }
521       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
522     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
523       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
524     }
525     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
526       idiag   = a->idiag+16*a->mbs - 16;
527       i2      = 4*m - 4;
528       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
529       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
530       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
531       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
532       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
533       idiag -= 16;
534       i2    -= 4;
535       for (i=m-2; i>=0; i--) {
536 	v     = aa + 16*(diag[i]+1);
537 	vi    = aj + diag[i] + 1;
538 	nz    = ai[i+1] - diag[i] - 1;
539 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
540 	while (nz--) {
541 	  idx  = 4*(*vi++);
542 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
543 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
544 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
545 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
546 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
547 	  v   += 16;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
550 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
551 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
552 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
553         idiag   -= 16;
554         i2      -= 4;
555       }
556       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
569 {
570   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
571   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
572   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
573   const PetscScalar  *b;
574   PetscErrorCode     ierr;
575   PetscInt           m = a->mbs,i,i2,nz,idx;
576   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
577 
578   PetscFunctionBegin;
579   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
580   its = its*lits;
581   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
582   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
583   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
584   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
585   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
586 
587   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
588 
589   diag  = a->diag;
590   idiag = a->idiag;
591   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
592   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
593 
594   if (flag & SOR_ZERO_INITIAL_GUESS) {
595     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
596       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
597       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
598       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
599       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
600       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
601       i2     = 5;
602       idiag += 25;
603       for (i=1; i<m; i++) {
604 	v     = aa + 25*ai[i];
605 	vi    = aj + ai[i];
606 	nz    = diag[i] - ai[i];
607 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
608 	while (nz--) {
609 	  idx  = 5*(*vi++);
610 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
611 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
612 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
613 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
614 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
615 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
616 	  v   += 25;
617 	}
618 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
619 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
620 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
621 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
622 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
623         idiag   += 25;
624         i2      += 5;
625       }
626       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
627       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
628     }
629     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
630         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
631       i2    = 0;
632       mdiag = a->idiag+25*a->mbs;
633       for (i=0; i<m; i++) {
634         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
635         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
636         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
637         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
638         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
639         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
640         mdiag  += 25;
641         i2     += 5;
642       }
643       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
644     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
645       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
646     }
647     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
648       idiag   = a->idiag+25*a->mbs - 25;
649       i2      = 5*m - 5;
650       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
651       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
652       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
653       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
654       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
655       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
656       idiag -= 25;
657       i2    -= 5;
658       for (i=m-2; i>=0; i--) {
659 	v     = aa + 25*(diag[i]+1);
660 	vi    = aj + diag[i] + 1;
661 	nz    = ai[i+1] - diag[i] - 1;
662 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
663 	while (nz--) {
664 	  idx  = 5*(*vi++);
665 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
666 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
667 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
668 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
669 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
670 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
671 	  v   += 25;
672 	}
673 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
674 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
675 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
676 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
677 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
678         idiag   -= 25;
679         i2      -= 5;
680       }
681       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
682     }
683   } else {
684     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
685   }
686   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
687   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
694 {
695   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
696   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
697   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
698   const PetscScalar  *b;
699   PetscErrorCode     ierr;
700   PetscInt           m = a->mbs,i,i2,nz,idx;
701   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
702 
703   PetscFunctionBegin;
704   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
705   its = its*lits;
706   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
707   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
708   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
709   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
710   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
711 
712   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
713 
714   diag  = a->diag;
715   idiag = a->idiag;
716   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
717   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
718 
719   if (flag & SOR_ZERO_INITIAL_GUESS) {
720     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
721       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
722       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
723       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
724       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
725       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
726       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
727       i2     = 6;
728       idiag += 36;
729       for (i=1; i<m; i++) {
730         v     = aa + 36*ai[i];
731         vi    = aj + ai[i];
732         nz    = diag[i] - ai[i];
733         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
734         while (nz--) {
735           idx  = 6*(*vi++);
736           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
737           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
738           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
739           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
740           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
741           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
742           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
743           v   += 36;
744         }
745         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
746         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
747         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
748         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
749         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
750         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
751         idiag   += 36;
752         i2      += 6;
753       }
754       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
755       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
756     }
757     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
758         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
759       i2    = 0;
760       mdiag = a->idiag+36*a->mbs;
761       for (i=0; i<m; i++) {
762         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
763         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
764         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
765         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
766         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
767         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
768         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
769         mdiag  += 36;
770         i2     += 6;
771       }
772       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
773     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
774       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
775     }
776     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
777       idiag   = a->idiag+36*a->mbs - 36;
778       i2      = 6*m - 6;
779       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
780       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
781       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
782       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
783       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
784       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
785       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
786       idiag -= 36;
787       i2    -= 6;
788       for (i=m-2; i>=0; i--) {
789         v     = aa + 36*(diag[i]+1);
790         vi    = aj + diag[i] + 1;
791         nz    = ai[i+1] - diag[i] - 1;
792         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
793         while (nz--) {
794           idx  = 6*(*vi++);
795           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
796           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
797           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
798           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
799           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
800           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
801           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
802           v   += 36;
803         }
804         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
805         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
806         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
807         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
808         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
809         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
810         idiag   -= 36;
811         i2      -= 6;
812       }
813       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
814     }
815   } else {
816     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
817   }
818   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
819   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
826 {
827   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
828   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
829   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
830   const PetscScalar  *b;
831   PetscErrorCode     ierr;
832   PetscInt           m = a->mbs,i,i2,nz,idx;
833   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
834 
835   PetscFunctionBegin;
836   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
837   its = its*lits;
838   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
839   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
840   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
841   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
842   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
843 
844   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
845 
846   diag  = a->diag;
847   idiag = a->idiag;
848   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
849   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
850 
851   if (flag & SOR_ZERO_INITIAL_GUESS) {
852     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
853       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
854       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
855       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
856       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
857       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
858       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
859       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
860       i2     = 7;
861       idiag += 49;
862       for (i=1; i<m; i++) {
863         v     = aa + 49*ai[i];
864         vi    = aj + ai[i];
865         nz    = diag[i] - ai[i];
866         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
867         while (nz--) {
868           idx  = 7*(*vi++);
869           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
870           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
871           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
872           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
873           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
874           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
875           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
876           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
877           v   += 49;
878         }
879         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
880         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
881         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
882         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
883         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
884         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
885         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
886         idiag   += 49;
887         i2      += 7;
888       }
889       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
890       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
891     }
892     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
893         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
894       i2    = 0;
895       mdiag = a->idiag+49*a->mbs;
896       for (i=0; i<m; i++) {
897         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
898         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
899         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
900         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
901         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
902         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
903         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
904         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
905         mdiag  += 36;
906         i2     += 6;
907       }
908       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
909     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
910       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
911     }
912     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
913       idiag   = a->idiag+49*a->mbs - 49;
914       i2      = 7*m - 7;
915       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
916       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
917       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
918       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
919       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
920       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
921       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
922       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
923       idiag -= 49;
924       i2    -= 7;
925       for (i=m-2; i>=0; i--) {
926         v     = aa + 49*(diag[i]+1);
927         vi    = aj + diag[i] + 1;
928         nz    = ai[i+1] - diag[i] - 1;
929         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
930         while (nz--) {
931           idx  = 7*(*vi++);
932           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
933           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
934           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
935           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
936           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
937           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
938           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
939           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
940           v   += 49;
941         }
942         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
943         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
944         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
945         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
946         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
947         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
948         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
949         idiag   -= 49;
950         i2      -= 7;
951       }
952       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
953     }
954   } else {
955     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
956   }
957   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
958   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 /*
963     Special version for direct calls from Fortran (Used in PETSc-fun3d)
964 */
965 #if defined(PETSC_HAVE_FORTRAN_CAPS)
966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
968 #define matsetvaluesblocked4_ matsetvaluesblocked4
969 #endif
970 
971 EXTERN_C_BEGIN
972 #undef __FUNCT__
973 #define __FUNCT__ "matsetvaluesblocked4_"
974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
975 {
976   Mat               A = *AA;
977   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
979   PetscInt          *ai=a->i,*ailen=a->ilen;
980   PetscInt          *aj=a->j,stepval,lastcol = -1;
981   const PetscScalar *value = v;
982   MatScalar         *ap,*aa = a->a,*bap;
983 
984   PetscFunctionBegin;
985   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
986   stepval = (n-1)*4;
987   for (k=0; k<m; k++) { /* loop over added rows */
988     row  = im[k];
989     rp   = aj + ai[row];
990     ap   = aa + 16*ai[row];
991     nrow = ailen[row];
992     low  = 0;
993     high = nrow;
994     for (l=0; l<n; l++) { /* loop over added columns */
995       col = in[l];
996       if (col <= lastcol) low = 0; else high = nrow;
997       lastcol = col;
998       value = v + k*(stepval+4 + l)*4;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > col) high = t;
1002         else             low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > col) break;
1006         if (rp[i] == col) {
1007           bap  = ap +  16*i;
1008           for (ii=0; ii<4; ii++,value+=stepval) {
1009             for (jj=ii; jj<16; jj+=4) {
1010               bap[jj] += *value++;
1011             }
1012           }
1013           goto noinsert2;
1014         }
1015       }
1016       N = nrow++ - 1;
1017       high++; /* added new column index thus must search to one higher than before */
1018       /* shift up all the later entries in this row */
1019       for (ii=N; ii>=i; ii--) {
1020         rp[ii+1] = rp[ii];
1021         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1022       }
1023       if (N >= i) {
1024         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1025       }
1026       rp[i] = col;
1027       bap   = ap +  16*i;
1028       for (ii=0; ii<4; ii++,value+=stepval) {
1029         for (jj=ii; jj<16; jj+=4) {
1030           bap[jj] = *value++;
1031         }
1032       }
1033       noinsert2:;
1034       low = i;
1035     }
1036     ailen[row] = nrow;
1037   }
1038   PetscFunctionReturnVoid();
1039 }
1040 EXTERN_C_END
1041 
1042 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1043 #define matsetvalues4_ MATSETVALUES4
1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1045 #define matsetvalues4_ matsetvalues4
1046 #endif
1047 
1048 EXTERN_C_BEGIN
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatSetValues4_"
1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1052 {
1053   Mat         A = *AA;
1054   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1055   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1056   PetscInt    *ai=a->i,*ailen=a->ilen;
1057   PetscInt    *aj=a->j,brow,bcol;
1058   PetscInt    ridx,cidx,lastcol = -1;
1059   MatScalar   *ap,value,*aa=a->a,*bap;
1060 
1061   PetscFunctionBegin;
1062   for (k=0; k<m; k++) { /* loop over added rows */
1063     row  = im[k]; brow = row/4;
1064     rp   = aj + ai[brow];
1065     ap   = aa + 16*ai[brow];
1066     nrow = ailen[brow];
1067     low  = 0;
1068     high = nrow;
1069     for (l=0; l<n; l++) { /* loop over added columns */
1070       col = in[l]; bcol = col/4;
1071       ridx = row % 4; cidx = col % 4;
1072       value = v[l + k*n];
1073       if (col <= lastcol) low = 0; else high = nrow;
1074       lastcol = col;
1075       while (high-low > 7) {
1076         t = (low+high)/2;
1077         if (rp[t] > bcol) high = t;
1078         else              low  = t;
1079       }
1080       for (i=low; i<high; i++) {
1081         if (rp[i] > bcol) break;
1082         if (rp[i] == bcol) {
1083           bap  = ap +  16*i + 4*cidx + ridx;
1084           *bap += value;
1085           goto noinsert1;
1086         }
1087       }
1088       N = nrow++ - 1;
1089       high++; /* added new column thus must search to one higher than before */
1090       /* shift up all the later entries in this row */
1091       for (ii=N; ii>=i; ii--) {
1092         rp[ii+1] = rp[ii];
1093         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1094       }
1095       if (N>=i) {
1096         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1097       }
1098       rp[i]                    = bcol;
1099       ap[16*i + 4*cidx + ridx] = value;
1100       noinsert1:;
1101       low = i;
1102     }
1103     ailen[brow] = nrow;
1104   }
1105   PetscFunctionReturnVoid();
1106 }
1107 EXTERN_C_END
1108 
1109 /*
1110      Checks for missing diagonals
1111 */
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1115 {
1116   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1117   PetscErrorCode ierr;
1118   PetscInt       *diag,*jj = a->j,i;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1122   *missing = PETSC_FALSE;
1123   if (A->rmap->n > 0 && !jj) {
1124     *missing  = PETSC_TRUE;
1125     if (d) *d = 0;
1126     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1127   } else {
1128     diag     = a->diag;
1129     for (i=0; i<a->mbs; i++) {
1130       if (jj[diag[i]] != i) {
1131         *missing  = PETSC_TRUE;
1132         if (d) *d = i;
1133         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1134       }
1135     }
1136   }
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1143 {
1144   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscInt       i,j,m = a->mbs;
1147 
1148   PetscFunctionBegin;
1149   if (!a->diag) {
1150     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1151   }
1152   for (i=0; i<m; i++) {
1153     a->diag[i] = a->i[i+1];
1154     for (j=a->i[i]; j<a->i[i+1]; j++) {
1155       if (a->j[j] == i) {
1156         a->diag[i] = j;
1157         break;
1158       }
1159     }
1160   }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 
1165 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1169 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1170 {
1171   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1172   PetscErrorCode ierr;
1173   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1174   PetscInt       *tia, *tja;
1175 
1176   PetscFunctionBegin;
1177   *nn = n;
1178   if (!ia) PetscFunctionReturn(0);
1179   if (symmetric) {
1180     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1181   } else {
1182     tia = a->i; tja = a->j;
1183   }
1184 
1185   if (!blockcompressed && bs > 1) {
1186     (*nn) *= bs;
1187     nbs    = bs;
1188     /* malloc & create the natural set of indices */
1189     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1190     if (n) {
1191       (*ia)[0] = 0;
1192       for (j=1; j<bs; j++) {
1193         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1194       }
1195     }
1196 
1197     for (i=1; i<n; i++) {
1198       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1199       for (j=1; j<bs; j++) {
1200         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1201       }
1202     }
1203     if (n) {
1204       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1205     }
1206 
1207     if (ja) {
1208       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1209       cnt = 0;
1210       for (i=0; i<n; i++) {
1211         for (j=0; j<bs; j++) {
1212           for (k=tia[i]; k<tia[i+1]; k++) {
1213             for (l=0; l<bs; l++) {
1214               (*ja)[cnt++] = bs*tja[k] + l;
1215 	    }
1216           }
1217         }
1218       }
1219     }
1220 
1221     n     *= bs;
1222     nz *= bs*bs;
1223     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1224       ierr = PetscFree(tia);CHKERRQ(ierr);
1225       ierr = PetscFree(tja);CHKERRQ(ierr);
1226     }
1227   } else if (oshift == 1) {
1228     if (symmetric) {
1229       PetscInt nz = tia[A->rmap->n/bs];
1230       /*  add 1 to i and j indices */
1231       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1232       *ia = tia;
1233       if (ja) {
1234 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1235         *ja = tja;
1236       }
1237     } else {
1238       PetscInt nz = a->i[A->rmap->n/bs];
1239       /* malloc space and  add 1 to i and j indices */
1240       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1241       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1242       if (ja) {
1243 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1244 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1245       }
1246     }
1247   } else {
1248     *ia = tia;
1249     if (ja) *ja = tja;
1250   }
1251 
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1257 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1258 {
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262   if (!ia) PetscFunctionReturn(0);
1263   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1264     ierr = PetscFree(*ia);CHKERRQ(ierr);
1265     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1272 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1273 {
1274   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278 #if defined(PETSC_USE_LOG)
1279   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1280 #endif
1281   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1282   if (a->row) {
1283     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1284   }
1285   if (a->col) {
1286     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1287   }
1288   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1289   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1290   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1291   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1292   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1293   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1294   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1295   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1296   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1297 
1298   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1299   ierr = PetscFree(a);CHKERRQ(ierr);
1300 
1301   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1315 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1316 {
1317   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
1321   switch (op) {
1322   case MAT_ROW_ORIENTED:
1323     a->roworiented    = flg;
1324     break;
1325   case MAT_KEEP_NONZERO_PATTERN:
1326     a->keepnonzeropattern = flg;
1327     break;
1328   case MAT_NEW_NONZERO_LOCATIONS:
1329     a->nonew          = (flg ? 0 : 1);
1330     break;
1331   case MAT_NEW_NONZERO_LOCATION_ERR:
1332     a->nonew          = (flg ? -1 : 0);
1333     break;
1334   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1335     a->nonew          = (flg ? -2 : 0);
1336     break;
1337   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1338     a->nounused       = (flg ? -1 : 0);
1339     break;
1340   case MAT_NEW_DIAGONALS:
1341   case MAT_IGNORE_OFF_PROC_ENTRIES:
1342   case MAT_USE_HASH_TABLE:
1343     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1344     break;
1345   case MAT_SYMMETRIC:
1346   case MAT_STRUCTURALLY_SYMMETRIC:
1347   case MAT_HERMITIAN:
1348   case MAT_SYMMETRY_ETERNAL:
1349     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1350     break;
1351   default:
1352     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1359 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1360 {
1361   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1362   PetscErrorCode ierr;
1363   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1364   MatScalar      *aa,*aa_i;
1365   PetscScalar    *v_i;
1366 
1367   PetscFunctionBegin;
1368   bs  = A->rmap->bs;
1369   ai  = a->i;
1370   aj  = a->j;
1371   aa  = a->a;
1372   bs2 = a->bs2;
1373 
1374   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1375 
1376   bn  = row/bs;   /* Block number */
1377   bp  = row % bs; /* Block Position */
1378   M   = ai[bn+1] - ai[bn];
1379   *nz = bs*M;
1380 
1381   if (v) {
1382     *v = 0;
1383     if (*nz) {
1384       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1385       for (i=0; i<M; i++) { /* for each block in the block row */
1386         v_i  = *v + i*bs;
1387         aa_i = aa + bs2*(ai[bn] + i);
1388         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1389       }
1390     }
1391   }
1392 
1393   if (idx) {
1394     *idx = 0;
1395     if (*nz) {
1396       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1397       for (i=0; i<M; i++) { /* for each block in the block row */
1398         idx_i = *idx + i*bs;
1399         itmp  = bs*aj[ai[bn] + i];
1400         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1401       }
1402     }
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1409 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1410 {
1411   PetscErrorCode ierr;
1412 
1413   PetscFunctionBegin;
1414   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1415   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1421 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1422 {
1423   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1424   Mat            C;
1425   PetscErrorCode ierr;
1426   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1427   PetscInt       *rows,*cols,bs2=a->bs2;
1428   MatScalar      *array;
1429 
1430   PetscFunctionBegin;
1431   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1432   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1433     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1434     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1435 
1436     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1437     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1438     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1439     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1440     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1441     ierr = PetscFree(col);CHKERRQ(ierr);
1442   } else {
1443     C = *B;
1444   }
1445 
1446   array = a->a;
1447   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1448   cols = rows + bs;
1449   for (i=0; i<mbs; i++) {
1450     cols[0] = i*bs;
1451     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1452     len = ai[i+1] - ai[i];
1453     for (j=0; j<len; j++) {
1454       rows[0] = (*aj++)*bs;
1455       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1456       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1457       array += bs2;
1458     }
1459   }
1460   ierr = PetscFree(rows);CHKERRQ(ierr);
1461 
1462   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1463   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1464 
1465   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1466     *B = C;
1467   } else {
1468     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1475 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1476 {
1477   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1478   PetscErrorCode ierr;
1479   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1480   int            fd;
1481   PetscScalar    *aa;
1482   FILE           *file;
1483 
1484   PetscFunctionBegin;
1485   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1486   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1487   col_lens[0] = MAT_FILE_COOKIE;
1488 
1489   col_lens[1] = A->rmap->N;
1490   col_lens[2] = A->cmap->n;
1491   col_lens[3] = a->nz*bs2;
1492 
1493   /* store lengths of each row and write (including header) to file */
1494   count = 0;
1495   for (i=0; i<a->mbs; i++) {
1496     for (j=0; j<bs; j++) {
1497       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1498     }
1499   }
1500   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1501   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1502 
1503   /* store column indices (zero start index) */
1504   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1505   count = 0;
1506   for (i=0; i<a->mbs; i++) {
1507     for (j=0; j<bs; j++) {
1508       for (k=a->i[i]; k<a->i[i+1]; k++) {
1509         for (l=0; l<bs; l++) {
1510           jj[count++] = bs*a->j[k] + l;
1511         }
1512       }
1513     }
1514   }
1515   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1516   ierr = PetscFree(jj);CHKERRQ(ierr);
1517 
1518   /* store nonzero values */
1519   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1520   count = 0;
1521   for (i=0; i<a->mbs; i++) {
1522     for (j=0; j<bs; j++) {
1523       for (k=a->i[i]; k<a->i[i+1]; k++) {
1524         for (l=0; l<bs; l++) {
1525           aa[count++] = a->a[bs2*k + l*bs + j];
1526         }
1527       }
1528     }
1529   }
1530   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1531   ierr = PetscFree(aa);CHKERRQ(ierr);
1532 
1533   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1534   if (file) {
1535     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1542 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1543 {
1544   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1545   PetscErrorCode    ierr;
1546   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1547   PetscViewerFormat format;
1548 
1549   PetscFunctionBegin;
1550   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1551   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1552     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1553   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1554     Mat aij;
1555     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1556     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1557     ierr = MatDestroy(aij);CHKERRQ(ierr);
1558   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1559      PetscFunctionReturn(0);
1560   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1561     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1562     for (i=0; i<a->mbs; i++) {
1563       for (j=0; j<bs; j++) {
1564         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1565         for (k=a->i[i]; k<a->i[i+1]; k++) {
1566           for (l=0; l<bs; l++) {
1567 #if defined(PETSC_USE_COMPLEX)
1568             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1570                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1572               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1573                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1575               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1576             }
1577 #else
1578             if (a->a[bs2*k + l*bs + j] != 0.0) {
1579               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1580             }
1581 #endif
1582           }
1583         }
1584         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1585       }
1586     }
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1588   } else {
1589     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1590     for (i=0; i<a->mbs; i++) {
1591       for (j=0; j<bs; j++) {
1592         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1593         for (k=a->i[i]; k<a->i[i+1]; k++) {
1594           for (l=0; l<bs; l++) {
1595 #if defined(PETSC_USE_COMPLEX)
1596             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1597               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1598                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1599             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1600               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1601                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             } else {
1603               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1604             }
1605 #else
1606             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1607 #endif
1608           }
1609         }
1610         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1611       }
1612     }
1613     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1614   }
1615   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 #undef __FUNCT__
1620 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1621 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1622 {
1623   Mat            A = (Mat) Aa;
1624   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1625   PetscErrorCode ierr;
1626   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1627   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1628   MatScalar      *aa;
1629   PetscViewer    viewer;
1630 
1631   PetscFunctionBegin;
1632 
1633   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1634   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1635 
1636   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1637 
1638   /* loop over matrix elements drawing boxes */
1639   color = PETSC_DRAW_BLUE;
1640   for (i=0,row=0; i<mbs; i++,row+=bs) {
1641     for (j=a->i[i]; j<a->i[i+1]; j++) {
1642       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1643       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1644       aa = a->a + j*bs2;
1645       for (k=0; k<bs; k++) {
1646         for (l=0; l<bs; l++) {
1647           if (PetscRealPart(*aa++) >=  0.) continue;
1648           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1649         }
1650       }
1651     }
1652   }
1653   color = PETSC_DRAW_CYAN;
1654   for (i=0,row=0; i<mbs; i++,row+=bs) {
1655     for (j=a->i[i]; j<a->i[i+1]; j++) {
1656       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1657       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1658       aa = a->a + j*bs2;
1659       for (k=0; k<bs; k++) {
1660         for (l=0; l<bs; l++) {
1661           if (PetscRealPart(*aa++) != 0.) continue;
1662           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1663         }
1664       }
1665     }
1666   }
1667 
1668   color = PETSC_DRAW_RED;
1669   for (i=0,row=0; i<mbs; i++,row+=bs) {
1670     for (j=a->i[i]; j<a->i[i+1]; j++) {
1671       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1672       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1673       aa = a->a + j*bs2;
1674       for (k=0; k<bs; k++) {
1675         for (l=0; l<bs; l++) {
1676           if (PetscRealPart(*aa++) <= 0.) continue;
1677           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1678         }
1679       }
1680     }
1681   }
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1687 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1688 {
1689   PetscErrorCode ierr;
1690   PetscReal      xl,yl,xr,yr,w,h;
1691   PetscDraw      draw;
1692   PetscTruth     isnull;
1693 
1694   PetscFunctionBegin;
1695 
1696   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1697   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1698 
1699   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1700   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1701   xr += w;    yr += h;  xl = -w;     yl = -h;
1702   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1703   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatView_SeqBAIJ"
1710 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1711 {
1712   PetscErrorCode ierr;
1713   PetscTruth     iascii,isbinary,isdraw;
1714 
1715   PetscFunctionBegin;
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1717   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1718   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1719   if (iascii){
1720     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1721   } else if (isbinary) {
1722     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1723   } else if (isdraw) {
1724     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1725   } else {
1726     Mat B;
1727     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1728     ierr = MatView(B,viewer);CHKERRQ(ierr);
1729     ierr = MatDestroy(B);CHKERRQ(ierr);
1730   }
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1737 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1738 {
1739   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1740   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1741   PetscInt    *ai = a->i,*ailen = a->ilen;
1742   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1743   MatScalar   *ap,*aa = a->a;
1744 
1745   PetscFunctionBegin;
1746   for (k=0; k<m; k++) { /* loop over rows */
1747     row  = im[k]; brow = row/bs;
1748     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1749     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1750     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1751     nrow = ailen[brow];
1752     for (l=0; l<n; l++) { /* loop over columns */
1753       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1754       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1755       col  = in[l] ;
1756       bcol = col/bs;
1757       cidx = col%bs;
1758       ridx = row%bs;
1759       high = nrow;
1760       low  = 0; /* assume unsorted */
1761       while (high-low > 5) {
1762         t = (low+high)/2;
1763         if (rp[t] > bcol) high = t;
1764         else             low  = t;
1765       }
1766       for (i=low; i<high; i++) {
1767         if (rp[i] > bcol) break;
1768         if (rp[i] == bcol) {
1769           *v++ = ap[bs2*i+bs*cidx+ridx];
1770           goto finished;
1771         }
1772       }
1773       *v++ = 0.0;
1774       finished:;
1775     }
1776   }
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #define CHUNKSIZE 10
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1783 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1784 {
1785   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1786   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1787   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1788   PetscErrorCode    ierr;
1789   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1790   PetscTruth        roworiented=a->roworiented;
1791   const PetscScalar *value = v;
1792   MatScalar         *ap,*aa = a->a,*bap;
1793 
1794   PetscFunctionBegin;
1795   if (roworiented) {
1796     stepval = (n-1)*bs;
1797   } else {
1798     stepval = (m-1)*bs;
1799   }
1800   for (k=0; k<m; k++) { /* loop over added rows */
1801     row  = im[k];
1802     if (row < 0) continue;
1803 #if defined(PETSC_USE_DEBUG)
1804     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1805 #endif
1806     rp   = aj + ai[row];
1807     ap   = aa + bs2*ai[row];
1808     rmax = imax[row];
1809     nrow = ailen[row];
1810     low  = 0;
1811     high = nrow;
1812     for (l=0; l<n; l++) { /* loop over added columns */
1813       if (in[l] < 0) continue;
1814 #if defined(PETSC_USE_DEBUG)
1815       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1816 #endif
1817       col = in[l];
1818       if (roworiented) {
1819         value = v + k*(stepval+bs)*bs + l*bs;
1820       } else {
1821         value = v + l*(stepval+bs)*bs + k*bs;
1822       }
1823       if (col <= lastcol) low = 0; else high = nrow;
1824       lastcol = col;
1825       while (high-low > 7) {
1826         t = (low+high)/2;
1827         if (rp[t] > col) high = t;
1828         else             low  = t;
1829       }
1830       for (i=low; i<high; i++) {
1831         if (rp[i] > col) break;
1832         if (rp[i] == col) {
1833           bap  = ap +  bs2*i;
1834           if (roworiented) {
1835             if (is == ADD_VALUES) {
1836               for (ii=0; ii<bs; ii++,value+=stepval) {
1837                 for (jj=ii; jj<bs2; jj+=bs) {
1838                   bap[jj] += *value++;
1839                 }
1840               }
1841             } else {
1842               for (ii=0; ii<bs; ii++,value+=stepval) {
1843                 for (jj=ii; jj<bs2; jj+=bs) {
1844                   bap[jj] = *value++;
1845                 }
1846               }
1847             }
1848           } else {
1849             if (is == ADD_VALUES) {
1850               for (ii=0; ii<bs; ii++,value+=stepval) {
1851                 for (jj=0; jj<bs; jj++) {
1852                   *bap++ += *value++;
1853                 }
1854               }
1855             } else {
1856               for (ii=0; ii<bs; ii++,value+=stepval) {
1857                 for (jj=0; jj<bs; jj++) {
1858                   *bap++  = *value++;
1859                 }
1860               }
1861             }
1862           }
1863           goto noinsert2;
1864         }
1865       }
1866       if (nonew == 1) goto noinsert2;
1867       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1868       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1869       N = nrow++ - 1; high++;
1870       /* shift up all the later entries in this row */
1871       for (ii=N; ii>=i; ii--) {
1872         rp[ii+1] = rp[ii];
1873         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1874       }
1875       if (N >= i) {
1876         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1877       }
1878       rp[i] = col;
1879       bap   = ap +  bs2*i;
1880       if (roworiented) {
1881         for (ii=0; ii<bs; ii++,value+=stepval) {
1882           for (jj=ii; jj<bs2; jj+=bs) {
1883             bap[jj] = *value++;
1884           }
1885         }
1886       } else {
1887         for (ii=0; ii<bs; ii++,value+=stepval) {
1888           for (jj=0; jj<bs; jj++) {
1889             *bap++  = *value++;
1890           }
1891         }
1892       }
1893       noinsert2:;
1894       low = i;
1895     }
1896     ailen[row] = nrow;
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1903 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1904 {
1905   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1906   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1907   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1908   PetscErrorCode ierr;
1909   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1910   MatScalar      *aa = a->a,*ap;
1911   PetscReal      ratio=0.6;
1912 
1913   PetscFunctionBegin;
1914   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1915 
1916   if (m) rmax = ailen[0];
1917   for (i=1; i<mbs; i++) {
1918     /* move each row back by the amount of empty slots (fshift) before it*/
1919     fshift += imax[i-1] - ailen[i-1];
1920     rmax   = PetscMax(rmax,ailen[i]);
1921     if (fshift) {
1922       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1923       N = ailen[i];
1924       for (j=0; j<N; j++) {
1925         ip[j-fshift] = ip[j];
1926         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1927       }
1928     }
1929     ai[i] = ai[i-1] + ailen[i-1];
1930   }
1931   if (mbs) {
1932     fshift += imax[mbs-1] - ailen[mbs-1];
1933     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1934   }
1935   /* reset ilen and imax for each row */
1936   for (i=0; i<mbs; i++) {
1937     ailen[i] = imax[i] = ai[i+1] - ai[i];
1938   }
1939   a->nz = ai[mbs];
1940 
1941   /* diagonals may have moved, so kill the diagonal pointers */
1942   a->idiagvalid = PETSC_FALSE;
1943   if (fshift && a->diag) {
1944     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1945     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1946     a->diag = 0;
1947   }
1948   if (fshift && a->nounused == -1) {
1949     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1950   }
1951   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1952   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1953   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1954   a->reallocs          = 0;
1955   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1956 
1957   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1958   if (a->compressedrow.use){
1959     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1960   }
1961 
1962   A->same_nonzero = PETSC_TRUE;
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 /*
1967    This function returns an array of flags which indicate the locations of contiguous
1968    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1969    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1970    Assume: sizes should be long enough to hold all the values.
1971 */
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1974 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1975 {
1976   PetscInt   i,j,k,row;
1977   PetscTruth flg;
1978 
1979   PetscFunctionBegin;
1980   for (i=0,j=0; i<n; j++) {
1981     row = idx[i];
1982     if (row%bs!=0) { /* Not the begining of a block */
1983       sizes[j] = 1;
1984       i++;
1985     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1986       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1987       i++;
1988     } else { /* Begining of the block, so check if the complete block exists */
1989       flg = PETSC_TRUE;
1990       for (k=1; k<bs; k++) {
1991         if (row+k != idx[i+k]) { /* break in the block */
1992           flg = PETSC_FALSE;
1993           break;
1994         }
1995       }
1996       if (flg) { /* No break in the bs */
1997         sizes[j] = bs;
1998         i+= bs;
1999       } else {
2000         sizes[j] = 1;
2001         i++;
2002       }
2003     }
2004   }
2005   *bs_max = j;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2011 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2012 {
2013   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2014   PetscErrorCode ierr;
2015   PetscInt       i,j,k,count,*rows;
2016   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2017   PetscScalar    zero = 0.0;
2018   MatScalar      *aa;
2019 
2020   PetscFunctionBegin;
2021   /* Make a copy of the IS and  sort it */
2022   /* allocate memory for rows,sizes */
2023   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2024   sizes = rows + is_n;
2025 
2026   /* copy IS values to rows, and sort them */
2027   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2028   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2029   if (baij->keepnonzeropattern) {
2030     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2031     bs_max = is_n;
2032     A->same_nonzero = PETSC_TRUE;
2033   } else {
2034     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2035     A->same_nonzero = PETSC_FALSE;
2036   }
2037 
2038   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2039     row   = rows[j];
2040     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2041     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2042     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2043     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2044       if (diag != 0.0) {
2045         if (baij->ilen[row/bs] > 0) {
2046           baij->ilen[row/bs]       = 1;
2047           baij->j[baij->i[row/bs]] = row/bs;
2048           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2049         }
2050         /* Now insert all the diagonal values for this bs */
2051         for (k=0; k<bs; k++) {
2052           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2053         }
2054       } else { /* (diag == 0.0) */
2055         baij->ilen[row/bs] = 0;
2056       } /* end (diag == 0.0) */
2057     } else { /* (sizes[i] != bs) */
2058 #if defined (PETSC_USE_DEBUG)
2059       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2060 #endif
2061       for (k=0; k<count; k++) {
2062         aa[0] =  zero;
2063         aa    += bs;
2064       }
2065       if (diag != 0.0) {
2066         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2067       }
2068     }
2069   }
2070 
2071   ierr = PetscFree(rows);CHKERRQ(ierr);
2072   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2078 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2079 {
2080   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2081   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2082   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2083   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2084   PetscErrorCode ierr;
2085   PetscInt       ridx,cidx,bs2=a->bs2;
2086   PetscTruth     roworiented=a->roworiented;
2087   MatScalar      *ap,value,*aa=a->a,*bap;
2088 
2089   PetscFunctionBegin;
2090   for (k=0; k<m; k++) { /* loop over added rows */
2091     row  = im[k];
2092     brow = row/bs;
2093     if (row < 0) continue;
2094 #if defined(PETSC_USE_DEBUG)
2095     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2096 #endif
2097     rp   = aj + ai[brow];
2098     ap   = aa + bs2*ai[brow];
2099     rmax = imax[brow];
2100     nrow = ailen[brow];
2101     low  = 0;
2102     high = nrow;
2103     for (l=0; l<n; l++) { /* loop over added columns */
2104       if (in[l] < 0) continue;
2105 #if defined(PETSC_USE_DEBUG)
2106       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2107 #endif
2108       col = in[l]; bcol = col/bs;
2109       ridx = row % bs; cidx = col % bs;
2110       if (roworiented) {
2111         value = v[l + k*n];
2112       } else {
2113         value = v[k + l*m];
2114       }
2115       if (col <= lastcol) low = 0; else high = nrow;
2116       lastcol = col;
2117       while (high-low > 7) {
2118         t = (low+high)/2;
2119         if (rp[t] > bcol) high = t;
2120         else              low  = t;
2121       }
2122       for (i=low; i<high; i++) {
2123         if (rp[i] > bcol) break;
2124         if (rp[i] == bcol) {
2125           bap  = ap +  bs2*i + bs*cidx + ridx;
2126           if (is == ADD_VALUES) *bap += value;
2127           else                  *bap  = value;
2128           goto noinsert1;
2129         }
2130       }
2131       if (nonew == 1) goto noinsert1;
2132       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2133       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2134       N = nrow++ - 1; high++;
2135       /* shift up all the later entries in this row */
2136       for (ii=N; ii>=i; ii--) {
2137         rp[ii+1] = rp[ii];
2138         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2139       }
2140       if (N>=i) {
2141         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2142       }
2143       rp[i]                      = bcol;
2144       ap[bs2*i + bs*cidx + ridx] = value;
2145       a->nz++;
2146       noinsert1:;
2147       low = i;
2148     }
2149     ailen[brow] = nrow;
2150   }
2151   A->same_nonzero = PETSC_FALSE;
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2156 
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2159 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2160 {
2161   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2162   Mat            outA;
2163   PetscErrorCode ierr;
2164   PetscTruth     row_identity,col_identity;
2165 
2166   PetscFunctionBegin;
2167   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2168   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2169   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2170   if (!row_identity || !col_identity) {
2171     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2172   }
2173 
2174   outA          = inA;
2175   inA->factor   = MAT_FACTOR_LU;
2176 
2177   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2178 
2179   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2180   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2181   a->row = row;
2182   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2183   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2184   a->col = col;
2185 
2186   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2187   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2188   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2189 
2190   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2191   if (!a->solve_work) {
2192     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2193     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2194   }
2195   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2196 
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 EXTERN_C_BEGIN
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2203 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2204 {
2205   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2206   PetscInt    i,nz,mbs;
2207 
2208   PetscFunctionBegin;
2209   nz  = baij->maxnz/baij->bs2;
2210   mbs = baij->mbs;
2211   for (i=0; i<nz; i++) {
2212     baij->j[i] = indices[i];
2213   }
2214   baij->nz = nz;
2215   for (i=0; i<mbs; i++) {
2216     baij->ilen[i] = baij->imax[i];
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 EXTERN_C_END
2221 
2222 #undef __FUNCT__
2223 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2224 /*@
2225     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2226        in the matrix.
2227 
2228   Input Parameters:
2229 +  mat - the SeqBAIJ matrix
2230 -  indices - the column indices
2231 
2232   Level: advanced
2233 
2234   Notes:
2235     This can be called if you have precomputed the nonzero structure of the
2236   matrix and want to provide it to the matrix object to improve the performance
2237   of the MatSetValues() operation.
2238 
2239     You MUST have set the correct numbers of nonzeros per row in the call to
2240   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2241 
2242     MUST be called before any calls to MatSetValues();
2243 
2244 @*/
2245 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2246 {
2247   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2248 
2249   PetscFunctionBegin;
2250   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2251   PetscValidPointer(indices,2);
2252   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2253   if (f) {
2254     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2255   } else {
2256     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2257   }
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2263 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2264 {
2265   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2266   PetscErrorCode ierr;
2267   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2268   PetscReal      atmp;
2269   PetscScalar    *x,zero = 0.0;
2270   MatScalar      *aa;
2271   PetscInt       ncols,brow,krow,kcol;
2272 
2273   PetscFunctionBegin;
2274   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2275   bs   = A->rmap->bs;
2276   aa   = a->a;
2277   ai   = a->i;
2278   aj   = a->j;
2279   mbs  = a->mbs;
2280 
2281   ierr = VecSet(v,zero);CHKERRQ(ierr);
2282   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2283   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2284   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2285   for (i=0; i<mbs; i++) {
2286     ncols = ai[1] - ai[0]; ai++;
2287     brow  = bs*i;
2288     for (j=0; j<ncols; j++){
2289       for (kcol=0; kcol<bs; kcol++){
2290         for (krow=0; krow<bs; krow++){
2291           atmp = PetscAbsScalar(*aa);aa++;
2292           row = brow + krow;    /* row index */
2293           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2294           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2295         }
2296       }
2297       aj++;
2298     }
2299   }
2300   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatCopy_SeqBAIJ"
2306 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2307 {
2308   PetscErrorCode ierr;
2309 
2310   PetscFunctionBegin;
2311   /* If the two matrices have the same copy implementation, use fast copy. */
2312   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2313     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2314     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2315 
2316     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2317       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2318     }
2319     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2320   } else {
2321     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2322   }
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 #undef __FUNCT__
2327 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2328 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2329 {
2330   PetscErrorCode ierr;
2331 
2332   PetscFunctionBegin;
2333   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2339 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2340 {
2341   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2342   PetscFunctionBegin;
2343   *array = a->a;
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2349 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2350 {
2351   PetscFunctionBegin;
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 #include "petscblaslapack.h"
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2358 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2359 {
2360   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2361   PetscErrorCode ierr;
2362   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2363   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2364 
2365   PetscFunctionBegin;
2366   if (str == SAME_NONZERO_PATTERN) {
2367     PetscScalar alpha = a;
2368     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2369   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2370     if (y->xtoy && y->XtoY != X) {
2371       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2372       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2373     }
2374     if (!y->xtoy) { /* get xtoy */
2375       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2376       y->XtoY = X;
2377       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2378     }
2379     bs2 = bs*bs;
2380     for (i=0; i<x->nz; i++) {
2381       j = 0;
2382       while (j < bs2){
2383         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2384         j++;
2385       }
2386     }
2387     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2388   } else {
2389     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2390   }
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2396 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2397 {
2398   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2399   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2400   MatScalar      *aa = a->a;
2401 
2402   PetscFunctionBegin;
2403   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2409 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2410 {
2411   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2412   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2413   MatScalar      *aa = a->a;
2414 
2415   PetscFunctionBegin;
2416   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2421 
2422 #undef __FUNCT__
2423 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2424 /*
2425     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2426 */
2427 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2428 {
2429   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2430   PetscErrorCode ierr;
2431   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2432   PetscInt       nz = a->i[m],row,*jj,mr,col;
2433 
2434   PetscFunctionBegin;
2435   *nn = n;
2436   if (!ia) PetscFunctionReturn(0);
2437   if (symmetric) {
2438     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2439   } else {
2440     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2441     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2442     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2443     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2444     jj = a->j;
2445     for (i=0; i<nz; i++) {
2446       collengths[jj[i]]++;
2447     }
2448     cia[0] = oshift;
2449     for (i=0; i<n; i++) {
2450       cia[i+1] = cia[i] + collengths[i];
2451     }
2452     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2453     jj   = a->j;
2454     for (row=0; row<m; row++) {
2455       mr = a->i[row+1] - a->i[row];
2456       for (i=0; i<mr; i++) {
2457         col = *jj++;
2458         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2459       }
2460     }
2461     ierr = PetscFree(collengths);CHKERRQ(ierr);
2462     *ia = cia; *ja = cja;
2463   }
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2469 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2470 {
2471   PetscErrorCode ierr;
2472 
2473   PetscFunctionBegin;
2474   if (!ia) PetscFunctionReturn(0);
2475   ierr = PetscFree(*ia);CHKERRQ(ierr);
2476   ierr = PetscFree(*ja);CHKERRQ(ierr);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2482 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2483 {
2484   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2485   PetscErrorCode ierr;
2486   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2487   PetscScalar    dx,*y,*xx,*w3_array;
2488   PetscScalar    *vscale_array;
2489   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2490   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2491   void           *fctx = coloring->fctx;
2492   PetscTruth     flg = PETSC_FALSE;
2493   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2494   Vec            x1_tmp;
2495 
2496   PetscFunctionBegin;
2497   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2498   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2499   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2500   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2501 
2502   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2503   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2504   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2505   if (flg) {
2506     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2507   } else {
2508     PetscTruth assembled;
2509     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2510     if (assembled) {
2511       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2512     }
2513   }
2514 
2515   x1_tmp = x1;
2516   if (!coloring->vscale){
2517     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2518   }
2519 
2520   /*
2521     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2522     coloring->F for the coarser grids from the finest
2523   */
2524   if (coloring->F) {
2525     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2526     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2527     if (m1 != m2) {
2528       coloring->F = 0;
2529       }
2530     }
2531 
2532   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2533     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2534   }
2535   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2536 
2537   /* Set w1 = F(x1) */
2538   if (coloring->F) {
2539     w1          = coloring->F; /* use already computed value of function */
2540     coloring->F = 0;
2541   } else {
2542     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2543     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2544     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2545   }
2546 
2547   if (!coloring->w3) {
2548     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2549     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2550   }
2551   w3 = coloring->w3;
2552 
2553     CHKMEMQ;
2554     /* Compute all the local scale factors, including ghost points */
2555   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2556   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2557   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2558   if (ctype == IS_COLORING_GHOSTED){
2559     col_start = 0; col_end = N;
2560   } else if (ctype == IS_COLORING_GLOBAL){
2561     xx = xx - start;
2562     vscale_array = vscale_array - start;
2563     col_start = start; col_end = N + start;
2564   }    CHKMEMQ;
2565   for (col=col_start; col<col_end; col++){
2566     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2567     if (coloring->htype[0] == 'w') {
2568       dx = 1.0 + unorm;
2569     } else {
2570       dx  = xx[col];
2571     }
2572     if (dx == 0.0) dx = 1.0;
2573 #if !defined(PETSC_USE_COMPLEX)
2574     if (dx < umin && dx >= 0.0)      dx = umin;
2575     else if (dx < 0.0 && dx > -umin) dx = -umin;
2576 #else
2577     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2578     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2579 #endif
2580     dx               *= epsilon;
2581     vscale_array[col] = 1.0/dx;
2582   }     CHKMEMQ;
2583   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2584   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2585   if (ctype == IS_COLORING_GLOBAL){
2586     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2587     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2588   }
2589         CHKMEMQ;
2590   if (coloring->vscaleforrow) {
2591     vscaleforrow = coloring->vscaleforrow;
2592   } else {
2593     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2594   }
2595 
2596 
2597   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2598   /*
2599     Loop over each color
2600   */
2601   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2602   for (k=0; k<coloring->ncolors; k++) {
2603     coloring->currentcolor = k;
2604     for (i=0; i<bs; i++) {
2605       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2606       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2607       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2608       /*
2609 	Loop over each column associated with color
2610 	adding the perturbation to the vector w3.
2611       */
2612       for (l=0; l<coloring->ncolumns[k]; l++) {
2613 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2614 	if (coloring->htype[0] == 'w') {
2615 	  dx = 1.0 + unorm;
2616 	} else {
2617 	  dx  = xx[col];
2618 	}
2619 	if (dx == 0.0) dx = 1.0;
2620 #if !defined(PETSC_USE_COMPLEX)
2621 	if (dx < umin && dx >= 0.0)      dx = umin;
2622 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2623 #else
2624 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2625 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2626 #endif
2627 	dx            *= epsilon;
2628 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2629 	w3_array[col] += dx;
2630       }
2631       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2632       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2633 
2634       /*
2635 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2636 	w2 = F(x1 + dx) - F(x1)
2637       */
2638       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2639       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2640       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2641       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2642 
2643       /*
2644 	Loop over rows of vector, putting results into Jacobian matrix
2645       */
2646       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2647       for (l=0; l<coloring->nrows[k]; l++) {
2648 	row    = bs*coloring->rows[k][l];             /* local row index */
2649 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2650         for (j=0; j<bs; j++) {
2651   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2652           srows[j]  = row + start + j;
2653         }
2654 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2655       }
2656       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2657     }
2658   } /* endof for each color */
2659   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2660   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2661   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2662   ierr = PetscFree(srows);CHKERRQ(ierr);
2663 
2664   coloring->currentcolor = -1;
2665   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2666   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2667   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 /* -------------------------------------------------------------------*/
2672 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2673        MatGetRow_SeqBAIJ,
2674        MatRestoreRow_SeqBAIJ,
2675        MatMult_SeqBAIJ_N,
2676 /* 4*/ MatMultAdd_SeqBAIJ_N,
2677        MatMultTranspose_SeqBAIJ,
2678        MatMultTransposeAdd_SeqBAIJ,
2679        0,
2680        0,
2681        0,
2682 /*10*/ 0,
2683        MatLUFactor_SeqBAIJ,
2684        0,
2685        0,
2686        MatTranspose_SeqBAIJ,
2687 /*15*/ MatGetInfo_SeqBAIJ,
2688        MatEqual_SeqBAIJ,
2689        MatGetDiagonal_SeqBAIJ,
2690        MatDiagonalScale_SeqBAIJ,
2691        MatNorm_SeqBAIJ,
2692 /*20*/ 0,
2693        MatAssemblyEnd_SeqBAIJ,
2694        MatSetOption_SeqBAIJ,
2695        MatZeroEntries_SeqBAIJ,
2696 /*24*/ MatZeroRows_SeqBAIJ,
2697        0,
2698        0,
2699        0,
2700        0,
2701 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2702        0,
2703        0,
2704        MatGetArray_SeqBAIJ,
2705        MatRestoreArray_SeqBAIJ,
2706 /*34*/ MatDuplicate_SeqBAIJ,
2707        0,
2708        0,
2709        MatILUFactor_SeqBAIJ,
2710        0,
2711 /*39*/ MatAXPY_SeqBAIJ,
2712        MatGetSubMatrices_SeqBAIJ,
2713        MatIncreaseOverlap_SeqBAIJ,
2714        MatGetValues_SeqBAIJ,
2715        MatCopy_SeqBAIJ,
2716 /*44*/ 0,
2717        MatScale_SeqBAIJ,
2718        0,
2719        0,
2720        MatILUDTFactor_SeqBAIJ,
2721 /*49*/ 0,
2722        MatGetRowIJ_SeqBAIJ,
2723        MatRestoreRowIJ_SeqBAIJ,
2724        MatGetColumnIJ_SeqBAIJ,
2725        MatRestoreColumnIJ_SeqBAIJ,
2726 /*54*/ MatFDColoringCreate_SeqAIJ,
2727        0,
2728        0,
2729        0,
2730        MatSetValuesBlocked_SeqBAIJ,
2731 /*59*/ MatGetSubMatrix_SeqBAIJ,
2732        MatDestroy_SeqBAIJ,
2733        MatView_SeqBAIJ,
2734        0,
2735        0,
2736 /*64*/ 0,
2737        0,
2738        0,
2739        0,
2740        0,
2741 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2742        0,
2743        MatConvert_Basic,
2744        0,
2745        0,
2746 /*74*/ 0,
2747        MatFDColoringApply_BAIJ,
2748        0,
2749        0,
2750        0,
2751 /*79*/ 0,
2752        0,
2753        0,
2754        0,
2755        MatLoad_SeqBAIJ,
2756 /*84*/ 0,
2757        0,
2758        0,
2759        0,
2760        0,
2761 /*89*/ 0,
2762        0,
2763        0,
2764        0,
2765        0,
2766 /*94*/ 0,
2767        0,
2768        0,
2769        0,
2770        0,
2771 /*99*/0,
2772        0,
2773        0,
2774        0,
2775        0,
2776 /*104*/0,
2777        MatRealPart_SeqBAIJ,
2778        MatImaginaryPart_SeqBAIJ,
2779        0,
2780        0,
2781 /*109*/0,
2782        0,
2783        0,
2784        0,
2785        MatMissingDiagonal_SeqBAIJ
2786 /*114*/
2787 };
2788 
2789 EXTERN_C_BEGIN
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2792 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2793 {
2794   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2795   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   if (aij->nonew != 1) {
2800     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2801   }
2802 
2803   /* allocate space for values if not already there */
2804   if (!aij->saved_values) {
2805     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2806   }
2807 
2808   /* copy values over */
2809   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 EXTERN_C_END
2813 
2814 EXTERN_C_BEGIN
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2817 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2818 {
2819   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2820   PetscErrorCode ierr;
2821   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2822 
2823   PetscFunctionBegin;
2824   if (aij->nonew != 1) {
2825     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2826   }
2827   if (!aij->saved_values) {
2828     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2829   }
2830 
2831   /* copy values over */
2832   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 EXTERN_C_END
2836 
2837 EXTERN_C_BEGIN
2838 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2839 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2840 EXTERN_C_END
2841 
2842 EXTERN_C_BEGIN
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2845 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2846 {
2847   Mat_SeqBAIJ    *b;
2848   PetscErrorCode ierr;
2849   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2850   PetscTruth     flg,skipallocation = PETSC_FALSE;
2851 
2852   PetscFunctionBegin;
2853 
2854   if (nz == MAT_SKIP_ALLOCATION) {
2855     skipallocation = PETSC_TRUE;
2856     nz             = 0;
2857   }
2858 
2859   if (bs < 0) {
2860     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2861       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2862     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2863     bs   = PetscAbs(bs);
2864   }
2865   if (nnz && newbs != bs) {
2866     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2867   }
2868   bs   = newbs;
2869 
2870   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2871   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2872   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2873   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2874 
2875   B->preallocated = PETSC_TRUE;
2876 
2877   mbs  = B->rmap->n/bs;
2878   nbs  = B->cmap->n/bs;
2879   bs2  = bs*bs;
2880 
2881   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2882     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2883   }
2884 
2885   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2886   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2887   if (nnz) {
2888     for (i=0; i<mbs; i++) {
2889       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2890       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2891     }
2892   }
2893 
2894   b       = (Mat_SeqBAIJ*)B->data;
2895   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2896     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2897   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2898 
2899   if (!flg) {
2900     switch (bs) {
2901     case 1:
2902       B->ops->mult            = MatMult_SeqBAIJ_1;
2903       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2904       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2905       break;
2906     case 2:
2907       B->ops->mult            = MatMult_SeqBAIJ_2;
2908       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2909       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2910       break;
2911     case 3:
2912       B->ops->mult            = MatMult_SeqBAIJ_3;
2913       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2914       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2915       break;
2916     case 4:
2917       B->ops->mult            = MatMult_SeqBAIJ_4;
2918       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2919       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2920       break;
2921     case 5:
2922       B->ops->mult            = MatMult_SeqBAIJ_5;
2923       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2924       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2925       break;
2926     case 6:
2927       B->ops->mult            = MatMult_SeqBAIJ_6;
2928       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2929       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2930       break;
2931     case 7:
2932       B->ops->mult            = MatMult_SeqBAIJ_7;
2933       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2934       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2935       break;
2936     default:
2937       B->ops->mult            = MatMult_SeqBAIJ_N;
2938       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2939       break;
2940     }
2941   }
2942   B->rmap->bs      = bs;
2943   b->mbs     = mbs;
2944   b->nbs     = nbs;
2945   if (!skipallocation) {
2946     if (!b->imax) {
2947       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2948     }
2949     /* b->ilen will count nonzeros in each block row so far. */
2950     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2951     if (!nnz) {
2952       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2953       else if (nz <= 0)        nz = 1;
2954       for (i=0; i<mbs; i++) b->imax[i] = nz;
2955       nz = nz*mbs;
2956     } else {
2957       nz = 0;
2958       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2959     }
2960 
2961     /* allocate the matrix space */
2962     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2963     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2964     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2965     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2966     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2967     b->singlemalloc = PETSC_TRUE;
2968     b->i[0] = 0;
2969     for (i=1; i<mbs+1; i++) {
2970       b->i[i] = b->i[i-1] + b->imax[i-1];
2971     }
2972     b->free_a     = PETSC_TRUE;
2973     b->free_ij    = PETSC_TRUE;
2974   } else {
2975     b->free_a     = PETSC_FALSE;
2976     b->free_ij    = PETSC_FALSE;
2977   }
2978 
2979   B->rmap->bs          = bs;
2980   b->bs2              = bs2;
2981   b->mbs              = mbs;
2982   b->nz               = 0;
2983   b->maxnz            = nz*bs2;
2984   B->info.nz_unneeded = (PetscReal)b->maxnz;
2985   PetscFunctionReturn(0);
2986 }
2987 EXTERN_C_END
2988 
2989 EXTERN_C_BEGIN
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2992 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2993 {
2994   PetscInt       i,m,nz,nz_max=0,*nnz;
2995   PetscScalar    *values=0;
2996   PetscErrorCode ierr;
2997 
2998   PetscFunctionBegin;
2999 
3000   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3001 
3002   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3003   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3004   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3005   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3006   m = B->rmap->n/bs;
3007 
3008   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3009   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3010   for(i=0; i<m; i++) {
3011     nz = ii[i+1]- ii[i];
3012     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3013     nz_max = PetscMax(nz_max, nz);
3014     nnz[i] = nz;
3015   }
3016   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3017   ierr = PetscFree(nnz);CHKERRQ(ierr);
3018 
3019   values = (PetscScalar*)V;
3020   if (!values) {
3021     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3022     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3023   }
3024   for (i=0; i<m; i++) {
3025     PetscInt          ncols  = ii[i+1] - ii[i];
3026     const PetscInt    *icols = jj + ii[i];
3027     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3028     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3029   }
3030   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3031   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3032   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033 
3034   PetscFunctionReturn(0);
3035 }
3036 EXTERN_C_END
3037 
3038 
3039 EXTERN_C_BEGIN
3040 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3041 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3042 EXTERN_C_END
3043 
3044 /*MC
3045    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3046    block sparse compressed row format.
3047 
3048    Options Database Keys:
3049 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3050 
3051   Level: beginner
3052 
3053 .seealso: MatCreateSeqBAIJ()
3054 M*/
3055 
3056 
3057 EXTERN_C_BEGIN
3058 #undef __FUNCT__
3059 #define __FUNCT__ "MatCreate_SeqBAIJ"
3060 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3061 {
3062   PetscErrorCode ierr;
3063   PetscMPIInt    size;
3064   Mat_SeqBAIJ    *b;
3065 
3066   PetscFunctionBegin;
3067   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3068   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3069 
3070   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3071   B->data = (void*)b;
3072   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3073   B->mapping               = 0;
3074   b->row                   = 0;
3075   b->col                   = 0;
3076   b->icol                  = 0;
3077   b->reallocs              = 0;
3078   b->saved_values          = 0;
3079 
3080   b->roworiented           = PETSC_TRUE;
3081   b->nonew                 = 0;
3082   b->diag                  = 0;
3083   b->solve_work            = 0;
3084   b->mult_work             = 0;
3085   B->spptr                 = 0;
3086   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3087   b->keepnonzeropattern    = PETSC_FALSE;
3088   b->xtoy                  = 0;
3089   b->XtoY                  = 0;
3090   b->compressedrow.use     = PETSC_FALSE;
3091   b->compressedrow.nrows   = 0;
3092   b->compressedrow.i       = PETSC_NULL;
3093   b->compressedrow.rindex  = PETSC_NULL;
3094   b->compressedrow.checked = PETSC_FALSE;
3095   B->same_nonzero          = PETSC_FALSE;
3096 
3097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
3098                                      "MatGetFactorAvailable_seqbaij_petsc",
3099                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
3101                                      "MatGetFactor_seqbaij_petsc",
3102                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3104                                      "MatInvertBlockDiagonal_SeqBAIJ",
3105                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3107                                      "MatStoreValues_SeqBAIJ",
3108                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3110                                      "MatRetrieveValues_SeqBAIJ",
3111                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3113                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3114                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3116                                      "MatConvert_SeqBAIJ_SeqAIJ",
3117                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3119                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3120                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3121   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3122                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3123                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3124   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3125                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3126                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3127   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3128   PetscFunctionReturn(0);
3129 }
3130 EXTERN_C_END
3131 
3132 #undef __FUNCT__
3133 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3134 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3135 {
3136   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3137   PetscErrorCode ierr;
3138   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3139 
3140   PetscFunctionBegin;
3141   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3142 
3143   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3144   for (i=0; i<mbs; i++) {
3145     c->imax[i] = a->imax[i];
3146     c->ilen[i] = a->ilen[i];
3147   }
3148 
3149   /* allocate the matrix space */
3150   if (mallocmatspace){
3151     ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3152     ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3153     c->singlemalloc = PETSC_TRUE;
3154     ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3155     if (mbs > 0) {
3156       ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3157       if (cpvalues == MAT_COPY_VALUES) {
3158         ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3159       } else {
3160         ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3161       }
3162     }
3163   }
3164 
3165   c->roworiented = a->roworiented;
3166   c->nonew       = a->nonew;
3167   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
3168   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
3169   c->bs2         = a->bs2;
3170   c->mbs         = a->mbs;
3171   c->nbs         = a->nbs;
3172 
3173   if (a->diag) {
3174     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3175     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3176     for (i=0; i<mbs; i++) {
3177       c->diag[i] = a->diag[i];
3178     }
3179   } else c->diag        = 0;
3180   c->nz                 = a->nz;
3181   c->maxnz              = a->maxnz;
3182   c->solve_work         = 0;
3183   c->mult_work          = 0;
3184   c->free_a             = PETSC_TRUE;
3185   c->free_ij            = PETSC_TRUE;
3186   C->preallocated       = PETSC_TRUE;
3187   C->assembled          = PETSC_TRUE;
3188 
3189   c->compressedrow.use     = a->compressedrow.use;
3190   c->compressedrow.nrows   = a->compressedrow.nrows;
3191   c->compressedrow.checked = a->compressedrow.checked;
3192   if ( a->compressedrow.checked && a->compressedrow.use){
3193     i = a->compressedrow.nrows;
3194     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
3195     c->compressedrow.rindex = c->compressedrow.i + i + 1;
3196     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3197     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3198   } else {
3199     c->compressedrow.use    = PETSC_FALSE;
3200     c->compressedrow.i      = PETSC_NULL;
3201     c->compressedrow.rindex = PETSC_NULL;
3202   }
3203   C->same_nonzero = A->same_nonzero;
3204   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3210 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3211 {
3212     PetscErrorCode ierr;
3213 
3214   PetscFunctionBegin;
3215   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3216   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3217   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3218   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3219   PetscFunctionReturn(0);
3220 }
3221 
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatLoad_SeqBAIJ"
3224 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3225 {
3226   Mat_SeqBAIJ    *a;
3227   Mat            B;
3228   PetscErrorCode ierr;
3229   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3230   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3231   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3232   PetscInt       *masked,nmask,tmp,bs2,ishift;
3233   PetscMPIInt    size;
3234   int            fd;
3235   PetscScalar    *aa;
3236   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3237 
3238   PetscFunctionBegin;
3239   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3240     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3241   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3242   bs2  = bs*bs;
3243 
3244   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3245   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3246   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3247   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3248   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3249   M = header[1]; N = header[2]; nz = header[3];
3250 
3251   if (header[3] < 0) {
3252     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3253   }
3254 
3255   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3256 
3257   /*
3258      This code adds extra rows to make sure the number of rows is
3259     divisible by the blocksize
3260   */
3261   mbs        = M/bs;
3262   extra_rows = bs - M + bs*(mbs);
3263   if (extra_rows == bs) extra_rows = 0;
3264   else                  mbs++;
3265   if (extra_rows) {
3266     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3267   }
3268 
3269   /* read in row lengths */
3270   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3271   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3272   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3273 
3274   /* read in column indices */
3275   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3276   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3277   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3278 
3279   /* loop over row lengths determining block row lengths */
3280   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3281   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3282   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3283   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3284   masked   = mask + mbs;
3285   rowcount = 0; nzcount = 0;
3286   for (i=0; i<mbs; i++) {
3287     nmask = 0;
3288     for (j=0; j<bs; j++) {
3289       kmax = rowlengths[rowcount];
3290       for (k=0; k<kmax; k++) {
3291         tmp = jj[nzcount++]/bs;
3292         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3293       }
3294       rowcount++;
3295     }
3296     browlengths[i] += nmask;
3297     /* zero out the mask elements we set */
3298     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3299   }
3300 
3301   /* create our matrix */
3302   ierr = MatCreate(comm,&B);
3303   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3304   ierr = MatSetType(B,type);CHKERRQ(ierr);
3305   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3306   a = (Mat_SeqBAIJ*)B->data;
3307 
3308   /* set matrix "i" values */
3309   a->i[0] = 0;
3310   for (i=1; i<= mbs; i++) {
3311     a->i[i]      = a->i[i-1] + browlengths[i-1];
3312     a->ilen[i-1] = browlengths[i-1];
3313   }
3314   a->nz         = 0;
3315   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3316 
3317   /* read in nonzero values */
3318   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3319   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3320   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3321 
3322   /* set "a" and "j" values into matrix */
3323   nzcount = 0; jcount = 0;
3324   for (i=0; i<mbs; i++) {
3325     nzcountb = nzcount;
3326     nmask    = 0;
3327     for (j=0; j<bs; j++) {
3328       kmax = rowlengths[i*bs+j];
3329       for (k=0; k<kmax; k++) {
3330         tmp = jj[nzcount++]/bs;
3331 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3332       }
3333     }
3334     /* sort the masked values */
3335     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3336 
3337     /* set "j" values into matrix */
3338     maskcount = 1;
3339     for (j=0; j<nmask; j++) {
3340       a->j[jcount++]  = masked[j];
3341       mask[masked[j]] = maskcount++;
3342     }
3343     /* set "a" values into matrix */
3344     ishift = bs2*a->i[i];
3345     for (j=0; j<bs; j++) {
3346       kmax = rowlengths[i*bs+j];
3347       for (k=0; k<kmax; k++) {
3348         tmp       = jj[nzcountb]/bs ;
3349         block     = mask[tmp] - 1;
3350         point     = jj[nzcountb] - bs*tmp;
3351         idx       = ishift + bs2*block + j + bs*point;
3352         a->a[idx] = (MatScalar)aa[nzcountb++];
3353       }
3354     }
3355     /* zero out the mask elements we set */
3356     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3357   }
3358   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3359 
3360   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3361   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3362   ierr = PetscFree(aa);CHKERRQ(ierr);
3363   ierr = PetscFree(jj);CHKERRQ(ierr);
3364   ierr = PetscFree(mask);CHKERRQ(ierr);
3365 
3366   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3367   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3368   ierr = MatView_Private(B);CHKERRQ(ierr);
3369 
3370   *A = B;
3371   PetscFunctionReturn(0);
3372 }
3373 
3374 #undef __FUNCT__
3375 #define __FUNCT__ "MatCreateSeqBAIJ"
3376 /*@C
3377    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3378    compressed row) format.  For good matrix assembly performance the
3379    user should preallocate the matrix storage by setting the parameter nz
3380    (or the array nnz).  By setting these parameters accurately, performance
3381    during matrix assembly can be increased by more than a factor of 50.
3382 
3383    Collective on MPI_Comm
3384 
3385    Input Parameters:
3386 +  comm - MPI communicator, set to PETSC_COMM_SELF
3387 .  bs - size of block
3388 .  m - number of rows
3389 .  n - number of columns
3390 .  nz - number of nonzero blocks  per block row (same for all rows)
3391 -  nnz - array containing the number of nonzero blocks in the various block rows
3392          (possibly different for each block row) or PETSC_NULL
3393 
3394    Output Parameter:
3395 .  A - the matrix
3396 
3397    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3398    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3399    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3400 
3401    Options Database Keys:
3402 .   -mat_no_unroll - uses code that does not unroll the loops in the
3403                      block calculations (much slower)
3404 .    -mat_block_size - size of the blocks to use
3405 
3406    Level: intermediate
3407 
3408    Notes:
3409    The number of rows and columns must be divisible by blocksize.
3410 
3411    If the nnz parameter is given then the nz parameter is ignored
3412 
3413    A nonzero block is any block that as 1 or more nonzeros in it
3414 
3415    The block AIJ format is fully compatible with standard Fortran 77
3416    storage.  That is, the stored row and column indices can begin at
3417    either one (as in Fortran) or zero.  See the users' manual for details.
3418 
3419    Specify the preallocated storage with either nz or nnz (not both).
3420    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3421    allocation.  For additional details, see the users manual chapter on
3422    matrices.
3423 
3424 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3425 @*/
3426 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3427 {
3428   PetscErrorCode ierr;
3429 
3430   PetscFunctionBegin;
3431   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3432   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3433   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3434   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3435   PetscFunctionReturn(0);
3436 }
3437 
3438 #undef __FUNCT__
3439 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3440 /*@C
3441    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3442    per row in the matrix. For good matrix assembly performance the
3443    user should preallocate the matrix storage by setting the parameter nz
3444    (or the array nnz).  By setting these parameters accurately, performance
3445    during matrix assembly can be increased by more than a factor of 50.
3446 
3447    Collective on MPI_Comm
3448 
3449    Input Parameters:
3450 +  A - the matrix
3451 .  bs - size of block
3452 .  nz - number of block nonzeros per block row (same for all rows)
3453 -  nnz - array containing the number of block nonzeros in the various block rows
3454          (possibly different for each block row) or PETSC_NULL
3455 
3456    Options Database Keys:
3457 .   -mat_no_unroll - uses code that does not unroll the loops in the
3458                      block calculations (much slower)
3459 .    -mat_block_size - size of the blocks to use
3460 
3461    Level: intermediate
3462 
3463    Notes:
3464    If the nnz parameter is given then the nz parameter is ignored
3465 
3466    You can call MatGetInfo() to get information on how effective the preallocation was;
3467    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3468    You can also run with the option -info and look for messages with the string
3469    malloc in them to see if additional memory allocation was needed.
3470 
3471    The block AIJ format is fully compatible with standard Fortran 77
3472    storage.  That is, the stored row and column indices can begin at
3473    either one (as in Fortran) or zero.  See the users' manual for details.
3474 
3475    Specify the preallocated storage with either nz or nnz (not both).
3476    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3477    allocation.  For additional details, see the users manual chapter on
3478    matrices.
3479 
3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3481 @*/
3482 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3483 {
3484   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3485 
3486   PetscFunctionBegin;
3487   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3488   if (f) {
3489     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3490   }
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3496 /*@C
3497    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3498    (the default sequential PETSc format).
3499 
3500    Collective on MPI_Comm
3501 
3502    Input Parameters:
3503 +  A - the matrix
3504 .  i - the indices into j for the start of each local row (starts with zero)
3505 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3506 -  v - optional values in the matrix
3507 
3508    Level: developer
3509 
3510 .keywords: matrix, aij, compressed row, sparse
3511 
3512 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3513 @*/
3514 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3515 {
3516   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3517 
3518   PetscFunctionBegin;
3519   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3520   if (f) {
3521     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3522   }
3523   PetscFunctionReturn(0);
3524 }
3525 
3526 
3527 #undef __FUNCT__
3528 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3529 /*@
3530      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3531               (upper triangular entries in CSR format) provided by the user.
3532 
3533      Collective on MPI_Comm
3534 
3535    Input Parameters:
3536 +  comm - must be an MPI communicator of size 1
3537 .  bs - size of block
3538 .  m - number of rows
3539 .  n - number of columns
3540 .  i - row indices
3541 .  j - column indices
3542 -  a - matrix values
3543 
3544    Output Parameter:
3545 .  mat - the matrix
3546 
3547    Level: intermediate
3548 
3549    Notes:
3550        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3551     once the matrix is destroyed
3552 
3553        You cannot set new nonzero locations into this matrix, that will generate an error.
3554 
3555        The i and j indices are 0 based
3556 
3557 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3558 
3559 @*/
3560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3561 {
3562   PetscErrorCode ierr;
3563   PetscInt       ii;
3564   Mat_SeqBAIJ    *baij;
3565 
3566   PetscFunctionBegin;
3567   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3568   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3569 
3570   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3571   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3572   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3573   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3574   baij = (Mat_SeqBAIJ*)(*mat)->data;
3575   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3576 
3577   baij->i = i;
3578   baij->j = j;
3579   baij->a = a;
3580   baij->singlemalloc = PETSC_FALSE;
3581   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3582   baij->free_a       = PETSC_FALSE;
3583   baij->free_ij       = PETSC_FALSE;
3584 
3585   for (ii=0; ii<m; ii++) {
3586     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3587 #if defined(PETSC_USE_DEBUG)
3588     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3589 #endif
3590   }
3591 #if defined(PETSC_USE_DEBUG)
3592   for (ii=0; ii<baij->i[m]; ii++) {
3593     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3594     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3595   }
3596 #endif
3597 
3598   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3599   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3600   PetscFunctionReturn(0);
3601 }
3602