xref: /petsc/src/mat/impls/baij/seq/baij.c (revision aabbc4fba255ddf684b806b03041ff741c689cc8)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr,(*f)(Mat);
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31 
32   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33   if (f) {
34     ierr = (*f)(mat);CHKERRQ(ierr);
35   } else {
36     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 EXTERN_C_BEGIN
42 #undef __FUNCT__
43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45 {
46   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
47   PetscErrorCode ierr;
48   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5];
49   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25];
50   PetscReal      shift = 0.0;
51 
52   PetscFunctionBegin;
53   if (a->idiagvalid) PetscFunctionReturn(0);
54   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
55   diag_offset = a->diag;
56   if (!a->idiag) {
57     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
58     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     default:
129       SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"not supported for block size %D",bs);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
138 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139 {
140   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141   PetscScalar        *x,x1,s1;
142   const PetscScalar  *b;
143   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144   PetscErrorCode     ierr;
145   PetscInt           m = a->mbs,i,i2,nz,j;
146   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
147 
148   PetscFunctionBegin;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   its = its*lits;
151   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
156 
157   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
158 
159   diag  = a->diag;
160   idiag = a->idiag;
161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
163 
164   if (flag & SOR_ZERO_INITIAL_GUESS) {
165     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166       x[0] = b[0]*idiag[0];
167       i2     = 1;
168       idiag += 1;
169       for (i=1; i<m; i++) {
170         v     = aa + ai[i];
171         vi    = aj + ai[i];
172         nz    = diag[i] - ai[i];
173         s1    = b[i2];
174         for (j=0; j<nz; j++) {
175           s1 -= v[j]*x[vi[j]];
176         }
177         x[i2]   = idiag[0]*s1;
178         idiag   += 1;
179         i2      += 1;
180       }
181       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
182       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
183     }
184     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
185         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
186       i2    = 0;
187       mdiag = a->idiag+a->mbs;
188       for (i=0; i<m; i++) {
189         x1      = x[i2];
190         x[i2]   = mdiag[0]*x1;
191         mdiag  += 1;
192         i2     += 1;
193       }
194       ierr = PetscLogFlops(m);CHKERRQ(ierr);
195     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
196       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
197     }
198     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
199       idiag   = a->idiag+a->mbs - 1;
200       i2      = m - 1;
201       x1      = x[i2];
202       x[i2]   = idiag[0]*x1;
203       idiag -= 1;
204       i2    -= 1;
205       for (i=m-2; i>=0; i--) {
206         v     = aa + (diag[i]+1);
207         vi    = aj + diag[i] + 1;
208         nz    = ai[i+1] - diag[i] - 1;
209         s1    = x[i2];
210         for (j=0; j<nz; j++) {
211           s1 -= v[j]*x[vi[j]];
212         }
213         x[i2]   = idiag[0]*s1;
214         idiag   -= 1;
215         i2      -= 1;
216       }
217       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
218     }
219   } else {
220     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
221   }
222   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
223   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 #undef __FUNCT__
228 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
229 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
230 {
231   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
232   PetscScalar        *x,x1,x2,s1,s2;
233   const PetscScalar  *b;
234   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
235   PetscErrorCode     ierr;
236   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
237   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
238 
239   PetscFunctionBegin;
240   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
241   its = its*lits;
242   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
243   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
244   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
245   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
246   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
247 
248   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
249 
250   diag  = a->diag;
251   idiag = a->idiag;
252   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
253   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
254 
255   if (flag & SOR_ZERO_INITIAL_GUESS) {
256     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
257       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
258       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
259       i2     = 2;
260       idiag += 4;
261       for (i=1; i<m; i++) {
262 	v     = aa + 4*ai[i];
263 	vi    = aj + ai[i];
264 	nz    = diag[i] - ai[i];
265 	s1    = b[i2]; s2 = b[i2+1];
266         for (j=0; j<nz; j++) {
267 	  idx  = 2*vi[j];
268           it   = 4*j;
269 	  x1   = x[idx]; x2 = x[1+idx];
270 	  s1  -= v[it]*x1 + v[it+2]*x2;
271 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
272 	}
273 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
274 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
275         idiag   += 4;
276         i2      += 2;
277       }
278       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
279       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
280     }
281     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
282         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
283       i2    = 0;
284       mdiag = a->idiag+4*a->mbs;
285       for (i=0; i<m; i++) {
286         x1      = x[i2]; x2 = x[i2+1];
287         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
288         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
289         mdiag  += 4;
290         i2     += 2;
291       }
292       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
293     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
294       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
295     }
296     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
297       idiag   = a->idiag+4*a->mbs - 4;
298       i2      = 2*m - 2;
299       x1      = x[i2]; x2 = x[i2+1];
300       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
301       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
302       idiag -= 4;
303       i2    -= 2;
304       for (i=m-2; i>=0; i--) {
305 	v     = aa + 4*(diag[i]+1);
306 	vi    = aj + diag[i] + 1;
307 	nz    = ai[i+1] - diag[i] - 1;
308 	s1    = x[i2]; s2 = x[i2+1];
309         for (j=0; j<nz; j++) {
310  	  idx  = 2*vi[j];
311           it   = 4*j;
312 	  x1   = x[idx]; x2 = x[1+idx];
313 	  s1  -= v[it]*x1 + v[it+2]*x2;
314 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
315 	}
316 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
317 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
318         idiag   -= 4;
319         i2      -= 2;
320       }
321       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
322     }
323   } else {
324     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
325   }
326   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
327   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNCT__
332 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
333 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
334 {
335   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
336   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
337   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
338   const PetscScalar  *b;
339   PetscErrorCode     ierr;
340   PetscInt           m = a->mbs,i,i2,nz,idx;
341   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
342 
343   PetscFunctionBegin;
344   its = its*lits;
345   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
346   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
347   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
348   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
349   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
350   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
351 
352   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
353 
354   diag  = a->diag;
355   idiag = a->idiag;
356   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
357   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
358 
359   if (flag & SOR_ZERO_INITIAL_GUESS) {
360     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
361       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
362       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
363       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
364       i2     = 3;
365       idiag += 9;
366       for (i=1; i<m; i++) {
367         v     = aa + 9*ai[i];
368         vi    = aj + ai[i];
369         nz    = diag[i] - ai[i];
370         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
371         while (nz--) {
372           idx  = 3*(*vi++);
373           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
374           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
375           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
376           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
377           v   += 9;
378         }
379         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
380         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
381         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
382         idiag   += 9;
383         i2      += 3;
384       }
385       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
387     }
388     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390       i2    = 0;
391       mdiag = a->idiag+9*a->mbs;
392       for (i=0; i<m; i++) {
393         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
394         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
395         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
396         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
397         mdiag  += 9;
398         i2     += 3;
399       }
400       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
401     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
402       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
403     }
404     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
405       idiag   = a->idiag+9*a->mbs - 9;
406       i2      = 3*m - 3;
407       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
408       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
409       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
410       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
411       idiag -= 9;
412       i2    -= 3;
413       for (i=m-2; i>=0; i--) {
414         v     = aa + 9*(diag[i]+1);
415         vi    = aj + diag[i] + 1;
416         nz    = ai[i+1] - diag[i] - 1;
417         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
418         while (nz--) {
419           idx  = 3*(*vi++);
420           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
421           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
422           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
423           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
424           v   += 9;
425         }
426         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
427         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
428         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
429         idiag   -= 9;
430         i2      -= 3;
431       }
432       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
433     }
434   } else {
435     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
436   }
437   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
438   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
444 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
445 {
446   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
447   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
448   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
449   const PetscScalar  *b;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
461   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
476       i2     = 4;
477       idiag += 16;
478       for (i=1; i<m; i++) {
479 	v     = aa + 16*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
483 	while (nz--) {
484 	  idx  = 4*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
486 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
487 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
488 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
489 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
490 	  v   += 16;
491 	}
492 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
493 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
494 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
495 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
496         idiag   += 16;
497         i2      += 4;
498       }
499       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
500       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
501     }
502     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
503         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
504       i2    = 0;
505       mdiag = a->idiag+16*a->mbs;
506       for (i=0; i<m; i++) {
507         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
508         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
509         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
510         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
511         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
512         mdiag  += 16;
513         i2     += 4;
514       }
515       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
516     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
517       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
518     }
519     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
520       idiag   = a->idiag+16*a->mbs - 16;
521       i2      = 4*m - 4;
522       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
523       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
524       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
525       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
526       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
527       idiag -= 16;
528       i2    -= 4;
529       for (i=m-2; i>=0; i--) {
530 	v     = aa + 16*(diag[i]+1);
531 	vi    = aj + diag[i] + 1;
532 	nz    = ai[i+1] - diag[i] - 1;
533 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
534 	while (nz--) {
535 	  idx  = 4*(*vi++);
536 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
537 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
538 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
539 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
540 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
541 	  v   += 16;
542 	}
543 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
544 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
545 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
546 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
547         idiag   -= 16;
548         i2      -= 4;
549       }
550       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
551     }
552   } else {
553     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
554   }
555   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
556   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
562 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
563 {
564   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
565   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
566   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
567   const PetscScalar  *b;
568   PetscErrorCode     ierr;
569   PetscInt           m = a->mbs,i,i2,nz,idx;
570   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
571 
572   PetscFunctionBegin;
573   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
574   its = its*lits;
575   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
576   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
577   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
578   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
579   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
580 
581   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
582 
583   diag  = a->diag;
584   idiag = a->idiag;
585   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
586   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
587 
588   if (flag & SOR_ZERO_INITIAL_GUESS) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
590       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
591       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
592       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
593       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
594       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
595       i2     = 5;
596       idiag += 25;
597       for (i=1; i<m; i++) {
598 	v     = aa + 25*ai[i];
599 	vi    = aj + ai[i];
600 	nz    = diag[i] - ai[i];
601 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
602 	while (nz--) {
603 	  idx  = 5*(*vi++);
604 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
605 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
606 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
607 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
608 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
609 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
610 	  v   += 25;
611 	}
612 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
613 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
614 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
615 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
616 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
617         idiag   += 25;
618         i2      += 5;
619       }
620       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
621       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
622     }
623     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
624         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
625       i2    = 0;
626       mdiag = a->idiag+25*a->mbs;
627       for (i=0; i<m; i++) {
628         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
629         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
630         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
631         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
632         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
633         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
634         mdiag  += 25;
635         i2     += 5;
636       }
637       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
638     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
639       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
640     }
641     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
642       idiag   = a->idiag+25*a->mbs - 25;
643       i2      = 5*m - 5;
644       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
645       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
646       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
647       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
648       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
649       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
650       idiag -= 25;
651       i2    -= 5;
652       for (i=m-2; i>=0; i--) {
653 	v     = aa + 25*(diag[i]+1);
654 	vi    = aj + diag[i] + 1;
655 	nz    = ai[i+1] - diag[i] - 1;
656 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
657 	while (nz--) {
658 	  idx  = 5*(*vi++);
659 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
660 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
661 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
662 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
663 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
664 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
665 	  v   += 25;
666 	}
667 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
668 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
669 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
670 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
671 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
672         idiag   -= 25;
673         i2      -= 5;
674       }
675       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
676     }
677   } else {
678     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
679   }
680   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
681   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
687 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
688 {
689   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
690   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
691   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
692   const PetscScalar  *b;
693   PetscErrorCode     ierr;
694   PetscInt           m = a->mbs,i,i2,nz,idx;
695   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
696 
697   PetscFunctionBegin;
698   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
699   its = its*lits;
700   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
701   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
702   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
703   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
704   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
705 
706   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
707 
708   diag  = a->diag;
709   idiag = a->idiag;
710   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
711   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
712 
713   if (flag & SOR_ZERO_INITIAL_GUESS) {
714     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
715       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
716       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
717       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
718       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
719       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
720       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
721       i2     = 6;
722       idiag += 36;
723       for (i=1; i<m; i++) {
724         v     = aa + 36*ai[i];
725         vi    = aj + ai[i];
726         nz    = diag[i] - ai[i];
727         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
728         while (nz--) {
729           idx  = 6*(*vi++);
730           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
731           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
732           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
733           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
734           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
735           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
736           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
737           v   += 36;
738         }
739         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
740         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
741         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
742         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
743         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
744         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
745         idiag   += 36;
746         i2      += 6;
747       }
748       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
749       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
750     }
751     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
752         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
753       i2    = 0;
754       mdiag = a->idiag+36*a->mbs;
755       for (i=0; i<m; i++) {
756         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
757         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
758         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
759         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
760         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
761         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
762         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
763         mdiag  += 36;
764         i2     += 6;
765       }
766       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
767     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
768       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
769     }
770     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
771       idiag   = a->idiag+36*a->mbs - 36;
772       i2      = 6*m - 6;
773       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
774       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
775       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
776       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
777       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
778       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
779       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
780       idiag -= 36;
781       i2    -= 6;
782       for (i=m-2; i>=0; i--) {
783         v     = aa + 36*(diag[i]+1);
784         vi    = aj + diag[i] + 1;
785         nz    = ai[i+1] - diag[i] - 1;
786         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
787         while (nz--) {
788           idx  = 6*(*vi++);
789           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
790           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
791           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
792           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
793           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
794           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
795           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
796           v   += 36;
797         }
798         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
799         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
800         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
801         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
802         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
803         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
804         idiag   -= 36;
805         i2      -= 6;
806       }
807       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
808     }
809   } else {
810     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
811   }
812   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
813   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
819 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
820 {
821   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
822   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
823   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
824   const PetscScalar  *b;
825   PetscErrorCode     ierr;
826   PetscInt           m = a->mbs,i,i2,nz,idx;
827   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
828 
829   PetscFunctionBegin;
830   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
831   its = its*lits;
832   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
833   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
834   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
835   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
836   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
837 
838   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
839 
840   diag  = a->diag;
841   idiag = a->idiag;
842   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
843   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
844 
845   if (flag & SOR_ZERO_INITIAL_GUESS) {
846     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
847       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
848       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
849       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
850       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
851       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
852       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
853       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
854       i2     = 7;
855       idiag += 49;
856       for (i=1; i<m; i++) {
857         v     = aa + 49*ai[i];
858         vi    = aj + ai[i];
859         nz    = diag[i] - ai[i];
860         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
861         while (nz--) {
862           idx  = 7*(*vi++);
863           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
864           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
865           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
866           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
867           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
868           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
869           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
870           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
871           v   += 49;
872         }
873         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
874         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
875         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
876         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
877         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
878         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
879         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
880         idiag   += 49;
881         i2      += 7;
882       }
883       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
884       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
885     }
886     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
887         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
888       i2    = 0;
889       mdiag = a->idiag+49*a->mbs;
890       for (i=0; i<m; i++) {
891         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
892         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
893         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
894         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
895         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
896         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
897         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
898         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
899         mdiag  += 36;
900         i2     += 6;
901       }
902       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
903     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
904       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
905     }
906     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
907       idiag   = a->idiag+49*a->mbs - 49;
908       i2      = 7*m - 7;
909       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
910       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
911       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
912       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
913       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
914       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
915       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
916       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
917       idiag -= 49;
918       i2    -= 7;
919       for (i=m-2; i>=0; i--) {
920         v     = aa + 49*(diag[i]+1);
921         vi    = aj + diag[i] + 1;
922         nz    = ai[i+1] - diag[i] - 1;
923         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
924         while (nz--) {
925           idx  = 7*(*vi++);
926           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
927           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
928           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
929           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
930           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
931           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
932           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
933           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
934           v   += 49;
935         }
936         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
937         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
938         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
939         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
940         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
941         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
942         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
943         idiag   -= 49;
944         i2      -= 7;
945       }
946       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
947     }
948   } else {
949     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
950   }
951   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
952   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957     Special version for direct calls from Fortran (Used in PETSc-fun3d)
958 */
959 #if defined(PETSC_HAVE_FORTRAN_CAPS)
960 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
961 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
962 #define matsetvaluesblocked4_ matsetvaluesblocked4
963 #endif
964 
965 EXTERN_C_BEGIN
966 #undef __FUNCT__
967 #define __FUNCT__ "matsetvaluesblocked4_"
968 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
969 {
970   Mat               A = *AA;
971   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
972   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
973   PetscInt          *ai=a->i,*ailen=a->ilen;
974   PetscInt          *aj=a->j,stepval,lastcol = -1;
975   const PetscScalar *value = v;
976   MatScalar         *ap,*aa = a->a,*bap;
977 
978   PetscFunctionBegin;
979   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980   stepval = (n-1)*4;
981   for (k=0; k<m; k++) { /* loop over added rows */
982     row  = im[k];
983     rp   = aj + ai[row];
984     ap   = aa + 16*ai[row];
985     nrow = ailen[row];
986     low  = 0;
987     high = nrow;
988     for (l=0; l<n; l++) { /* loop over added columns */
989       col = in[l];
990       if (col <= lastcol) low = 0; else high = nrow;
991       lastcol = col;
992       value = v + k*(stepval+4 + l)*4;
993       while (high-low > 7) {
994         t = (low+high)/2;
995         if (rp[t] > col) high = t;
996         else             low  = t;
997       }
998       for (i=low; i<high; i++) {
999         if (rp[i] > col) break;
1000         if (rp[i] == col) {
1001           bap  = ap +  16*i;
1002           for (ii=0; ii<4; ii++,value+=stepval) {
1003             for (jj=ii; jj<16; jj+=4) {
1004               bap[jj] += *value++;
1005             }
1006           }
1007           goto noinsert2;
1008         }
1009       }
1010       N = nrow++ - 1;
1011       high++; /* added new column index thus must search to one higher than before */
1012       /* shift up all the later entries in this row */
1013       for (ii=N; ii>=i; ii--) {
1014         rp[ii+1] = rp[ii];
1015         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1016       }
1017       if (N >= i) {
1018         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1019       }
1020       rp[i] = col;
1021       bap   = ap +  16*i;
1022       for (ii=0; ii<4; ii++,value+=stepval) {
1023         for (jj=ii; jj<16; jj+=4) {
1024           bap[jj] = *value++;
1025         }
1026       }
1027       noinsert2:;
1028       low = i;
1029     }
1030     ailen[row] = nrow;
1031   }
1032   PetscFunctionReturnVoid();
1033 }
1034 EXTERN_C_END
1035 
1036 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037 #define matsetvalues4_ MATSETVALUES4
1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039 #define matsetvalues4_ matsetvalues4
1040 #endif
1041 
1042 EXTERN_C_BEGIN
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatSetValues4_"
1045 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1046 {
1047   Mat         A = *AA;
1048   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1049   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1050   PetscInt    *ai=a->i,*ailen=a->ilen;
1051   PetscInt    *aj=a->j,brow,bcol;
1052   PetscInt    ridx,cidx,lastcol = -1;
1053   MatScalar   *ap,value,*aa=a->a,*bap;
1054 
1055   PetscFunctionBegin;
1056   for (k=0; k<m; k++) { /* loop over added rows */
1057     row  = im[k]; brow = row/4;
1058     rp   = aj + ai[brow];
1059     ap   = aa + 16*ai[brow];
1060     nrow = ailen[brow];
1061     low  = 0;
1062     high = nrow;
1063     for (l=0; l<n; l++) { /* loop over added columns */
1064       col = in[l]; bcol = col/4;
1065       ridx = row % 4; cidx = col % 4;
1066       value = v[l + k*n];
1067       if (col <= lastcol) low = 0; else high = nrow;
1068       lastcol = col;
1069       while (high-low > 7) {
1070         t = (low+high)/2;
1071         if (rp[t] > bcol) high = t;
1072         else              low  = t;
1073       }
1074       for (i=low; i<high; i++) {
1075         if (rp[i] > bcol) break;
1076         if (rp[i] == bcol) {
1077           bap  = ap +  16*i + 4*cidx + ridx;
1078           *bap += value;
1079           goto noinsert1;
1080         }
1081       }
1082       N = nrow++ - 1;
1083       high++; /* added new column thus must search to one higher than before */
1084       /* shift up all the later entries in this row */
1085       for (ii=N; ii>=i; ii--) {
1086         rp[ii+1] = rp[ii];
1087         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1088       }
1089       if (N>=i) {
1090         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1091       }
1092       rp[i]                    = bcol;
1093       ap[16*i + 4*cidx + ridx] = value;
1094       noinsert1:;
1095       low = i;
1096     }
1097     ailen[brow] = nrow;
1098   }
1099   PetscFunctionReturnVoid();
1100 }
1101 EXTERN_C_END
1102 
1103 /*
1104      Checks for missing diagonals
1105 */
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1108 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1109 {
1110   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1111   PetscErrorCode ierr;
1112   PetscInt       *diag,*jj = a->j,i;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1116   *missing = PETSC_FALSE;
1117   if (A->rmap->n > 0 && !jj) {
1118     *missing  = PETSC_TRUE;
1119     if (d) *d = 0;
1120     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1121   } else {
1122     diag     = a->diag;
1123     for (i=0; i<a->mbs; i++) {
1124       if (jj[diag[i]] != i) {
1125         *missing  = PETSC_TRUE;
1126         if (d) *d = i;
1127         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1128       }
1129     }
1130   }
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1136 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1137 {
1138   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1139   PetscErrorCode ierr;
1140   PetscInt       i,j,m = a->mbs;
1141 
1142   PetscFunctionBegin;
1143   if (!a->diag) {
1144     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1145     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1146     a->free_diag = PETSC_TRUE;
1147   }
1148   for (i=0; i<m; i++) {
1149     a->diag[i] = a->i[i+1];
1150     for (j=a->i[i]; j<a->i[i+1]; j++) {
1151       if (a->j[j] == i) {
1152         a->diag[i] = j;
1153         break;
1154       }
1155     }
1156   }
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 
1161 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1165 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1166 {
1167   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1168   PetscErrorCode ierr;
1169   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1170   PetscInt       *tia, *tja;
1171 
1172   PetscFunctionBegin;
1173   *nn = n;
1174   if (!ia) PetscFunctionReturn(0);
1175   if (symmetric) {
1176     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1177     nz   = tia[n];
1178   } else {
1179     tia = a->i; tja = a->j;
1180   }
1181 
1182   if (!blockcompressed && bs > 1) {
1183     (*nn) *= bs;
1184     nbs    = bs;
1185     /* malloc & create the natural set of indices */
1186     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1187     if (n) {
1188       (*ia)[0] = 0;
1189       for (j=1; j<bs; j++) {
1190         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1191       }
1192     }
1193 
1194     for (i=1; i<n; i++) {
1195       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1196       for (j=1; j<bs; j++) {
1197         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1198       }
1199     }
1200     if (n) {
1201       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1202     }
1203 
1204     if (ja) {
1205       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1206       cnt = 0;
1207       for (i=0; i<n; i++) {
1208         for (j=0; j<bs; j++) {
1209           for (k=tia[i]; k<tia[i+1]; k++) {
1210             for (l=0; l<bs; l++) {
1211               (*ja)[cnt++] = bs*tja[k] + l;
1212 	    }
1213           }
1214         }
1215       }
1216     }
1217 
1218     n     *= bs;
1219     nz *= bs*bs;
1220     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1221       ierr = PetscFree(tia);CHKERRQ(ierr);
1222       ierr = PetscFree(tja);CHKERRQ(ierr);
1223     }
1224   } else if (oshift == 1) {
1225     if (symmetric) {
1226       PetscInt nz = tia[A->rmap->n/bs];
1227       /*  add 1 to i and j indices */
1228       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1229       *ia = tia;
1230       if (ja) {
1231 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1232         *ja = tja;
1233       }
1234     } else {
1235       PetscInt nz = a->i[A->rmap->n/bs];
1236       /* malloc space and  add 1 to i and j indices */
1237       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1238       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1239       if (ja) {
1240 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1241 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1242       }
1243     }
1244   } else {
1245     *ia = tia;
1246     if (ja) *ja = tja;
1247   }
1248 
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1254 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1255 {
1256   PetscErrorCode ierr;
1257 
1258   PetscFunctionBegin;
1259   if (!ia) PetscFunctionReturn(0);
1260   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1261     ierr = PetscFree(*ia);CHKERRQ(ierr);
1262     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1263   }
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1269 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1270 {
1271   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275 #if defined(PETSC_USE_LOG)
1276   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1277 #endif
1278   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1279   if (a->row) {
1280     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1281   }
1282   if (a->col) {
1283     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1284   }
1285   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1286   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1287   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1288   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1289   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1290   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1291   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1292   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1293   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1294 
1295   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1296   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1297   ierr = PetscFree(a);CHKERRQ(ierr);
1298 
1299   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1313 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1314 {
1315   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   switch (op) {
1320   case MAT_ROW_ORIENTED:
1321     a->roworiented    = flg;
1322     break;
1323   case MAT_KEEP_NONZERO_PATTERN:
1324     a->keepnonzeropattern = flg;
1325     break;
1326   case MAT_NEW_NONZERO_LOCATIONS:
1327     a->nonew          = (flg ? 0 : 1);
1328     break;
1329   case MAT_NEW_NONZERO_LOCATION_ERR:
1330     a->nonew          = (flg ? -1 : 0);
1331     break;
1332   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1333     a->nonew          = (flg ? -2 : 0);
1334     break;
1335   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1336     a->nounused       = (flg ? -1 : 0);
1337     break;
1338   case MAT_NEW_DIAGONALS:
1339   case MAT_IGNORE_OFF_PROC_ENTRIES:
1340   case MAT_USE_HASH_TABLE:
1341     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1342     break;
1343   case MAT_SYMMETRIC:
1344   case MAT_STRUCTURALLY_SYMMETRIC:
1345   case MAT_HERMITIAN:
1346   case MAT_SYMMETRY_ETERNAL:
1347     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1348     break;
1349   default:
1350     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1357 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1358 {
1359   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1360   PetscErrorCode ierr;
1361   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1362   MatScalar      *aa,*aa_i;
1363   PetscScalar    *v_i;
1364 
1365   PetscFunctionBegin;
1366   bs  = A->rmap->bs;
1367   ai  = a->i;
1368   aj  = a->j;
1369   aa  = a->a;
1370   bs2 = a->bs2;
1371 
1372   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1373 
1374   bn  = row/bs;   /* Block number */
1375   bp  = row % bs; /* Block Position */
1376   M   = ai[bn+1] - ai[bn];
1377   *nz = bs*M;
1378 
1379   if (v) {
1380     *v = 0;
1381     if (*nz) {
1382       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1383       for (i=0; i<M; i++) { /* for each block in the block row */
1384         v_i  = *v + i*bs;
1385         aa_i = aa + bs2*(ai[bn] + i);
1386         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1387       }
1388     }
1389   }
1390 
1391   if (idx) {
1392     *idx = 0;
1393     if (*nz) {
1394       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1395       for (i=0; i<M; i++) { /* for each block in the block row */
1396         idx_i = *idx + i*bs;
1397         itmp  = bs*aj[ai[bn] + i];
1398         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1399       }
1400     }
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1407 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1408 {
1409   PetscErrorCode ierr;
1410 
1411   PetscFunctionBegin;
1412   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1413   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1421 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1422 {
1423   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1424   Mat            C;
1425   PetscErrorCode ierr;
1426   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1427   PetscInt       *rows,*cols,bs2=a->bs2;
1428   MatScalar      *array;
1429 
1430   PetscFunctionBegin;
1431   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1432   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1433     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1434     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1435 
1436     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1437     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1438     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1439     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1440     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1441     ierr = PetscFree(col);CHKERRQ(ierr);
1442   } else {
1443     C = *B;
1444   }
1445 
1446   array = a->a;
1447   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1448   for (i=0; i<mbs; i++) {
1449     cols[0] = i*bs;
1450     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1451     len = ai[i+1] - ai[i];
1452     for (j=0; j<len; j++) {
1453       rows[0] = (*aj++)*bs;
1454       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1455       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1456       array += bs2;
1457     }
1458   }
1459   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1460 
1461   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1463 
1464   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1465     *B = C;
1466   } else {
1467     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1468   }
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1474 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1475 {
1476   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1477   PetscErrorCode ierr;
1478   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1479   int            fd;
1480   PetscScalar    *aa;
1481   FILE           *file;
1482 
1483   PetscFunctionBegin;
1484   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1485   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1486   col_lens[0] = MAT_FILE_CLASSID;
1487 
1488   col_lens[1] = A->rmap->N;
1489   col_lens[2] = A->cmap->n;
1490   col_lens[3] = a->nz*bs2;
1491 
1492   /* store lengths of each row and write (including header) to file */
1493   count = 0;
1494   for (i=0; i<a->mbs; i++) {
1495     for (j=0; j<bs; j++) {
1496       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1497     }
1498   }
1499   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1500   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1501 
1502   /* store column indices (zero start index) */
1503   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1504   count = 0;
1505   for (i=0; i<a->mbs; i++) {
1506     for (j=0; j<bs; j++) {
1507       for (k=a->i[i]; k<a->i[i+1]; k++) {
1508         for (l=0; l<bs; l++) {
1509           jj[count++] = bs*a->j[k] + l;
1510         }
1511       }
1512     }
1513   }
1514   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1515   ierr = PetscFree(jj);CHKERRQ(ierr);
1516 
1517   /* store nonzero values */
1518   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1519   count = 0;
1520   for (i=0; i<a->mbs; i++) {
1521     for (j=0; j<bs; j++) {
1522       for (k=a->i[i]; k<a->i[i+1]; k++) {
1523         for (l=0; l<bs; l++) {
1524           aa[count++] = a->a[bs2*k + l*bs + j];
1525         }
1526       }
1527     }
1528   }
1529   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1530   ierr = PetscFree(aa);CHKERRQ(ierr);
1531 
1532   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1533   if (file) {
1534     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1535   }
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1541 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1542 {
1543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1544   PetscErrorCode    ierr;
1545   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1546   PetscViewerFormat format;
1547 
1548   PetscFunctionBegin;
1549   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1550   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1551     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1552   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1553     Mat aij;
1554     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1555     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1556     ierr = MatDestroy(aij);CHKERRQ(ierr);
1557   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1558      PetscFunctionReturn(0);
1559   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1560     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1561     for (i=0; i<a->mbs; i++) {
1562       for (j=0; j<bs; j++) {
1563         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1564         for (k=a->i[i]; k<a->i[i+1]; k++) {
1565           for (l=0; l<bs; l++) {
1566 #if defined(PETSC_USE_COMPLEX)
1567             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1568               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1569                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1570             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1571               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1572                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1573             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1574               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1575             }
1576 #else
1577             if (a->a[bs2*k + l*bs + j] != 0.0) {
1578               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1579             }
1580 #endif
1581           }
1582         }
1583         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1584       }
1585     }
1586     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1587   } else {
1588     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1589     for (i=0; i<a->mbs; i++) {
1590       for (j=0; j<bs; j++) {
1591         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1592         for (k=a->i[i]; k<a->i[i+1]; k++) {
1593           for (l=0; l<bs; l++) {
1594 #if defined(PETSC_USE_COMPLEX)
1595             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1596               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1597                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1598             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1599               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1600                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1601             } else {
1602               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1603             }
1604 #else
1605             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1606 #endif
1607           }
1608         }
1609         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1610       }
1611     }
1612     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1613   }
1614   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1615   PetscFunctionReturn(0);
1616 }
1617 
1618 #undef __FUNCT__
1619 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1620 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1621 {
1622   Mat            A = (Mat) Aa;
1623   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1624   PetscErrorCode ierr;
1625   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1626   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1627   MatScalar      *aa;
1628   PetscViewer    viewer;
1629 
1630   PetscFunctionBegin;
1631 
1632   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1633   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1634 
1635   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1636 
1637   /* loop over matrix elements drawing boxes */
1638   color = PETSC_DRAW_BLUE;
1639   for (i=0,row=0; i<mbs; i++,row+=bs) {
1640     for (j=a->i[i]; j<a->i[i+1]; j++) {
1641       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643       aa = a->a + j*bs2;
1644       for (k=0; k<bs; k++) {
1645         for (l=0; l<bs; l++) {
1646           if (PetscRealPart(*aa++) >=  0.) continue;
1647           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1648         }
1649       }
1650     }
1651   }
1652   color = PETSC_DRAW_CYAN;
1653   for (i=0,row=0; i<mbs; i++,row+=bs) {
1654     for (j=a->i[i]; j<a->i[i+1]; j++) {
1655       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1656       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1657       aa = a->a + j*bs2;
1658       for (k=0; k<bs; k++) {
1659         for (l=0; l<bs; l++) {
1660           if (PetscRealPart(*aa++) != 0.) continue;
1661           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1662         }
1663       }
1664     }
1665   }
1666 
1667   color = PETSC_DRAW_RED;
1668   for (i=0,row=0; i<mbs; i++,row+=bs) {
1669     for (j=a->i[i]; j<a->i[i+1]; j++) {
1670       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1671       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1672       aa = a->a + j*bs2;
1673       for (k=0; k<bs; k++) {
1674         for (l=0; l<bs; l++) {
1675           if (PetscRealPart(*aa++) <= 0.) continue;
1676           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1677         }
1678       }
1679     }
1680   }
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1686 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1687 {
1688   PetscErrorCode ierr;
1689   PetscReal      xl,yl,xr,yr,w,h;
1690   PetscDraw      draw;
1691   PetscTruth     isnull;
1692 
1693   PetscFunctionBegin;
1694 
1695   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1696   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1697 
1698   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1699   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1700   xr += w;    yr += h;  xl = -w;     yl = -h;
1701   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1702   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1703   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatView_SeqBAIJ"
1709 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1710 {
1711   PetscErrorCode ierr;
1712   PetscTruth     iascii,isbinary,isdraw;
1713 
1714   PetscFunctionBegin;
1715   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1717   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1718   if (iascii){
1719     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1720   } else if (isbinary) {
1721     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1722   } else if (isdraw) {
1723     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1724   } else {
1725     Mat B;
1726     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1727     ierr = MatView(B,viewer);CHKERRQ(ierr);
1728     ierr = MatDestroy(B);CHKERRQ(ierr);
1729   }
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1736 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1737 {
1738   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1739   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1740   PetscInt    *ai = a->i,*ailen = a->ilen;
1741   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1742   MatScalar   *ap,*aa = a->a;
1743 
1744   PetscFunctionBegin;
1745   for (k=0; k<m; k++) { /* loop over rows */
1746     row  = im[k]; brow = row/bs;
1747     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1748     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1749     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1750     nrow = ailen[brow];
1751     for (l=0; l<n; l++) { /* loop over columns */
1752       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1753       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1754       col  = in[l] ;
1755       bcol = col/bs;
1756       cidx = col%bs;
1757       ridx = row%bs;
1758       high = nrow;
1759       low  = 0; /* assume unsorted */
1760       while (high-low > 5) {
1761         t = (low+high)/2;
1762         if (rp[t] > bcol) high = t;
1763         else             low  = t;
1764       }
1765       for (i=low; i<high; i++) {
1766         if (rp[i] > bcol) break;
1767         if (rp[i] == bcol) {
1768           *v++ = ap[bs2*i+bs*cidx+ridx];
1769           goto finished;
1770         }
1771       }
1772       *v++ = 0.0;
1773       finished:;
1774     }
1775   }
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1782 {
1783   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1784   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1785   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1786   PetscErrorCode    ierr;
1787   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1788   PetscTruth        roworiented=a->roworiented;
1789   const PetscScalar *value = v;
1790   MatScalar         *ap,*aa = a->a,*bap;
1791 
1792   PetscFunctionBegin;
1793   if (roworiented) {
1794     stepval = (n-1)*bs;
1795   } else {
1796     stepval = (m-1)*bs;
1797   }
1798   for (k=0; k<m; k++) { /* loop over added rows */
1799     row  = im[k];
1800     if (row < 0) continue;
1801 #if defined(PETSC_USE_DEBUG)
1802     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1803 #endif
1804     rp   = aj + ai[row];
1805     ap   = aa + bs2*ai[row];
1806     rmax = imax[row];
1807     nrow = ailen[row];
1808     low  = 0;
1809     high = nrow;
1810     for (l=0; l<n; l++) { /* loop over added columns */
1811       if (in[l] < 0) continue;
1812 #if defined(PETSC_USE_DEBUG)
1813       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1814 #endif
1815       col = in[l];
1816       if (roworiented) {
1817         value = v + k*(stepval+bs)*bs + l*bs;
1818       } else {
1819         value = v + l*(stepval+bs)*bs + k*bs;
1820       }
1821       if (col <= lastcol) low = 0; else high = nrow;
1822       lastcol = col;
1823       while (high-low > 7) {
1824         t = (low+high)/2;
1825         if (rp[t] > col) high = t;
1826         else             low  = t;
1827       }
1828       for (i=low; i<high; i++) {
1829         if (rp[i] > col) break;
1830         if (rp[i] == col) {
1831           bap  = ap +  bs2*i;
1832           if (roworiented) {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] += *value++;
1837                 }
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] = *value++;
1843                 }
1844               }
1845             }
1846           } else {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   *bap++ += *value++;
1851                 }
1852               }
1853             } else {
1854               for (ii=0; ii<bs; ii++,value+=stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   *bap++  = *value++;
1857                 }
1858               }
1859             }
1860           }
1861           goto noinsert2;
1862         }
1863       }
1864       if (nonew == 1) goto noinsert2;
1865       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1866       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1867       N = nrow++ - 1; high++;
1868       /* shift up all the later entries in this row */
1869       for (ii=N; ii>=i; ii--) {
1870         rp[ii+1] = rp[ii];
1871         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1872       }
1873       if (N >= i) {
1874         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875       }
1876       rp[i] = col;
1877       bap   = ap +  bs2*i;
1878       if (roworiented) {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=ii; jj<bs2; jj+=bs) {
1881             bap[jj] = *value++;
1882           }
1883         }
1884       } else {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=0; jj<bs; jj++) {
1887             *bap++  = *value++;
1888           }
1889         }
1890       }
1891       noinsert2:;
1892       low = i;
1893     }
1894     ailen[row] = nrow;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902 {
1903   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1904   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1905   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1906   PetscErrorCode ierr;
1907   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1908   MatScalar      *aa = a->a,*ap;
1909   PetscReal      ratio=0.6;
1910 
1911   PetscFunctionBegin;
1912   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1913 
1914   if (m) rmax = ailen[0];
1915   for (i=1; i<mbs; i++) {
1916     /* move each row back by the amount of empty slots (fshift) before it*/
1917     fshift += imax[i-1] - ailen[i-1];
1918     rmax   = PetscMax(rmax,ailen[i]);
1919     if (fshift) {
1920       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1921       N = ailen[i];
1922       for (j=0; j<N; j++) {
1923         ip[j-fshift] = ip[j];
1924         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1925       }
1926     }
1927     ai[i] = ai[i-1] + ailen[i-1];
1928   }
1929   if (mbs) {
1930     fshift += imax[mbs-1] - ailen[mbs-1];
1931     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1932   }
1933   /* reset ilen and imax for each row */
1934   for (i=0; i<mbs; i++) {
1935     ailen[i] = imax[i] = ai[i+1] - ai[i];
1936   }
1937   a->nz = ai[mbs];
1938 
1939   /* diagonals may have moved, so kill the diagonal pointers */
1940   a->idiagvalid = PETSC_FALSE;
1941   if (fshift && a->diag) {
1942     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1944     a->diag = 0;
1945   }
1946   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1947   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1948   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1949   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1950   A->info.mallocs     += a->reallocs;
1951   a->reallocs          = 0;
1952   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1953 
1954   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1955   if (a->compressedrow.use){
1956     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1957   }
1958 
1959   A->same_nonzero = PETSC_TRUE;
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 /*
1964    This function returns an array of flags which indicate the locations of contiguous
1965    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1966    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1967    Assume: sizes should be long enough to hold all the values.
1968 */
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1971 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1972 {
1973   PetscInt   i,j,k,row;
1974   PetscTruth flg;
1975 
1976   PetscFunctionBegin;
1977   for (i=0,j=0; i<n; j++) {
1978     row = idx[i];
1979     if (row%bs!=0) { /* Not the begining of a block */
1980       sizes[j] = 1;
1981       i++;
1982     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1983       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1984       i++;
1985     } else { /* Begining of the block, so check if the complete block exists */
1986       flg = PETSC_TRUE;
1987       for (k=1; k<bs; k++) {
1988         if (row+k != idx[i+k]) { /* break in the block */
1989           flg = PETSC_FALSE;
1990           break;
1991         }
1992       }
1993       if (flg) { /* No break in the bs */
1994         sizes[j] = bs;
1995         i+= bs;
1996       } else {
1997         sizes[j] = 1;
1998         i++;
1999       }
2000     }
2001   }
2002   *bs_max = j;
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2008 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2009 {
2010   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2011   PetscErrorCode ierr;
2012   PetscInt       i,j,k,count,*rows;
2013   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2014   PetscScalar    zero = 0.0;
2015   MatScalar      *aa;
2016 
2017   PetscFunctionBegin;
2018   /* Make a copy of the IS and  sort it */
2019   /* allocate memory for rows,sizes */
2020   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2021 
2022   /* copy IS values to rows, and sort them */
2023   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2024   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2025   if (baij->keepnonzeropattern) {
2026     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2027     bs_max = is_n;
2028     A->same_nonzero = PETSC_TRUE;
2029   } else {
2030     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2031     A->same_nonzero = PETSC_FALSE;
2032   }
2033 
2034   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2035     row   = rows[j];
2036     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2037     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2038     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2039     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2040       if (diag != 0.0) {
2041         if (baij->ilen[row/bs] > 0) {
2042           baij->ilen[row/bs]       = 1;
2043           baij->j[baij->i[row/bs]] = row/bs;
2044           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2045         }
2046         /* Now insert all the diagonal values for this bs */
2047         for (k=0; k<bs; k++) {
2048           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2049         }
2050       } else { /* (diag == 0.0) */
2051         baij->ilen[row/bs] = 0;
2052       } /* end (diag == 0.0) */
2053     } else { /* (sizes[i] != bs) */
2054 #if defined (PETSC_USE_DEBUG)
2055       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2056 #endif
2057       for (k=0; k<count; k++) {
2058         aa[0] =  zero;
2059         aa    += bs;
2060       }
2061       if (diag != 0.0) {
2062         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2063       }
2064     }
2065   }
2066 
2067   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2068   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2074 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2075 {
2076   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2077   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2078   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2079   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2080   PetscErrorCode ierr;
2081   PetscInt       ridx,cidx,bs2=a->bs2;
2082   PetscTruth     roworiented=a->roworiented;
2083   MatScalar      *ap,value,*aa=a->a,*bap;
2084 
2085   PetscFunctionBegin;
2086   if (v) PetscValidScalarPointer(v,6);
2087   for (k=0; k<m; k++) { /* loop over added rows */
2088     row  = im[k];
2089     brow = row/bs;
2090     if (row < 0) continue;
2091 #if defined(PETSC_USE_DEBUG)
2092     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2093 #endif
2094     rp   = aj + ai[brow];
2095     ap   = aa + bs2*ai[brow];
2096     rmax = imax[brow];
2097     nrow = ailen[brow];
2098     low  = 0;
2099     high = nrow;
2100     for (l=0; l<n; l++) { /* loop over added columns */
2101       if (in[l] < 0) continue;
2102 #if defined(PETSC_USE_DEBUG)
2103       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2104 #endif
2105       col = in[l]; bcol = col/bs;
2106       ridx = row % bs; cidx = col % bs;
2107       if (roworiented) {
2108         value = v[l + k*n];
2109       } else {
2110         value = v[k + l*m];
2111       }
2112       if (col <= lastcol) low = 0; else high = nrow;
2113       lastcol = col;
2114       while (high-low > 7) {
2115         t = (low+high)/2;
2116         if (rp[t] > bcol) high = t;
2117         else              low  = t;
2118       }
2119       for (i=low; i<high; i++) {
2120         if (rp[i] > bcol) break;
2121         if (rp[i] == bcol) {
2122           bap  = ap +  bs2*i + bs*cidx + ridx;
2123           if (is == ADD_VALUES) *bap += value;
2124           else                  *bap  = value;
2125           goto noinsert1;
2126         }
2127       }
2128       if (nonew == 1) goto noinsert1;
2129       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2130       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2131       N = nrow++ - 1; high++;
2132       /* shift up all the later entries in this row */
2133       for (ii=N; ii>=i; ii--) {
2134         rp[ii+1] = rp[ii];
2135         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2136       }
2137       if (N>=i) {
2138         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2139       }
2140       rp[i]                      = bcol;
2141       ap[bs2*i + bs*cidx + ridx] = value;
2142       a->nz++;
2143       noinsert1:;
2144       low = i;
2145     }
2146     ailen[brow] = nrow;
2147   }
2148   A->same_nonzero = PETSC_FALSE;
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2154 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2155 {
2156   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2157   Mat            outA;
2158   PetscErrorCode ierr;
2159   PetscTruth     row_identity,col_identity;
2160 
2161   PetscFunctionBegin;
2162   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2163   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2164   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2165   if (!row_identity || !col_identity) {
2166     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2167   }
2168 
2169   outA            = inA;
2170   inA->factortype = MAT_FACTOR_LU;
2171 
2172   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2173 
2174   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2175   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2176   a->row = row;
2177   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2178   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2179   a->col = col;
2180 
2181   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2182   if (a->icol) {
2183     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2184   }
2185   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2186   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2187 
2188   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2189   if (!a->solve_work) {
2190     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2191     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2192   }
2193   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2194 
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 EXTERN_C_BEGIN
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2201 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2202 {
2203   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2204   PetscInt    i,nz,mbs;
2205 
2206   PetscFunctionBegin;
2207   nz  = baij->maxnz/baij->bs2;
2208   mbs = baij->mbs;
2209   for (i=0; i<nz; i++) {
2210     baij->j[i] = indices[i];
2211   }
2212   baij->nz = nz;
2213   for (i=0; i<mbs; i++) {
2214     baij->ilen[i] = baij->imax[i];
2215   }
2216   PetscFunctionReturn(0);
2217 }
2218 EXTERN_C_END
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2222 /*@
2223     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2224        in the matrix.
2225 
2226   Input Parameters:
2227 +  mat - the SeqBAIJ matrix
2228 -  indices - the column indices
2229 
2230   Level: advanced
2231 
2232   Notes:
2233     This can be called if you have precomputed the nonzero structure of the
2234   matrix and want to provide it to the matrix object to improve the performance
2235   of the MatSetValues() operation.
2236 
2237     You MUST have set the correct numbers of nonzeros per row in the call to
2238   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2239 
2240     MUST be called before any calls to MatSetValues();
2241 
2242 @*/
2243 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2244 {
2245   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2246 
2247   PetscFunctionBegin;
2248   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2249   PetscValidPointer(indices,2);
2250   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2251   if (f) {
2252     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2253   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2259 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2260 {
2261   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2262   PetscErrorCode ierr;
2263   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2264   PetscReal      atmp;
2265   PetscScalar    *x,zero = 0.0;
2266   MatScalar      *aa;
2267   PetscInt       ncols,brow,krow,kcol;
2268 
2269   PetscFunctionBegin;
2270   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2271   bs   = A->rmap->bs;
2272   aa   = a->a;
2273   ai   = a->i;
2274   aj   = a->j;
2275   mbs  = a->mbs;
2276 
2277   ierr = VecSet(v,zero);CHKERRQ(ierr);
2278   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2279   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2280   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2281   for (i=0; i<mbs; i++) {
2282     ncols = ai[1] - ai[0]; ai++;
2283     brow  = bs*i;
2284     for (j=0; j<ncols; j++){
2285       for (kcol=0; kcol<bs; kcol++){
2286         for (krow=0; krow<bs; krow++){
2287           atmp = PetscAbsScalar(*aa);aa++;
2288           row = brow + krow;    /* row index */
2289           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2290           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2291         }
2292       }
2293       aj++;
2294     }
2295   }
2296   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatCopy_SeqBAIJ"
2302 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2303 {
2304   PetscErrorCode ierr;
2305 
2306   PetscFunctionBegin;
2307   /* If the two matrices have the same copy implementation, use fast copy. */
2308   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2309     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2310     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2311 
2312     if (a->i[A->rmap->N] != b->i[B->rmap->N]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2313     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2314   } else {
2315     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2316   }
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2322 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2323 {
2324   PetscErrorCode ierr;
2325 
2326   PetscFunctionBegin;
2327   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2328   PetscFunctionReturn(0);
2329 }
2330 
2331 #undef __FUNCT__
2332 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2333 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2334 {
2335   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2336   PetscFunctionBegin;
2337   *array = a->a;
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2343 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2344 {
2345   PetscFunctionBegin;
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2351 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2352 {
2353   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2354   PetscErrorCode ierr;
2355   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2356   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2357 
2358   PetscFunctionBegin;
2359   if (str == SAME_NONZERO_PATTERN) {
2360     PetscScalar alpha = a;
2361     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2362   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2363     if (y->xtoy && y->XtoY != X) {
2364       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2365       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2366     }
2367     if (!y->xtoy) { /* get xtoy */
2368       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2369       y->XtoY = X;
2370       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2371     }
2372     bs2 = bs*bs;
2373     for (i=0; i<x->nz; i++) {
2374       j = 0;
2375       while (j < bs2){
2376         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2377         j++;
2378       }
2379     }
2380     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2381   } else {
2382     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2383   }
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2389 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2390 {
2391   PetscInt rbs,cbs;
2392   PetscErrorCode ierr;
2393 
2394   PetscFunctionBegin;
2395   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2396   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2397   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2398   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2404 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2405 {
2406   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2407   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2408   MatScalar      *aa = a->a;
2409 
2410   PetscFunctionBegin;
2411   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2417 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2418 {
2419   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2420   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2421   MatScalar      *aa = a->a;
2422 
2423   PetscFunctionBegin;
2424   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2432 /*
2433     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2434 */
2435 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2436 {
2437   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2438   PetscErrorCode ierr;
2439   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2440   PetscInt       nz = a->i[m],row,*jj,mr,col;
2441 
2442   PetscFunctionBegin;
2443   *nn = n;
2444   if (!ia) PetscFunctionReturn(0);
2445   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2446   else {
2447     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2448     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2449     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2450     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2451     jj = a->j;
2452     for (i=0; i<nz; i++) {
2453       collengths[jj[i]]++;
2454     }
2455     cia[0] = oshift;
2456     for (i=0; i<n; i++) {
2457       cia[i+1] = cia[i] + collengths[i];
2458     }
2459     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2460     jj   = a->j;
2461     for (row=0; row<m; row++) {
2462       mr = a->i[row+1] - a->i[row];
2463       for (i=0; i<mr; i++) {
2464         col = *jj++;
2465         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2466       }
2467     }
2468     ierr = PetscFree(collengths);CHKERRQ(ierr);
2469     *ia = cia; *ja = cja;
2470   }
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2476 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2477 {
2478   PetscErrorCode ierr;
2479 
2480   PetscFunctionBegin;
2481   if (!ia) PetscFunctionReturn(0);
2482   ierr = PetscFree(*ia);CHKERRQ(ierr);
2483   ierr = PetscFree(*ja);CHKERRQ(ierr);
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2489 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2490 {
2491   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2492   PetscErrorCode ierr;
2493   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2494   PetscScalar    dx,*y,*xx,*w3_array;
2495   PetscScalar    *vscale_array;
2496   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2497   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2498   void           *fctx = coloring->fctx;
2499   PetscTruth     flg = PETSC_FALSE;
2500   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2501   Vec            x1_tmp;
2502 
2503   PetscFunctionBegin;
2504   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2505   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2506   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2507   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2508 
2509   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2510   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2511   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2512   if (flg) {
2513     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2514   } else {
2515     PetscTruth assembled;
2516     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2517     if (assembled) {
2518       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2519     }
2520   }
2521 
2522   x1_tmp = x1;
2523   if (!coloring->vscale){
2524     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2525   }
2526 
2527   /*
2528     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2529     coloring->F for the coarser grids from the finest
2530   */
2531   if (coloring->F) {
2532     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2533     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2534     if (m1 != m2) {
2535       coloring->F = 0;
2536       }
2537     }
2538 
2539   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2540     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2541   }
2542   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2543 
2544   /* Set w1 = F(x1) */
2545   if (coloring->F) {
2546     w1          = coloring->F; /* use already computed value of function */
2547     coloring->F = 0;
2548   } else {
2549     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2550     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2551     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2552   }
2553 
2554   if (!coloring->w3) {
2555     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2556     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2557   }
2558   w3 = coloring->w3;
2559 
2560     CHKMEMQ;
2561     /* Compute all the local scale factors, including ghost points */
2562   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2563   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2564   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2565   if (ctype == IS_COLORING_GHOSTED){
2566     col_start = 0; col_end = N;
2567   } else if (ctype == IS_COLORING_GLOBAL){
2568     xx = xx - start;
2569     vscale_array = vscale_array - start;
2570     col_start = start; col_end = N + start;
2571   }    CHKMEMQ;
2572   for (col=col_start; col<col_end; col++){
2573     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2574     if (coloring->htype[0] == 'w') {
2575       dx = 1.0 + unorm;
2576     } else {
2577       dx  = xx[col];
2578     }
2579     if (dx == 0.0) dx = 1.0;
2580 #if !defined(PETSC_USE_COMPLEX)
2581     if (dx < umin && dx >= 0.0)      dx = umin;
2582     else if (dx < 0.0 && dx > -umin) dx = -umin;
2583 #else
2584     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2585     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2586 #endif
2587     dx               *= epsilon;
2588     vscale_array[col] = 1.0/dx;
2589   }     CHKMEMQ;
2590   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2591   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2592   if (ctype == IS_COLORING_GLOBAL){
2593     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2594     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2595   }
2596   CHKMEMQ;
2597   if (coloring->vscaleforrow) {
2598     vscaleforrow = coloring->vscaleforrow;
2599   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2600 
2601   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2602   /*
2603     Loop over each color
2604   */
2605   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2606   for (k=0; k<coloring->ncolors; k++) {
2607     coloring->currentcolor = k;
2608     for (i=0; i<bs; i++) {
2609       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2610       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2611       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2612       /*
2613 	Loop over each column associated with color
2614 	adding the perturbation to the vector w3.
2615       */
2616       for (l=0; l<coloring->ncolumns[k]; l++) {
2617 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2618 	if (coloring->htype[0] == 'w') {
2619 	  dx = 1.0 + unorm;
2620 	} else {
2621 	  dx  = xx[col];
2622 	}
2623 	if (dx == 0.0) dx = 1.0;
2624 #if !defined(PETSC_USE_COMPLEX)
2625 	if (dx < umin && dx >= 0.0)      dx = umin;
2626 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2627 #else
2628 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2629 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2630 #endif
2631 	dx            *= epsilon;
2632 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2633 	w3_array[col] += dx;
2634       }
2635       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2636       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2637 
2638       /*
2639 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2640 	w2 = F(x1 + dx) - F(x1)
2641       */
2642       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2643       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2644       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2645       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2646 
2647       /*
2648 	Loop over rows of vector, putting results into Jacobian matrix
2649       */
2650       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2651       for (l=0; l<coloring->nrows[k]; l++) {
2652 	row    = bs*coloring->rows[k][l];             /* local row index */
2653 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2654         for (j=0; j<bs; j++) {
2655   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2656           srows[j]  = row + start + j;
2657         }
2658 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2659       }
2660       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2661     }
2662   } /* endof for each color */
2663   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2664   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2665   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2666   ierr = PetscFree(srows);CHKERRQ(ierr);
2667 
2668   coloring->currentcolor = -1;
2669   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2670   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2671   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 /* -------------------------------------------------------------------*/
2676 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2677        MatGetRow_SeqBAIJ,
2678        MatRestoreRow_SeqBAIJ,
2679        MatMult_SeqBAIJ_N,
2680 /* 4*/ MatMultAdd_SeqBAIJ_N,
2681        MatMultTranspose_SeqBAIJ,
2682        MatMultTransposeAdd_SeqBAIJ,
2683        0,
2684        0,
2685        0,
2686 /*10*/ 0,
2687        MatLUFactor_SeqBAIJ,
2688        0,
2689        0,
2690        MatTranspose_SeqBAIJ,
2691 /*15*/ MatGetInfo_SeqBAIJ,
2692        MatEqual_SeqBAIJ,
2693        MatGetDiagonal_SeqBAIJ,
2694        MatDiagonalScale_SeqBAIJ,
2695        MatNorm_SeqBAIJ,
2696 /*20*/ 0,
2697        MatAssemblyEnd_SeqBAIJ,
2698        MatSetOption_SeqBAIJ,
2699        MatZeroEntries_SeqBAIJ,
2700 /*24*/ MatZeroRows_SeqBAIJ,
2701        0,
2702        0,
2703        0,
2704        0,
2705 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2706        0,
2707        0,
2708        MatGetArray_SeqBAIJ,
2709        MatRestoreArray_SeqBAIJ,
2710 /*34*/ MatDuplicate_SeqBAIJ,
2711        0,
2712        0,
2713        MatILUFactor_SeqBAIJ,
2714        0,
2715 /*39*/ MatAXPY_SeqBAIJ,
2716        MatGetSubMatrices_SeqBAIJ,
2717        MatIncreaseOverlap_SeqBAIJ,
2718        MatGetValues_SeqBAIJ,
2719        MatCopy_SeqBAIJ,
2720 /*44*/ 0,
2721        MatScale_SeqBAIJ,
2722        0,
2723        0,
2724        0,
2725 /*49*/ MatSetBlockSize_SeqBAIJ,
2726        MatGetRowIJ_SeqBAIJ,
2727        MatRestoreRowIJ_SeqBAIJ,
2728        MatGetColumnIJ_SeqBAIJ,
2729        MatRestoreColumnIJ_SeqBAIJ,
2730 /*54*/ MatFDColoringCreate_SeqAIJ,
2731        0,
2732        0,
2733        0,
2734        MatSetValuesBlocked_SeqBAIJ,
2735 /*59*/ MatGetSubMatrix_SeqBAIJ,
2736        MatDestroy_SeqBAIJ,
2737        MatView_SeqBAIJ,
2738        0,
2739        0,
2740 /*64*/ 0,
2741        0,
2742        0,
2743        0,
2744        0,
2745 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2746        0,
2747        MatConvert_Basic,
2748        0,
2749        0,
2750 /*74*/ 0,
2751        MatFDColoringApply_BAIJ,
2752        0,
2753        0,
2754        0,
2755 /*79*/ 0,
2756        0,
2757        0,
2758        0,
2759        MatLoad_SeqBAIJ,
2760 /*84*/ 0,
2761        0,
2762        0,
2763        0,
2764        0,
2765 /*89*/ 0,
2766        0,
2767        0,
2768        0,
2769        0,
2770 /*94*/ 0,
2771        0,
2772        0,
2773        0,
2774        0,
2775 /*99*/0,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*104*/0,
2781        MatRealPart_SeqBAIJ,
2782        MatImaginaryPart_SeqBAIJ,
2783        0,
2784        0,
2785 /*109*/0,
2786        0,
2787        0,
2788        0,
2789        MatMissingDiagonal_SeqBAIJ,
2790 /*114*/0,
2791        0,
2792        0,
2793        0,
2794        0,
2795 /*119*/0,
2796        0,
2797        MatMultHermitianTranspose_SeqBAIJ,
2798        MatMultHermitianTransposeAdd_SeqBAIJ,
2799        MatLoadnew_SeqBAIJ
2800 };
2801 
2802 EXTERN_C_BEGIN
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2805 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2806 {
2807   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2808   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2809   PetscErrorCode ierr;
2810 
2811   PetscFunctionBegin;
2812   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2813 
2814   /* allocate space for values if not already there */
2815   if (!aij->saved_values) {
2816     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2817     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2818   }
2819 
2820   /* copy values over */
2821   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2822   PetscFunctionReturn(0);
2823 }
2824 EXTERN_C_END
2825 
2826 EXTERN_C_BEGIN
2827 #undef __FUNCT__
2828 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2829 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2830 {
2831   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2832   PetscErrorCode ierr;
2833   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2834 
2835   PetscFunctionBegin;
2836   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2837   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2838 
2839   /* copy values over */
2840   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 EXTERN_C_END
2844 
2845 EXTERN_C_BEGIN
2846 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2847 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2848 EXTERN_C_END
2849 
2850 EXTERN_C_BEGIN
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2853 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2854 {
2855   Mat_SeqBAIJ    *b;
2856   PetscErrorCode ierr;
2857   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2858   PetscTruth     flg,skipallocation = PETSC_FALSE;
2859 
2860   PetscFunctionBegin;
2861 
2862   if (nz == MAT_SKIP_ALLOCATION) {
2863     skipallocation = PETSC_TRUE;
2864     nz             = 0;
2865   }
2866 
2867   if (bs < 0) {
2868     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2869       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2870     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2871     bs   = PetscAbs(bs);
2872   }
2873   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2874   bs   = newbs;
2875 
2876   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2877   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2878   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2879   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2880 
2881   B->preallocated = PETSC_TRUE;
2882 
2883   mbs  = B->rmap->n/bs;
2884   nbs  = B->cmap->n/bs;
2885   bs2  = bs*bs;
2886 
2887   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2888 
2889   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2890   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2891   if (nnz) {
2892     for (i=0; i<mbs; i++) {
2893       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2894       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2895     }
2896   }
2897 
2898   b       = (Mat_SeqBAIJ*)B->data;
2899   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2900     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2901   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2902 
2903   if (!flg) {
2904     switch (bs) {
2905     case 1:
2906       B->ops->mult            = MatMult_SeqBAIJ_1;
2907       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2908       B->ops->sor             = MatSOR_SeqBAIJ_1;
2909       break;
2910     case 2:
2911       B->ops->mult            = MatMult_SeqBAIJ_2;
2912       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2913       B->ops->sor             = MatSOR_SeqBAIJ_2;
2914       break;
2915     case 3:
2916       B->ops->mult            = MatMult_SeqBAIJ_3;
2917       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2918       B->ops->sor             = MatSOR_SeqBAIJ_3;
2919       break;
2920     case 4:
2921       B->ops->mult            = MatMult_SeqBAIJ_4;
2922       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2923       B->ops->sor             = MatSOR_SeqBAIJ_4;
2924       break;
2925     case 5:
2926       B->ops->mult            = MatMult_SeqBAIJ_5;
2927       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2928       B->ops->sor             = MatSOR_SeqBAIJ_5;
2929       break;
2930     case 6:
2931       B->ops->mult            = MatMult_SeqBAIJ_6;
2932       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2933       B->ops->sor             = MatSOR_SeqBAIJ_6;
2934       break;
2935     case 7:
2936       B->ops->mult            = MatMult_SeqBAIJ_7;
2937       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2938       B->ops->sor             = MatSOR_SeqBAIJ_7;
2939       break;
2940     case 15:
2941       B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2942       break;
2943     default:
2944       B->ops->mult            = MatMult_SeqBAIJ_N;
2945       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2946       break;
2947     }
2948   }
2949   B->rmap->bs      = bs;
2950   b->mbs     = mbs;
2951   b->nbs     = nbs;
2952   if (!skipallocation) {
2953     if (!b->imax) {
2954       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2955       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2956       b->free_imax_ilen = PETSC_TRUE;
2957     }
2958     /* b->ilen will count nonzeros in each block row so far. */
2959     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2960     if (!nnz) {
2961       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2962       else if (nz <= 0)        nz = 1;
2963       for (i=0; i<mbs; i++) b->imax[i] = nz;
2964       nz = nz*mbs;
2965     } else {
2966       nz = 0;
2967       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2968     }
2969 
2970     /* allocate the matrix space */
2971     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2972     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2973     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2974     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2975     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2976     b->singlemalloc = PETSC_TRUE;
2977     b->i[0] = 0;
2978     for (i=1; i<mbs+1; i++) {
2979       b->i[i] = b->i[i-1] + b->imax[i-1];
2980     }
2981     b->free_a     = PETSC_TRUE;
2982     b->free_ij    = PETSC_TRUE;
2983   } else {
2984     b->free_a     = PETSC_FALSE;
2985     b->free_ij    = PETSC_FALSE;
2986   }
2987 
2988   B->rmap->bs          = bs;
2989   b->bs2              = bs2;
2990   b->mbs              = mbs;
2991   b->nz               = 0;
2992   b->maxnz            = nz*bs2;
2993   B->info.nz_unneeded = (PetscReal)b->maxnz;
2994   PetscFunctionReturn(0);
2995 }
2996 EXTERN_C_END
2997 
2998 EXTERN_C_BEGIN
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3001 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3002 {
3003   PetscInt       i,m,nz,nz_max=0,*nnz;
3004   PetscScalar    *values=0;
3005   PetscErrorCode ierr;
3006 
3007   PetscFunctionBegin;
3008   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3009   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3010   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3011   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3012   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3013   m = B->rmap->n/bs;
3014 
3015   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3016   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3017   for(i=0; i<m; i++) {
3018     nz = ii[i+1]- ii[i];
3019     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3020     nz_max = PetscMax(nz_max, nz);
3021     nnz[i] = nz;
3022   }
3023   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3024   ierr = PetscFree(nnz);CHKERRQ(ierr);
3025 
3026   values = (PetscScalar*)V;
3027   if (!values) {
3028     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3029     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3030   }
3031   for (i=0; i<m; i++) {
3032     PetscInt          ncols  = ii[i+1] - ii[i];
3033     const PetscInt    *icols = jj + ii[i];
3034     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3035     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3036   }
3037   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3038   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040 
3041   PetscFunctionReturn(0);
3042 }
3043 EXTERN_C_END
3044 
3045 
3046 EXTERN_C_BEGIN
3047 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3048 #if defined(PETSC_HAVE_MUMPS)
3049 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3050 #endif
3051 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3052 EXTERN_C_END
3053 
3054 /*MC
3055    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3056    block sparse compressed row format.
3057 
3058    Options Database Keys:
3059 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3060 
3061   Level: beginner
3062 
3063 .seealso: MatCreateSeqBAIJ()
3064 M*/
3065 
3066 
3067 EXTERN_C_BEGIN
3068 #undef __FUNCT__
3069 #define __FUNCT__ "MatCreate_SeqBAIJ"
3070 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3071 {
3072   PetscErrorCode ierr;
3073   PetscMPIInt    size;
3074   Mat_SeqBAIJ    *b;
3075 
3076   PetscFunctionBegin;
3077   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3078   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3079 
3080   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3081   B->data = (void*)b;
3082   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3083   B->mapping               = 0;
3084   b->row                   = 0;
3085   b->col                   = 0;
3086   b->icol                  = 0;
3087   b->reallocs              = 0;
3088   b->saved_values          = 0;
3089 
3090   b->roworiented           = PETSC_TRUE;
3091   b->nonew                 = 0;
3092   b->diag                  = 0;
3093   b->solve_work            = 0;
3094   b->mult_work             = 0;
3095   B->spptr                 = 0;
3096   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3097   b->keepnonzeropattern    = PETSC_FALSE;
3098   b->xtoy                  = 0;
3099   b->XtoY                  = 0;
3100   b->compressedrow.use     = PETSC_FALSE;
3101   b->compressedrow.nrows   = 0;
3102   b->compressedrow.i       = PETSC_NULL;
3103   b->compressedrow.rindex  = PETSC_NULL;
3104   b->compressedrow.checked = PETSC_FALSE;
3105   B->same_nonzero          = PETSC_FALSE;
3106 
3107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3108                                      "MatGetFactorAvailable_seqbaij_petsc",
3109                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3111                                      "MatGetFactor_seqbaij_petsc",
3112                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3113 #if defined(PETSC_HAVE_MUMPS)
3114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3115 #endif
3116   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3117                                      "MatInvertBlockDiagonal_SeqBAIJ",
3118                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3119   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3120                                      "MatStoreValues_SeqBAIJ",
3121                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3122   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3123                                      "MatRetrieveValues_SeqBAIJ",
3124                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3125   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3126                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3127                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3129                                      "MatConvert_SeqBAIJ_SeqAIJ",
3130                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3132                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3133                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3135                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3136                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3137   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3138                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3139                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3140   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3141   PetscFunctionReturn(0);
3142 }
3143 EXTERN_C_END
3144 
3145 #undef __FUNCT__
3146 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3147 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3148 {
3149   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3150   PetscErrorCode ierr;
3151   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3152 
3153   PetscFunctionBegin;
3154   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3155 
3156   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3157     c->imax = a->imax;
3158     c->ilen = a->ilen;
3159     c->free_imax_ilen = PETSC_FALSE;
3160   } else {
3161     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3162     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3163     for (i=0; i<mbs; i++) {
3164       c->imax[i] = a->imax[i];
3165       c->ilen[i] = a->ilen[i];
3166     }
3167     c->free_imax_ilen = PETSC_TRUE;
3168   }
3169 
3170   /* allocate the matrix space */
3171   if (mallocmatspace){
3172     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3173       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3174       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3175       c->singlemalloc = PETSC_FALSE;
3176       c->free_ij      = PETSC_FALSE;
3177       c->i            = a->i;
3178       c->j            = a->j;
3179       c->parent       = A;
3180       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3181       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3182       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3183     } else {
3184       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3185       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3186       c->singlemalloc = PETSC_TRUE;
3187       c->free_ij      = PETSC_TRUE;
3188       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3189       if (mbs > 0) {
3190 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3191 	if (cpvalues == MAT_COPY_VALUES) {
3192 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3193 	} else {
3194 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3195 	}
3196       }
3197     }
3198   }
3199 
3200   c->roworiented = a->roworiented;
3201   c->nonew       = a->nonew;
3202   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3203   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3204   c->bs2         = a->bs2;
3205   c->mbs         = a->mbs;
3206   c->nbs         = a->nbs;
3207 
3208   if (a->diag) {
3209     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3210       c->diag      = a->diag;
3211       c->free_diag = PETSC_FALSE;
3212     } else {
3213       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3214       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3215       for (i=0; i<mbs; i++) {
3216         c->diag[i] = a->diag[i];
3217       }
3218       c->free_diag = PETSC_TRUE;
3219     }
3220   } else c->diag        = 0;
3221   c->nz                 = a->nz;
3222   c->maxnz              = bs2*a->nz; /* Since we allocate exactly the right amount */
3223   c->solve_work         = 0;
3224   c->mult_work          = 0;
3225   c->free_a             = PETSC_TRUE;
3226   c->free_ij            = PETSC_TRUE;
3227   C->preallocated       = PETSC_TRUE;
3228   C->assembled          = PETSC_TRUE;
3229 
3230   c->compressedrow.use     = a->compressedrow.use;
3231   c->compressedrow.nrows   = a->compressedrow.nrows;
3232   c->compressedrow.checked = a->compressedrow.checked;
3233   if (a->compressedrow.checked && a->compressedrow.use){
3234     i = a->compressedrow.nrows;
3235     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3236     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3237     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3238     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3239   } else {
3240     c->compressedrow.use    = PETSC_FALSE;
3241     c->compressedrow.i      = PETSC_NULL;
3242     c->compressedrow.rindex = PETSC_NULL;
3243   }
3244   C->same_nonzero = A->same_nonzero;
3245   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3246   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3247   PetscFunctionReturn(0);
3248 }
3249 
3250 #undef __FUNCT__
3251 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3252 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3253 {
3254     PetscErrorCode ierr;
3255 
3256   PetscFunctionBegin;
3257   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3258   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3259   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3260   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3261   PetscFunctionReturn(0);
3262 }
3263 
3264 #undef __FUNCT__
3265 #define __FUNCT__ "MatLoad_SeqBAIJ"
3266 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3267 {
3268   Mat_SeqBAIJ    *a;
3269   Mat            B;
3270   PetscErrorCode ierr;
3271   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3272   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3273   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3274   PetscInt       *masked,nmask,tmp,bs2,ishift;
3275   PetscMPIInt    size;
3276   int            fd;
3277   PetscScalar    *aa;
3278   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3279 
3280   PetscFunctionBegin;
3281   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3282     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3283   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3284   bs2  = bs*bs;
3285 
3286   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3287   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3288   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3289   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3290   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3291   M = header[1]; N = header[2]; nz = header[3];
3292 
3293   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3294   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3295 
3296   /*
3297      This code adds extra rows to make sure the number of rows is
3298     divisible by the blocksize
3299   */
3300   mbs        = M/bs;
3301   extra_rows = bs - M + bs*(mbs);
3302   if (extra_rows == bs) extra_rows = 0;
3303   else                  mbs++;
3304   if (extra_rows) {
3305     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3306   }
3307 
3308   /* read in row lengths */
3309   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3310   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3311   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3312 
3313   /* read in column indices */
3314   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3315   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3316   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3317 
3318   /* loop over row lengths determining block row lengths */
3319   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3320   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3321   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3322   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3323   rowcount = 0;
3324   nzcount = 0;
3325   for (i=0; i<mbs; i++) {
3326     nmask = 0;
3327     for (j=0; j<bs; j++) {
3328       kmax = rowlengths[rowcount];
3329       for (k=0; k<kmax; k++) {
3330         tmp = jj[nzcount++]/bs;
3331         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3332       }
3333       rowcount++;
3334     }
3335     browlengths[i] += nmask;
3336     /* zero out the mask elements we set */
3337     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3338   }
3339 
3340   /* create our matrix */
3341   ierr = MatCreate(comm,&B);
3342   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3343   ierr = MatSetType(B,type);CHKERRQ(ierr);
3344   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3345   a = (Mat_SeqBAIJ*)B->data;
3346 
3347   /* set matrix "i" values */
3348   a->i[0] = 0;
3349   for (i=1; i<= mbs; i++) {
3350     a->i[i]      = a->i[i-1] + browlengths[i-1];
3351     a->ilen[i-1] = browlengths[i-1];
3352   }
3353   a->nz         = 0;
3354   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3355 
3356   /* read in nonzero values */
3357   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3358   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3359   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3360 
3361   /* set "a" and "j" values into matrix */
3362   nzcount = 0; jcount = 0;
3363   for (i=0; i<mbs; i++) {
3364     nzcountb = nzcount;
3365     nmask    = 0;
3366     for (j=0; j<bs; j++) {
3367       kmax = rowlengths[i*bs+j];
3368       for (k=0; k<kmax; k++) {
3369         tmp = jj[nzcount++]/bs;
3370 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3371       }
3372     }
3373     /* sort the masked values */
3374     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3375 
3376     /* set "j" values into matrix */
3377     maskcount = 1;
3378     for (j=0; j<nmask; j++) {
3379       a->j[jcount++]  = masked[j];
3380       mask[masked[j]] = maskcount++;
3381     }
3382     /* set "a" values into matrix */
3383     ishift = bs2*a->i[i];
3384     for (j=0; j<bs; j++) {
3385       kmax = rowlengths[i*bs+j];
3386       for (k=0; k<kmax; k++) {
3387         tmp       = jj[nzcountb]/bs ;
3388         block     = mask[tmp] - 1;
3389         point     = jj[nzcountb] - bs*tmp;
3390         idx       = ishift + bs2*block + j + bs*point;
3391         a->a[idx] = (MatScalar)aa[nzcountb++];
3392       }
3393     }
3394     /* zero out the mask elements we set */
3395     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3396   }
3397   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3398 
3399   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3400   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3401   ierr = PetscFree(aa);CHKERRQ(ierr);
3402   ierr = PetscFree(jj);CHKERRQ(ierr);
3403   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3404 
3405   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3406   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3407   ierr = MatView_Private(B);CHKERRQ(ierr);
3408 
3409   *A = B;
3410   PetscFunctionReturn(0);
3411 }
3412 
3413 #undef __FUNCT__
3414 #define __FUNCT__ "MatLoadnew_SeqBAIJ"
3415 PetscErrorCode MatLoadnew_SeqBAIJ(PetscViewer viewer,Mat newmat)
3416 {
3417   Mat_SeqBAIJ    *a;
3418   PetscErrorCode ierr;
3419   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3420   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3421   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3422   PetscInt       *masked,nmask,tmp,bs2,ishift;
3423   PetscMPIInt    size;
3424   int            fd;
3425   PetscScalar    *aa;
3426   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3427 
3428   PetscFunctionBegin;
3429   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3430   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3431   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3432   bs2  = bs*bs;
3433 
3434   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3435   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3436   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3437   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3438   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3439   M = header[1]; N = header[2]; nz = header[3];
3440 
3441   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3442   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3443 
3444   /*
3445      This code adds extra rows to make sure the number of rows is
3446     divisible by the blocksize
3447   */
3448   mbs        = M/bs;
3449   extra_rows = bs - M + bs*(mbs);
3450   if (extra_rows == bs) extra_rows = 0;
3451   else                  mbs++;
3452   if (extra_rows) {
3453     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3454   }
3455 
3456   /* Set global sizes if not already set */
3457   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3458     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3459   } else { /* Check if the matrix global sizes are correct */
3460     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3461     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3462   }
3463 
3464   /* read in row lengths */
3465   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3466   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3467   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3468 
3469   /* read in column indices */
3470   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3471   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3472   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3473 
3474   /* loop over row lengths determining block row lengths */
3475   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3476   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3477   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3478   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3479   rowcount = 0;
3480   nzcount = 0;
3481   for (i=0; i<mbs; i++) {
3482     nmask = 0;
3483     for (j=0; j<bs; j++) {
3484       kmax = rowlengths[rowcount];
3485       for (k=0; k<kmax; k++) {
3486         tmp = jj[nzcount++]/bs;
3487         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3488       }
3489       rowcount++;
3490     }
3491     browlengths[i] += nmask;
3492     /* zero out the mask elements we set */
3493     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3494   }
3495 
3496   /* Do preallocation  */
3497   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3498   a = (Mat_SeqBAIJ*)newmat->data;
3499 
3500   /* set matrix "i" values */
3501   a->i[0] = 0;
3502   for (i=1; i<= mbs; i++) {
3503     a->i[i]      = a->i[i-1] + browlengths[i-1];
3504     a->ilen[i-1] = browlengths[i-1];
3505   }
3506   a->nz         = 0;
3507   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3508 
3509   /* read in nonzero values */
3510   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3511   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3512   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3513 
3514   /* set "a" and "j" values into matrix */
3515   nzcount = 0; jcount = 0;
3516   for (i=0; i<mbs; i++) {
3517     nzcountb = nzcount;
3518     nmask    = 0;
3519     for (j=0; j<bs; j++) {
3520       kmax = rowlengths[i*bs+j];
3521       for (k=0; k<kmax; k++) {
3522         tmp = jj[nzcount++]/bs;
3523 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3524       }
3525     }
3526     /* sort the masked values */
3527     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3528 
3529     /* set "j" values into matrix */
3530     maskcount = 1;
3531     for (j=0; j<nmask; j++) {
3532       a->j[jcount++]  = masked[j];
3533       mask[masked[j]] = maskcount++;
3534     }
3535     /* set "a" values into matrix */
3536     ishift = bs2*a->i[i];
3537     for (j=0; j<bs; j++) {
3538       kmax = rowlengths[i*bs+j];
3539       for (k=0; k<kmax; k++) {
3540         tmp       = jj[nzcountb]/bs ;
3541         block     = mask[tmp] - 1;
3542         point     = jj[nzcountb] - bs*tmp;
3543         idx       = ishift + bs2*block + j + bs*point;
3544         a->a[idx] = (MatScalar)aa[nzcountb++];
3545       }
3546     }
3547     /* zero out the mask elements we set */
3548     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3549   }
3550   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3551 
3552   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3553   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3554   ierr = PetscFree(aa);CHKERRQ(ierr);
3555   ierr = PetscFree(jj);CHKERRQ(ierr);
3556   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3557 
3558   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3559   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3560   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatCreateSeqBAIJ"
3566 /*@C
3567    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3568    compressed row) format.  For good matrix assembly performance the
3569    user should preallocate the matrix storage by setting the parameter nz
3570    (or the array nnz).  By setting these parameters accurately, performance
3571    during matrix assembly can be increased by more than a factor of 50.
3572 
3573    Collective on MPI_Comm
3574 
3575    Input Parameters:
3576 +  comm - MPI communicator, set to PETSC_COMM_SELF
3577 .  bs - size of block
3578 .  m - number of rows
3579 .  n - number of columns
3580 .  nz - number of nonzero blocks  per block row (same for all rows)
3581 -  nnz - array containing the number of nonzero blocks in the various block rows
3582          (possibly different for each block row) or PETSC_NULL
3583 
3584    Output Parameter:
3585 .  A - the matrix
3586 
3587    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3588    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3589    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3590 
3591    Options Database Keys:
3592 .   -mat_no_unroll - uses code that does not unroll the loops in the
3593                      block calculations (much slower)
3594 .    -mat_block_size - size of the blocks to use
3595 
3596    Level: intermediate
3597 
3598    Notes:
3599    The number of rows and columns must be divisible by blocksize.
3600 
3601    If the nnz parameter is given then the nz parameter is ignored
3602 
3603    A nonzero block is any block that as 1 or more nonzeros in it
3604 
3605    The block AIJ format is fully compatible with standard Fortran 77
3606    storage.  That is, the stored row and column indices can begin at
3607    either one (as in Fortran) or zero.  See the users' manual for details.
3608 
3609    Specify the preallocated storage with either nz or nnz (not both).
3610    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3611    allocation.  For additional details, see the users manual chapter on
3612    matrices.
3613 
3614 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3615 @*/
3616 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3617 {
3618   PetscErrorCode ierr;
3619 
3620   PetscFunctionBegin;
3621   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3622   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3623   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3624   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3625   PetscFunctionReturn(0);
3626 }
3627 
3628 #undef __FUNCT__
3629 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3630 /*@C
3631    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3632    per row in the matrix. For good matrix assembly performance the
3633    user should preallocate the matrix storage by setting the parameter nz
3634    (or the array nnz).  By setting these parameters accurately, performance
3635    during matrix assembly can be increased by more than a factor of 50.
3636 
3637    Collective on MPI_Comm
3638 
3639    Input Parameters:
3640 +  A - the matrix
3641 .  bs - size of block
3642 .  nz - number of block nonzeros per block row (same for all rows)
3643 -  nnz - array containing the number of block nonzeros in the various block rows
3644          (possibly different for each block row) or PETSC_NULL
3645 
3646    Options Database Keys:
3647 .   -mat_no_unroll - uses code that does not unroll the loops in the
3648                      block calculations (much slower)
3649 .    -mat_block_size - size of the blocks to use
3650 
3651    Level: intermediate
3652 
3653    Notes:
3654    If the nnz parameter is given then the nz parameter is ignored
3655 
3656    You can call MatGetInfo() to get information on how effective the preallocation was;
3657    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3658    You can also run with the option -info and look for messages with the string
3659    malloc in them to see if additional memory allocation was needed.
3660 
3661    The block AIJ format is fully compatible with standard Fortran 77
3662    storage.  That is, the stored row and column indices can begin at
3663    either one (as in Fortran) or zero.  See the users' manual for details.
3664 
3665    Specify the preallocated storage with either nz or nnz (not both).
3666    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3667    allocation.  For additional details, see the users manual chapter on
3668    matrices.
3669 
3670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3671 @*/
3672 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3673 {
3674   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3675 
3676   PetscFunctionBegin;
3677   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3678   if (f) {
3679     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3680   }
3681   PetscFunctionReturn(0);
3682 }
3683 
3684 #undef __FUNCT__
3685 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3686 /*@C
3687    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3688    (the default sequential PETSc format).
3689 
3690    Collective on MPI_Comm
3691 
3692    Input Parameters:
3693 +  A - the matrix
3694 .  i - the indices into j for the start of each local row (starts with zero)
3695 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3696 -  v - optional values in the matrix
3697 
3698    Level: developer
3699 
3700 .keywords: matrix, aij, compressed row, sparse
3701 
3702 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3703 @*/
3704 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3705 {
3706   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3707 
3708   PetscFunctionBegin;
3709   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3710   if (f) {
3711     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3712   }
3713   PetscFunctionReturn(0);
3714 }
3715 
3716 
3717 #undef __FUNCT__
3718 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3719 /*@
3720      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3721               (upper triangular entries in CSR format) provided by the user.
3722 
3723      Collective on MPI_Comm
3724 
3725    Input Parameters:
3726 +  comm - must be an MPI communicator of size 1
3727 .  bs - size of block
3728 .  m - number of rows
3729 .  n - number of columns
3730 .  i - row indices
3731 .  j - column indices
3732 -  a - matrix values
3733 
3734    Output Parameter:
3735 .  mat - the matrix
3736 
3737    Level: intermediate
3738 
3739    Notes:
3740        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3741     once the matrix is destroyed
3742 
3743        You cannot set new nonzero locations into this matrix, that will generate an error.
3744 
3745        The i and j indices are 0 based
3746 
3747 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3748 
3749 @*/
3750 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3751 {
3752   PetscErrorCode ierr;
3753   PetscInt       ii;
3754   Mat_SeqBAIJ    *baij;
3755 
3756   PetscFunctionBegin;
3757   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3758   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3759 
3760   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3761   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3762   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3763   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3764   baij = (Mat_SeqBAIJ*)(*mat)->data;
3765   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3766   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3767 
3768   baij->i = i;
3769   baij->j = j;
3770   baij->a = a;
3771   baij->singlemalloc = PETSC_FALSE;
3772   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3773   baij->free_a       = PETSC_FALSE;
3774   baij->free_ij       = PETSC_FALSE;
3775 
3776   for (ii=0; ii<m; ii++) {
3777     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3778 #if defined(PETSC_USE_DEBUG)
3779     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3780 #endif
3781   }
3782 #if defined(PETSC_USE_DEBUG)
3783   for (ii=0; ii<baij->i[m]; ii++) {
3784     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3785     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3786   }
3787 #endif
3788 
3789   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3791   PetscFunctionReturn(0);
3792 }
3793