xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 27bdab1e45eef8b231b07730c1b1dd8ccdf290d8)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr,(*f)(Mat);
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31 
32   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33   if (f) {
34     ierr = (*f)(mat);CHKERRQ(ierr);
35   } else {
36     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 EXTERN_C_BEGIN
42 #undef __FUNCT__
43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45 {
46   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
47   PetscErrorCode ierr;
48   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
49   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
50   PetscReal      shift = 0.0;
51 
52   PetscFunctionBegin;
53   if (a->idiagvalid) PetscFunctionReturn(0);
54   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
55   diag_offset = a->diag;
56   if (!a->idiag) {
57     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
58     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs2*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     case 7:
129       for (i=0; i<mbs; i++) {
130         odiag = v + 49*diag_offset[i];
131         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
132         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
133 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
134 	diag  += 49;
135 	mdiag += 49;
136       }
137       break;
138     default:
139       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
140       for (i=0; i<mbs; i++) {
141         odiag = v + bs2*diag_offset[i];
142         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
143         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
144         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
145 	diag  += bs2;
146 	mdiag += bs2;
147       }
148       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
149   }
150   a->idiagvalid = PETSC_TRUE;
151   PetscFunctionReturn(0);
152 }
153 EXTERN_C_END
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
157 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
158 {
159   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
160   PetscScalar        *x,x1,s1;
161   const PetscScalar  *b;
162   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
163   PetscErrorCode     ierr;
164   PetscInt           m = a->mbs,i,i2,nz,j;
165   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
166 
167   PetscFunctionBegin;
168   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
169   its = its*lits;
170   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
171   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
172   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
173   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
174   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
175 
176   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
177 
178   diag  = a->diag;
179   idiag = a->idiag;
180   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
181   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
182 
183   if (flag & SOR_ZERO_INITIAL_GUESS) {
184     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
185       x[0] = b[0]*idiag[0];
186       i2     = 1;
187       idiag += 1;
188       for (i=1; i<m; i++) {
189         v     = aa + ai[i];
190         vi    = aj + ai[i];
191         nz    = diag[i] - ai[i];
192         s1    = b[i2];
193         for (j=0; j<nz; j++) {
194           s1 -= v[j]*x[vi[j]];
195         }
196         x[i2]   = idiag[0]*s1;
197         idiag   += 1;
198         i2      += 1;
199       }
200       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
201       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
202     }
203     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
204         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
205       i2    = 0;
206       mdiag = a->idiag+a->mbs;
207       for (i=0; i<m; i++) {
208         x1      = x[i2];
209         x[i2]   = mdiag[0]*x1;
210         mdiag  += 1;
211         i2     += 1;
212       }
213       ierr = PetscLogFlops(m);CHKERRQ(ierr);
214     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
215       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
216     }
217     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
218       idiag   = a->idiag+a->mbs - 1;
219       i2      = m - 1;
220       x1      = x[i2];
221       x[i2]   = idiag[0]*x1;
222       idiag -= 1;
223       i2    -= 1;
224       for (i=m-2; i>=0; i--) {
225         v     = aa + (diag[i]+1);
226         vi    = aj + diag[i] + 1;
227         nz    = ai[i+1] - diag[i] - 1;
228         s1    = x[i2];
229         for (j=0; j<nz; j++) {
230           s1 -= v[j]*x[vi[j]];
231         }
232         x[i2]   = idiag[0]*s1;
233         idiag   -= 1;
234         i2      -= 1;
235       }
236       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
237     }
238   } else {
239     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
240   }
241   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
248 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
249 {
250   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
251   PetscScalar        *x,x1,x2,s1,s2;
252   const PetscScalar  *b;
253   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
254   PetscErrorCode     ierr;
255   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
256   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
257 
258   PetscFunctionBegin;
259   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
260   its = its*lits;
261   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
262   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
263   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
264   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
265   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
266 
267   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
268 
269   diag  = a->diag;
270   idiag = a->idiag;
271   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
272   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
273 
274   if (flag & SOR_ZERO_INITIAL_GUESS) {
275     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
276       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
277       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
278       i2     = 2;
279       idiag += 4;
280       for (i=1; i<m; i++) {
281 	v     = aa + 4*ai[i];
282 	vi    = aj + ai[i];
283 	nz    = diag[i] - ai[i];
284 	s1    = b[i2]; s2 = b[i2+1];
285         for (j=0; j<nz; j++) {
286 	  idx  = 2*vi[j];
287           it   = 4*j;
288 	  x1   = x[idx]; x2 = x[1+idx];
289 	  s1  -= v[it]*x1 + v[it+2]*x2;
290 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
291 	}
292 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
293 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
294         idiag   += 4;
295         i2      += 2;
296       }
297       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
298       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
299     }
300     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
301         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
302       i2    = 0;
303       mdiag = a->idiag+4*a->mbs;
304       for (i=0; i<m; i++) {
305         x1      = x[i2]; x2 = x[i2+1];
306         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
307         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
308         mdiag  += 4;
309         i2     += 2;
310       }
311       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
312     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
313       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
314     }
315     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
316       idiag   = a->idiag+4*a->mbs - 4;
317       i2      = 2*m - 2;
318       x1      = x[i2]; x2 = x[i2+1];
319       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
320       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
321       idiag -= 4;
322       i2    -= 2;
323       for (i=m-2; i>=0; i--) {
324 	v     = aa + 4*(diag[i]+1);
325 	vi    = aj + diag[i] + 1;
326 	nz    = ai[i+1] - diag[i] - 1;
327 	s1    = x[i2]; s2 = x[i2+1];
328         for (j=0; j<nz; j++) {
329  	  idx  = 2*vi[j];
330           it   = 4*j;
331 	  x1   = x[idx]; x2 = x[1+idx];
332 	  s1  -= v[it]*x1 + v[it+2]*x2;
333 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
334 	}
335 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
336 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
337         idiag   -= 4;
338         i2      -= 2;
339       }
340       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
341     }
342   } else {
343     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
344   }
345   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
346   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
352 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
353 {
354   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
355   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
356   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
357   const PetscScalar  *b;
358   PetscErrorCode     ierr;
359   PetscInt           m = a->mbs,i,i2,nz,idx;
360   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
361 
362   PetscFunctionBegin;
363   its = its*lits;
364   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
365   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
366   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
367   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
368   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
369   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
370 
371   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
372 
373   diag  = a->diag;
374   idiag = a->idiag;
375   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
376   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
377 
378   if (flag & SOR_ZERO_INITIAL_GUESS) {
379     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
380       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
381       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
382       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
383       i2     = 3;
384       idiag += 9;
385       for (i=1; i<m; i++) {
386         v     = aa + 9*ai[i];
387         vi    = aj + ai[i];
388         nz    = diag[i] - ai[i];
389         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
390         while (nz--) {
391           idx  = 3*(*vi++);
392           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
393           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
394           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
395           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
396           v   += 9;
397         }
398         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
399         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
400         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
401         idiag   += 9;
402         i2      += 3;
403       }
404       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
405       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
406     }
407     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
408         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
409       i2    = 0;
410       mdiag = a->idiag+9*a->mbs;
411       for (i=0; i<m; i++) {
412         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
413         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
414         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
415         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
416         mdiag  += 9;
417         i2     += 3;
418       }
419       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
420     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
421       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
422     }
423     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
424       idiag   = a->idiag+9*a->mbs - 9;
425       i2      = 3*m - 3;
426       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
427       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
428       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
429       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
430       idiag -= 9;
431       i2    -= 3;
432       for (i=m-2; i>=0; i--) {
433         v     = aa + 9*(diag[i]+1);
434         vi    = aj + diag[i] + 1;
435         nz    = ai[i+1] - diag[i] - 1;
436         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
437         while (nz--) {
438           idx  = 3*(*vi++);
439           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
440           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
441           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
442           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
443           v   += 9;
444         }
445         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
446         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
447         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
448         idiag   -= 9;
449         i2      -= 3;
450       }
451       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
452     }
453   } else {
454     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
455   }
456   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
457   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
463 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
464 {
465   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
466   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
467   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
468   const PetscScalar  *b;
469   PetscErrorCode     ierr;
470   PetscInt           m = a->mbs,i,i2,nz,idx;
471   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
472 
473   PetscFunctionBegin;
474   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
475   its = its*lits;
476   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
477   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
478   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
479   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
480   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
481 
482   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
483 
484   diag  = a->diag;
485   idiag = a->idiag;
486   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
487   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
488 
489   if (flag & SOR_ZERO_INITIAL_GUESS) {
490     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
491       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
492       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
493       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
494       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
495       i2     = 4;
496       idiag += 16;
497       for (i=1; i<m; i++) {
498 	v     = aa + 16*ai[i];
499 	vi    = aj + ai[i];
500 	nz    = diag[i] - ai[i];
501 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
502 	while (nz--) {
503 	  idx  = 4*(*vi++);
504 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
505 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
506 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
507 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
508 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
509 	  v   += 16;
510 	}
511 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
512 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
513 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
514 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
515         idiag   += 16;
516         i2      += 4;
517       }
518       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
519       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
520     }
521     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
522         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
523       i2    = 0;
524       mdiag = a->idiag+16*a->mbs;
525       for (i=0; i<m; i++) {
526         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
527         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
528         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
529         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
530         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
531         mdiag  += 16;
532         i2     += 4;
533       }
534       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
535     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
536       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
537     }
538     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
539       idiag   = a->idiag+16*a->mbs - 16;
540       i2      = 4*m - 4;
541       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
542       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
543       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
544       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
545       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
546       idiag -= 16;
547       i2    -= 4;
548       for (i=m-2; i>=0; i--) {
549 	v     = aa + 16*(diag[i]+1);
550 	vi    = aj + diag[i] + 1;
551 	nz    = ai[i+1] - diag[i] - 1;
552 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
553 	while (nz--) {
554 	  idx  = 4*(*vi++);
555 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
556 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
557 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
558 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
559 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
560 	  v   += 16;
561 	}
562 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
563 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
564 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
565 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
566         idiag   -= 16;
567         i2      -= 4;
568       }
569       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
570     }
571   } else {
572     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
573   }
574   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
575   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
581 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
582 {
583   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
584   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
585   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
586   const PetscScalar  *b;
587   PetscErrorCode     ierr;
588   PetscInt           m = a->mbs,i,i2,nz,idx;
589   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
590 
591   PetscFunctionBegin;
592   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
593   its = its*lits;
594   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
595   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
596   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
597   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
598   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
599 
600   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
601 
602   diag  = a->diag;
603   idiag = a->idiag;
604   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
605   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
606 
607   if (flag & SOR_ZERO_INITIAL_GUESS) {
608     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
609       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
610       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
611       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
612       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
613       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
614       i2     = 5;
615       idiag += 25;
616       for (i=1; i<m; i++) {
617 	v     = aa + 25*ai[i];
618 	vi    = aj + ai[i];
619 	nz    = diag[i] - ai[i];
620 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
621 	while (nz--) {
622 	  idx  = 5*(*vi++);
623 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
624 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
625 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
626 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
627 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
628 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
629 	  v   += 25;
630 	}
631 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
632 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
633 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
634 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
635 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
636         idiag   += 25;
637         i2      += 5;
638       }
639       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
640       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
641     }
642     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
643         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
644       i2    = 0;
645       mdiag = a->idiag+25*a->mbs;
646       for (i=0; i<m; i++) {
647         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
648         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
649         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
650         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
651         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
652         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
653         mdiag  += 25;
654         i2     += 5;
655       }
656       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
657     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
658       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
659     }
660     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
661       idiag   = a->idiag+25*a->mbs - 25;
662       i2      = 5*m - 5;
663       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
664       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
665       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
666       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
667       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
668       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
669       idiag -= 25;
670       i2    -= 5;
671       for (i=m-2; i>=0; i--) {
672 	v     = aa + 25*(diag[i]+1);
673 	vi    = aj + diag[i] + 1;
674 	nz    = ai[i+1] - diag[i] - 1;
675 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
676 	while (nz--) {
677 	  idx  = 5*(*vi++);
678 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
679 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
680 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
681 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
682 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
683 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
684 	  v   += 25;
685 	}
686 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
687 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
688 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
689 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
690 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
691         idiag   -= 25;
692         i2      -= 5;
693       }
694       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
695     }
696   } else {
697     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
698   }
699   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
700   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
706 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
707 {
708   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
709   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
710   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
711   const PetscScalar  *b;
712   PetscErrorCode     ierr;
713   PetscInt           m = a->mbs,i,i2,nz,idx;
714   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
715 
716   PetscFunctionBegin;
717   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
718   its = its*lits;
719   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
720   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
721   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
722   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
723   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
724 
725   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
726 
727   diag  = a->diag;
728   idiag = a->idiag;
729   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
730   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
731 
732   if (flag & SOR_ZERO_INITIAL_GUESS) {
733     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
734       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
735       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
736       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
737       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
738       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
739       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
740       i2     = 6;
741       idiag += 36;
742       for (i=1; i<m; i++) {
743         v     = aa + 36*ai[i];
744         vi    = aj + ai[i];
745         nz    = diag[i] - ai[i];
746         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
747         while (nz--) {
748           idx  = 6*(*vi++);
749           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
750           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
751           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
752           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
753           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
754           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
755           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
756           v   += 36;
757         }
758         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
759         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
760         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
761         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
762         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
763         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
764         idiag   += 36;
765         i2      += 6;
766       }
767       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
768       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
769     }
770     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
771         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
772       i2    = 0;
773       mdiag = a->idiag+36*a->mbs;
774       for (i=0; i<m; i++) {
775         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
776         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
777         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
778         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
779         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
780         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
781         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
782         mdiag  += 36;
783         i2     += 6;
784       }
785       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
786     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
787       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
788     }
789     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
790       idiag   = a->idiag+36*a->mbs - 36;
791       i2      = 6*m - 6;
792       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
793       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
794       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
795       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
796       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
797       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
798       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
799       idiag -= 36;
800       i2    -= 6;
801       for (i=m-2; i>=0; i--) {
802         v     = aa + 36*(diag[i]+1);
803         vi    = aj + diag[i] + 1;
804         nz    = ai[i+1] - diag[i] - 1;
805         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
806         while (nz--) {
807           idx  = 6*(*vi++);
808           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
809           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
810           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
811           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
812           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
813           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
814           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
815           v   += 36;
816         }
817         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
818         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
819         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
820         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
821         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
822         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
823         idiag   -= 36;
824         i2      -= 6;
825       }
826       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
827     }
828   } else {
829     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
830   }
831   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
832   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
838 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
839 {
840   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
841   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
842   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
843   const PetscScalar  *b;
844   PetscErrorCode     ierr;
845   PetscInt           m = a->mbs,i,i2,nz,idx;
846   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
847 
848   PetscFunctionBegin;
849   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
850   its = its*lits;
851   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
852   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
853   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
854   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
855   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
856 
857   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
858 
859   diag  = a->diag;
860   idiag = a->idiag;
861   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
862   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
863 
864   if (flag & SOR_ZERO_INITIAL_GUESS) {
865     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
866       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
867       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
868       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
869       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
870       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
871       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
872       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
873       i2     = 7;
874       idiag += 49;
875       for (i=1; i<m; i++) {
876         v     = aa + 49*ai[i];
877         vi    = aj + ai[i];
878         nz    = diag[i] - ai[i];
879         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
880         while (nz--) {
881           idx  = 7*(*vi++);
882           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
883           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
884           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
885           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
886           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
887           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
888           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
889           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
890           v   += 49;
891         }
892         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
893         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
894         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
895         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
896         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
897         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
898         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
899         idiag   += 49;
900         i2      += 7;
901       }
902       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
903       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
904     }
905     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
906         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
907       i2    = 0;
908       mdiag = a->idiag+49*a->mbs;
909       for (i=0; i<m; i++) {
910         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
911         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
912         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
913         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
914         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
915         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
916         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
917         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
918         mdiag  += 49;
919         i2     += 7;
920       }
921       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
922     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
923       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
924     }
925     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
926       idiag   = a->idiag+49*a->mbs - 49;
927       i2      = 7*m - 7;
928       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
929       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
930       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
931       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
932       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
933       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
934       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
935       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
936       idiag -= 49;
937       i2    -= 7;
938       for (i=m-2; i>=0; i--) {
939         v     = aa + 49*(diag[i]+1);
940         vi    = aj + diag[i] + 1;
941         nz    = ai[i+1] - diag[i] - 1;
942         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
943         while (nz--) {
944           idx  = 7*(*vi++);
945           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
946           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
947           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
948           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
949           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
950           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
951           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
952           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
953           v   += 49;
954         }
955         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
956         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
957         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
958         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
959         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
960         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
961         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
962         idiag   -= 49;
963         i2      -= 7;
964       }
965       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
966     }
967   } else {
968     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
969   }
970   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
977 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
978 {
979   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
980   PetscScalar        *x,*work,*w,*workt;
981   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
982   const PetscScalar  *b;
983   PetscErrorCode     ierr;
984   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
985   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
986 
987   PetscFunctionBegin;
988   its = its*lits;
989   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
990   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
991   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
992   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
993   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
994   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
995 
996   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
997 
998   diag  = a->diag;
999   idiag = a->idiag;
1000   if (!a->mult_work) {
1001     k    = PetscMax(A->rmap->n,A->cmap->n);
1002     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1003   }
1004   work = a->mult_work;
1005   if (!a->sor_work) {
1006     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1007   }
1008   w = a->sor_work;
1009 
1010   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1011   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1012 
1013   if (flag & SOR_ZERO_INITIAL_GUESS) {
1014     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1015       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1016       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1017       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1018       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1019       i2     = bs;
1020       idiag += bs2;
1021       for (i=1; i<m; i++) {
1022         v     = aa + bs2*ai[i];
1023         vi    = aj + ai[i];
1024         nz    = diag[i] - ai[i];
1025 
1026         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1027 	/* copy all rows of x that are needed into contiguous space */
1028         workt = work;
1029         for (j=0; j<nz; j++) {
1030           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1031           workt += bs;
1032         }
1033         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1034        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1035         while (nz--) {
1036           idx  = N*(*vi++);
1037           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1038           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1039           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1040           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1041           v   += N2;
1042 	  } */
1043 
1044         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1045         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1046         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1047         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1048 
1049         idiag   += bs2;
1050         i2      += bs;
1051       }
1052       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1053       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1054     }
1055     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1056         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1057       i2    = 0;
1058       mdiag = a->idiag+bs2*a->mbs;
1059       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1060       for (i=0; i<m; i++) {
1061         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1062         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1063         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1064         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1065         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1066 
1067         mdiag  += bs2;
1068         i2     += bs;
1069       }
1070       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1071     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1072       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1073     }
1074     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1075       idiag   = a->idiag+bs2*a->mbs - bs2;
1076       i2      = bs*m - bs;
1077       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1078       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1079       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1080       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1081       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1082       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1083       idiag -= bs2;
1084       i2    -= bs;
1085       for (i=m-2; i>=0; i--) {
1086         v     = aa + bs2*(diag[i]+1);
1087         vi    = aj + diag[i] + 1;
1088         nz    = ai[i+1] - diag[i] - 1;
1089 
1090         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1091 	/* copy all rows of x that are needed into contiguous space */
1092         workt = work;
1093         for (j=0; j<nz; j++) {
1094           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1095           workt += bs;
1096         }
1097         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1098         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1099         while (nz--) {
1100           idx  = N*(*vi++);
1101           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1102           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1103           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1104           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1105           v   += N2;
1106 	  } */
1107 
1108         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1109         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1110         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1111         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1112         idiag   -= bs2;
1113         i2      -= bs;
1114       }
1115       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1116     }
1117   } else {
1118     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1119   }
1120   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1121   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 /*
1126     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1127 */
1128 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1129 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1130 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1131 #define matsetvaluesblocked4_ matsetvaluesblocked4
1132 #endif
1133 
1134 EXTERN_C_BEGIN
1135 #undef __FUNCT__
1136 #define __FUNCT__ "matsetvaluesblocked4_"
1137 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1138 {
1139   Mat               A = *AA;
1140   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1141   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1142   PetscInt          *ai=a->i,*ailen=a->ilen;
1143   PetscInt          *aj=a->j,stepval,lastcol = -1;
1144   const PetscScalar *value = v;
1145   MatScalar         *ap,*aa = a->a,*bap;
1146 
1147   PetscFunctionBegin;
1148   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1149   stepval = (n-1)*4;
1150   for (k=0; k<m; k++) { /* loop over added rows */
1151     row  = im[k];
1152     rp   = aj + ai[row];
1153     ap   = aa + 16*ai[row];
1154     nrow = ailen[row];
1155     low  = 0;
1156     high = nrow;
1157     for (l=0; l<n; l++) { /* loop over added columns */
1158       col = in[l];
1159       if (col <= lastcol) low = 0; else high = nrow;
1160       lastcol = col;
1161       value = v + k*(stepval+4 + l)*4;
1162       while (high-low > 7) {
1163         t = (low+high)/2;
1164         if (rp[t] > col) high = t;
1165         else             low  = t;
1166       }
1167       for (i=low; i<high; i++) {
1168         if (rp[i] > col) break;
1169         if (rp[i] == col) {
1170           bap  = ap +  16*i;
1171           for (ii=0; ii<4; ii++,value+=stepval) {
1172             for (jj=ii; jj<16; jj+=4) {
1173               bap[jj] += *value++;
1174             }
1175           }
1176           goto noinsert2;
1177         }
1178       }
1179       N = nrow++ - 1;
1180       high++; /* added new column index thus must search to one higher than before */
1181       /* shift up all the later entries in this row */
1182       for (ii=N; ii>=i; ii--) {
1183         rp[ii+1] = rp[ii];
1184         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1185       }
1186       if (N >= i) {
1187         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1188       }
1189       rp[i] = col;
1190       bap   = ap +  16*i;
1191       for (ii=0; ii<4; ii++,value+=stepval) {
1192         for (jj=ii; jj<16; jj+=4) {
1193           bap[jj] = *value++;
1194         }
1195       }
1196       noinsert2:;
1197       low = i;
1198     }
1199     ailen[row] = nrow;
1200   }
1201   PetscFunctionReturnVoid();
1202 }
1203 EXTERN_C_END
1204 
1205 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1206 #define matsetvalues4_ MATSETVALUES4
1207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1208 #define matsetvalues4_ matsetvalues4
1209 #endif
1210 
1211 EXTERN_C_BEGIN
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatSetValues4_"
1214 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1215 {
1216   Mat         A = *AA;
1217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1218   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1219   PetscInt    *ai=a->i,*ailen=a->ilen;
1220   PetscInt    *aj=a->j,brow,bcol;
1221   PetscInt    ridx,cidx,lastcol = -1;
1222   MatScalar   *ap,value,*aa=a->a,*bap;
1223 
1224   PetscFunctionBegin;
1225   for (k=0; k<m; k++) { /* loop over added rows */
1226     row  = im[k]; brow = row/4;
1227     rp   = aj + ai[brow];
1228     ap   = aa + 16*ai[brow];
1229     nrow = ailen[brow];
1230     low  = 0;
1231     high = nrow;
1232     for (l=0; l<n; l++) { /* loop over added columns */
1233       col = in[l]; bcol = col/4;
1234       ridx = row % 4; cidx = col % 4;
1235       value = v[l + k*n];
1236       if (col <= lastcol) low = 0; else high = nrow;
1237       lastcol = col;
1238       while (high-low > 7) {
1239         t = (low+high)/2;
1240         if (rp[t] > bcol) high = t;
1241         else              low  = t;
1242       }
1243       for (i=low; i<high; i++) {
1244         if (rp[i] > bcol) break;
1245         if (rp[i] == bcol) {
1246           bap  = ap +  16*i + 4*cidx + ridx;
1247           *bap += value;
1248           goto noinsert1;
1249         }
1250       }
1251       N = nrow++ - 1;
1252       high++; /* added new column thus must search to one higher than before */
1253       /* shift up all the later entries in this row */
1254       for (ii=N; ii>=i; ii--) {
1255         rp[ii+1] = rp[ii];
1256         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1257       }
1258       if (N>=i) {
1259         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1260       }
1261       rp[i]                    = bcol;
1262       ap[16*i + 4*cidx + ridx] = value;
1263       noinsert1:;
1264       low = i;
1265     }
1266     ailen[brow] = nrow;
1267   }
1268   PetscFunctionReturnVoid();
1269 }
1270 EXTERN_C_END
1271 
1272 /*
1273      Checks for missing diagonals
1274 */
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1277 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1278 {
1279   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1280   PetscErrorCode ierr;
1281   PetscInt       *diag,*jj = a->j,i;
1282 
1283   PetscFunctionBegin;
1284   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1285   *missing = PETSC_FALSE;
1286   if (A->rmap->n > 0 && !jj) {
1287     *missing  = PETSC_TRUE;
1288     if (d) *d = 0;
1289     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1290   } else {
1291     diag     = a->diag;
1292     for (i=0; i<a->mbs; i++) {
1293       if (jj[diag[i]] != i) {
1294         *missing  = PETSC_TRUE;
1295         if (d) *d = i;
1296         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1297       }
1298     }
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1305 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1306 {
1307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       i,j,m = a->mbs;
1310 
1311   PetscFunctionBegin;
1312   if (!a->diag) {
1313     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1314     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1315     a->free_diag = PETSC_TRUE;
1316   }
1317   for (i=0; i<m; i++) {
1318     a->diag[i] = a->i[i+1];
1319     for (j=a->i[i]; j<a->i[i+1]; j++) {
1320       if (a->j[j] == i) {
1321         a->diag[i] = j;
1322         break;
1323       }
1324     }
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 
1330 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1334 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1335 {
1336   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1337   PetscErrorCode ierr;
1338   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1339   PetscInt       *tia, *tja;
1340 
1341   PetscFunctionBegin;
1342   *nn = n;
1343   if (!ia) PetscFunctionReturn(0);
1344   if (symmetric) {
1345     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1346     nz   = tia[n];
1347   } else {
1348     tia = a->i; tja = a->j;
1349   }
1350 
1351   if (!blockcompressed && bs > 1) {
1352     (*nn) *= bs;
1353     /* malloc & create the natural set of indices */
1354     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1355     if (n) {
1356       (*ia)[0] = 0;
1357       for (j=1; j<bs; j++) {
1358         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1359       }
1360     }
1361 
1362     for (i=1; i<n; i++) {
1363       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1364       for (j=1; j<bs; j++) {
1365         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1366       }
1367     }
1368     if (n) {
1369       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1370     }
1371 
1372     if (ja) {
1373       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1374       cnt = 0;
1375       for (i=0; i<n; i++) {
1376         for (j=0; j<bs; j++) {
1377           for (k=tia[i]; k<tia[i+1]; k++) {
1378             for (l=0; l<bs; l++) {
1379               (*ja)[cnt++] = bs*tja[k] + l;
1380 	    }
1381           }
1382         }
1383       }
1384     }
1385 
1386     n     *= bs;
1387     nz *= bs*bs;
1388     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1389       ierr = PetscFree(tia);CHKERRQ(ierr);
1390       ierr = PetscFree(tja);CHKERRQ(ierr);
1391     }
1392   } else if (oshift == 1) {
1393     if (symmetric) {
1394       PetscInt nz = tia[A->rmap->n/bs];
1395       /*  add 1 to i and j indices */
1396       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1397       *ia = tia;
1398       if (ja) {
1399 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1400         *ja = tja;
1401       }
1402     } else {
1403       PetscInt nz = a->i[A->rmap->n/bs];
1404       /* malloc space and  add 1 to i and j indices */
1405       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1406       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1407       if (ja) {
1408 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1409 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1410       }
1411     }
1412   } else {
1413     *ia = tia;
1414     if (ja) *ja = tja;
1415   }
1416 
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1422 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1423 {
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   if (!ia) PetscFunctionReturn(0);
1428   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1429     ierr = PetscFree(*ia);CHKERRQ(ierr);
1430     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1437 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1438 {
1439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441 
1442   PetscFunctionBegin;
1443 #if defined(PETSC_USE_LOG)
1444   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1445 #endif
1446   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1447   if (a->row) {
1448     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1449   }
1450   if (a->col) {
1451     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1452   }
1453   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1454   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1455   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1456   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1457   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1458   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1459   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1460   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1461   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1462   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1463 
1464   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1465   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1466   ierr = PetscFree(a);CHKERRQ(ierr);
1467 
1468   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1469   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1470   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1471   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1472   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1473   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1474   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1475   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1476   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1482 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1483 {
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   switch (op) {
1489   case MAT_ROW_ORIENTED:
1490     a->roworiented    = flg;
1491     break;
1492   case MAT_KEEP_NONZERO_PATTERN:
1493     a->keepnonzeropattern = flg;
1494     break;
1495   case MAT_NEW_NONZERO_LOCATIONS:
1496     a->nonew          = (flg ? 0 : 1);
1497     break;
1498   case MAT_NEW_NONZERO_LOCATION_ERR:
1499     a->nonew          = (flg ? -1 : 0);
1500     break;
1501   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1502     a->nonew          = (flg ? -2 : 0);
1503     break;
1504   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1505     a->nounused       = (flg ? -1 : 0);
1506     break;
1507   case MAT_NEW_DIAGONALS:
1508   case MAT_IGNORE_OFF_PROC_ENTRIES:
1509   case MAT_USE_HASH_TABLE:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   case MAT_SYMMETRIC:
1513   case MAT_STRUCTURALLY_SYMMETRIC:
1514   case MAT_HERMITIAN:
1515   case MAT_SYMMETRY_ETERNAL:
1516     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1517     break;
1518   default:
1519     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1526 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1527 {
1528   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1529   PetscErrorCode ierr;
1530   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1531   MatScalar      *aa,*aa_i;
1532   PetscScalar    *v_i;
1533 
1534   PetscFunctionBegin;
1535   bs  = A->rmap->bs;
1536   ai  = a->i;
1537   aj  = a->j;
1538   aa  = a->a;
1539   bs2 = a->bs2;
1540 
1541   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1542 
1543   bn  = row/bs;   /* Block number */
1544   bp  = row % bs; /* Block Position */
1545   M   = ai[bn+1] - ai[bn];
1546   *nz = bs*M;
1547 
1548   if (v) {
1549     *v = 0;
1550     if (*nz) {
1551       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1552       for (i=0; i<M; i++) { /* for each block in the block row */
1553         v_i  = *v + i*bs;
1554         aa_i = aa + bs2*(ai[bn] + i);
1555         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1556       }
1557     }
1558   }
1559 
1560   if (idx) {
1561     *idx = 0;
1562     if (*nz) {
1563       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1564       for (i=0; i<M; i++) { /* for each block in the block row */
1565         idx_i = *idx + i*bs;
1566         itmp  = bs*aj[ai[bn] + i];
1567         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1568       }
1569     }
1570   }
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1576 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1577 {
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1582   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1590 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1591 {
1592   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1593   Mat            C;
1594   PetscErrorCode ierr;
1595   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1596   PetscInt       *rows,*cols,bs2=a->bs2;
1597   MatScalar      *array;
1598 
1599   PetscFunctionBegin;
1600   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1601   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1602     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1603     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1604 
1605     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1606     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1607     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1608     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1609     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1610     ierr = PetscFree(col);CHKERRQ(ierr);
1611   } else {
1612     C = *B;
1613   }
1614 
1615   array = a->a;
1616   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1617   for (i=0; i<mbs; i++) {
1618     cols[0] = i*bs;
1619     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1620     len = ai[i+1] - ai[i];
1621     for (j=0; j<len; j++) {
1622       rows[0] = (*aj++)*bs;
1623       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1624       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1625       array += bs2;
1626     }
1627   }
1628   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1629 
1630   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1631   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632 
1633   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1634     *B = C;
1635   } else {
1636     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1643 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1644 {
1645   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1646   PetscErrorCode ierr;
1647   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1648   int            fd;
1649   PetscScalar    *aa;
1650   FILE           *file;
1651 
1652   PetscFunctionBegin;
1653   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1654   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1655   col_lens[0] = MAT_FILE_CLASSID;
1656 
1657   col_lens[1] = A->rmap->N;
1658   col_lens[2] = A->cmap->n;
1659   col_lens[3] = a->nz*bs2;
1660 
1661   /* store lengths of each row and write (including header) to file */
1662   count = 0;
1663   for (i=0; i<a->mbs; i++) {
1664     for (j=0; j<bs; j++) {
1665       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1666     }
1667   }
1668   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1669   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1670 
1671   /* store column indices (zero start index) */
1672   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1673   count = 0;
1674   for (i=0; i<a->mbs; i++) {
1675     for (j=0; j<bs; j++) {
1676       for (k=a->i[i]; k<a->i[i+1]; k++) {
1677         for (l=0; l<bs; l++) {
1678           jj[count++] = bs*a->j[k] + l;
1679         }
1680       }
1681     }
1682   }
1683   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1684   ierr = PetscFree(jj);CHKERRQ(ierr);
1685 
1686   /* store nonzero values */
1687   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1688   count = 0;
1689   for (i=0; i<a->mbs; i++) {
1690     for (j=0; j<bs; j++) {
1691       for (k=a->i[i]; k<a->i[i+1]; k++) {
1692         for (l=0; l<bs; l++) {
1693           aa[count++] = a->a[bs2*k + l*bs + j];
1694         }
1695       }
1696     }
1697   }
1698   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1699   ierr = PetscFree(aa);CHKERRQ(ierr);
1700 
1701   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1702   if (file) {
1703     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1710 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1711 {
1712   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1713   PetscErrorCode    ierr;
1714   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1715   PetscViewerFormat format;
1716 
1717   PetscFunctionBegin;
1718   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1719   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1720     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1721   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1722     Mat aij;
1723     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1724     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1725     ierr = MatDestroy(aij);CHKERRQ(ierr);
1726   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1727      PetscFunctionReturn(0);
1728   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1729     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1730     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1731     for (i=0; i<a->mbs; i++) {
1732       for (j=0; j<bs; j++) {
1733         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1734         for (k=a->i[i]; k<a->i[i+1]; k++) {
1735           for (l=0; l<bs; l++) {
1736 #if defined(PETSC_USE_COMPLEX)
1737             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1738               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1739                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1740             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1741               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1742                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1743             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1744               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1745             }
1746 #else
1747             if (a->a[bs2*k + l*bs + j] != 0.0) {
1748               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1749             }
1750 #endif
1751           }
1752         }
1753         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1754       }
1755     }
1756     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1757   } else {
1758     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1759     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1760     for (i=0; i<a->mbs; i++) {
1761       for (j=0; j<bs; j++) {
1762         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1763         for (k=a->i[i]; k<a->i[i+1]; k++) {
1764           for (l=0; l<bs; l++) {
1765 #if defined(PETSC_USE_COMPLEX)
1766             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1767               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1768                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1769             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1770               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1771                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1772             } else {
1773               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1774             }
1775 #else
1776             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1777 #endif
1778           }
1779         }
1780         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1781       }
1782     }
1783     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1784   }
1785   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1791 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1792 {
1793   Mat            A = (Mat) Aa;
1794   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1795   PetscErrorCode ierr;
1796   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1797   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1798   MatScalar      *aa;
1799   PetscViewer    viewer;
1800 
1801   PetscFunctionBegin;
1802 
1803   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1804   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1805 
1806   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1807 
1808   /* loop over matrix elements drawing boxes */
1809   color = PETSC_DRAW_BLUE;
1810   for (i=0,row=0; i<mbs; i++,row+=bs) {
1811     for (j=a->i[i]; j<a->i[i+1]; j++) {
1812       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1813       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1814       aa = a->a + j*bs2;
1815       for (k=0; k<bs; k++) {
1816         for (l=0; l<bs; l++) {
1817           if (PetscRealPart(*aa++) >=  0.) continue;
1818           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1819         }
1820       }
1821     }
1822   }
1823   color = PETSC_DRAW_CYAN;
1824   for (i=0,row=0; i<mbs; i++,row+=bs) {
1825     for (j=a->i[i]; j<a->i[i+1]; j++) {
1826       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1827       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1828       aa = a->a + j*bs2;
1829       for (k=0; k<bs; k++) {
1830         for (l=0; l<bs; l++) {
1831           if (PetscRealPart(*aa++) != 0.) continue;
1832           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1833         }
1834       }
1835     }
1836   }
1837 
1838   color = PETSC_DRAW_RED;
1839   for (i=0,row=0; i<mbs; i++,row+=bs) {
1840     for (j=a->i[i]; j<a->i[i+1]; j++) {
1841       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1842       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1843       aa = a->a + j*bs2;
1844       for (k=0; k<bs; k++) {
1845         for (l=0; l<bs; l++) {
1846           if (PetscRealPart(*aa++) <= 0.) continue;
1847           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1848         }
1849       }
1850     }
1851   }
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1857 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1858 {
1859   PetscErrorCode ierr;
1860   PetscReal      xl,yl,xr,yr,w,h;
1861   PetscDraw      draw;
1862   PetscBool      isnull;
1863 
1864   PetscFunctionBegin;
1865 
1866   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1867   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1868 
1869   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1870   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1871   xr += w;    yr += h;  xl = -w;     yl = -h;
1872   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1873   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1874   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatView_SeqBAIJ"
1880 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1881 {
1882   PetscErrorCode ierr;
1883   PetscBool      iascii,isbinary,isdraw;
1884 
1885   PetscFunctionBegin;
1886   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1887   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1888   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1889   if (iascii){
1890     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1891   } else if (isbinary) {
1892     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1893   } else if (isdraw) {
1894     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1895   } else {
1896     Mat B;
1897     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1898     ierr = MatView(B,viewer);CHKERRQ(ierr);
1899     ierr = MatDestroy(B);CHKERRQ(ierr);
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 
1905 #undef __FUNCT__
1906 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1907 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1908 {
1909   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1910   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1911   PetscInt    *ai = a->i,*ailen = a->ilen;
1912   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1913   MatScalar   *ap,*aa = a->a;
1914 
1915   PetscFunctionBegin;
1916   for (k=0; k<m; k++) { /* loop over rows */
1917     row  = im[k]; brow = row/bs;
1918     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1919     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1920     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1921     nrow = ailen[brow];
1922     for (l=0; l<n; l++) { /* loop over columns */
1923       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1924       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1925       col  = in[l] ;
1926       bcol = col/bs;
1927       cidx = col%bs;
1928       ridx = row%bs;
1929       high = nrow;
1930       low  = 0; /* assume unsorted */
1931       while (high-low > 5) {
1932         t = (low+high)/2;
1933         if (rp[t] > bcol) high = t;
1934         else             low  = t;
1935       }
1936       for (i=low; i<high; i++) {
1937         if (rp[i] > bcol) break;
1938         if (rp[i] == bcol) {
1939           *v++ = ap[bs2*i+bs*cidx+ridx];
1940           goto finished;
1941         }
1942       }
1943       *v++ = 0.0;
1944       finished:;
1945     }
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1952 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1953 {
1954   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1955   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1956   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1957   PetscErrorCode    ierr;
1958   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1959   PetscBool         roworiented=a->roworiented;
1960   const PetscScalar *value = v;
1961   MatScalar         *ap,*aa = a->a,*bap;
1962 
1963   PetscFunctionBegin;
1964   if (roworiented) {
1965     stepval = (n-1)*bs;
1966   } else {
1967     stepval = (m-1)*bs;
1968   }
1969   for (k=0; k<m; k++) { /* loop over added rows */
1970     row  = im[k];
1971     if (row < 0) continue;
1972 #if defined(PETSC_USE_DEBUG)
1973     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1974 #endif
1975     rp   = aj + ai[row];
1976     ap   = aa + bs2*ai[row];
1977     rmax = imax[row];
1978     nrow = ailen[row];
1979     low  = 0;
1980     high = nrow;
1981     for (l=0; l<n; l++) { /* loop over added columns */
1982       if (in[l] < 0) continue;
1983 #if defined(PETSC_USE_DEBUG)
1984       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1985 #endif
1986       col = in[l];
1987       if (roworiented) {
1988         value = v + (k*(stepval+bs) + l)*bs;
1989       } else {
1990         value = v + (l*(stepval+bs) + k)*bs;
1991       }
1992       if (col <= lastcol) low = 0; else high = nrow;
1993       lastcol = col;
1994       while (high-low > 7) {
1995         t = (low+high)/2;
1996         if (rp[t] > col) high = t;
1997         else             low  = t;
1998       }
1999       for (i=low; i<high; i++) {
2000         if (rp[i] > col) break;
2001         if (rp[i] == col) {
2002           bap  = ap +  bs2*i;
2003           if (roworiented) {
2004             if (is == ADD_VALUES) {
2005               for (ii=0; ii<bs; ii++,value+=stepval) {
2006                 for (jj=ii; jj<bs2; jj+=bs) {
2007                   bap[jj] += *value++;
2008                 }
2009               }
2010             } else {
2011               for (ii=0; ii<bs; ii++,value+=stepval) {
2012                 for (jj=ii; jj<bs2; jj+=bs) {
2013                   bap[jj] = *value++;
2014                 }
2015               }
2016             }
2017           } else {
2018             if (is == ADD_VALUES) {
2019               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2020                 for (jj=0; jj<bs; jj++) {
2021                   bap[jj] += value[jj];
2022                 }
2023                 bap += bs;
2024               }
2025             } else {
2026               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2027                 for (jj=0; jj<bs; jj++) {
2028                   bap[jj]  = value[jj];
2029                 }
2030                 bap += bs;
2031               }
2032             }
2033           }
2034           goto noinsert2;
2035         }
2036       }
2037       if (nonew == 1) goto noinsert2;
2038       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2039       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2040       N = nrow++ - 1; high++;
2041       /* shift up all the later entries in this row */
2042       for (ii=N; ii>=i; ii--) {
2043         rp[ii+1] = rp[ii];
2044         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2045       }
2046       if (N >= i) {
2047         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2048       }
2049       rp[i] = col;
2050       bap   = ap +  bs2*i;
2051       if (roworiented) {
2052         for (ii=0; ii<bs; ii++,value+=stepval) {
2053           for (jj=ii; jj<bs2; jj+=bs) {
2054             bap[jj] = *value++;
2055           }
2056         }
2057       } else {
2058         for (ii=0; ii<bs; ii++,value+=stepval) {
2059           for (jj=0; jj<bs; jj++) {
2060             *bap++  = *value++;
2061           }
2062         }
2063       }
2064       noinsert2:;
2065       low = i;
2066     }
2067     ailen[row] = nrow;
2068   }
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2074 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2075 {
2076   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2077   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2078   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2079   PetscErrorCode ierr;
2080   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2081   MatScalar      *aa = a->a,*ap;
2082   PetscReal      ratio=0.6;
2083 
2084   PetscFunctionBegin;
2085   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2086 
2087   if (m) rmax = ailen[0];
2088   for (i=1; i<mbs; i++) {
2089     /* move each row back by the amount of empty slots (fshift) before it*/
2090     fshift += imax[i-1] - ailen[i-1];
2091     rmax   = PetscMax(rmax,ailen[i]);
2092     if (fshift) {
2093       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2094       N = ailen[i];
2095       for (j=0; j<N; j++) {
2096         ip[j-fshift] = ip[j];
2097         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2098       }
2099     }
2100     ai[i] = ai[i-1] + ailen[i-1];
2101   }
2102   if (mbs) {
2103     fshift += imax[mbs-1] - ailen[mbs-1];
2104     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2105   }
2106   /* reset ilen and imax for each row */
2107   for (i=0; i<mbs; i++) {
2108     ailen[i] = imax[i] = ai[i+1] - ai[i];
2109   }
2110   a->nz = ai[mbs];
2111 
2112   /* diagonals may have moved, so kill the diagonal pointers */
2113   a->idiagvalid = PETSC_FALSE;
2114   if (fshift && a->diag) {
2115     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2116     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2117     a->diag = 0;
2118   }
2119   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2120   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2121   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2122   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2123   A->info.mallocs     += a->reallocs;
2124   a->reallocs          = 0;
2125   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2126 
2127   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
2128   if (a->compressedrow.use){
2129     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2130   }
2131 
2132   A->same_nonzero = PETSC_TRUE;
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 /*
2137    This function returns an array of flags which indicate the locations of contiguous
2138    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2139    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2140    Assume: sizes should be long enough to hold all the values.
2141 */
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2144 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2145 {
2146   PetscInt   i,j,k,row;
2147   PetscBool  flg;
2148 
2149   PetscFunctionBegin;
2150   for (i=0,j=0; i<n; j++) {
2151     row = idx[i];
2152     if (row%bs!=0) { /* Not the begining of a block */
2153       sizes[j] = 1;
2154       i++;
2155     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2156       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2157       i++;
2158     } else { /* Begining of the block, so check if the complete block exists */
2159       flg = PETSC_TRUE;
2160       for (k=1; k<bs; k++) {
2161         if (row+k != idx[i+k]) { /* break in the block */
2162           flg = PETSC_FALSE;
2163           break;
2164         }
2165       }
2166       if (flg) { /* No break in the bs */
2167         sizes[j] = bs;
2168         i+= bs;
2169       } else {
2170         sizes[j] = 1;
2171         i++;
2172       }
2173     }
2174   }
2175   *bs_max = j;
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2181 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2182 {
2183   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2184   PetscErrorCode ierr;
2185   PetscInt       i,j,k,count,*rows;
2186   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2187   PetscScalar    zero = 0.0;
2188   MatScalar      *aa;
2189 
2190   PetscFunctionBegin;
2191   /* Make a copy of the IS and  sort it */
2192   /* allocate memory for rows,sizes */
2193   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2194 
2195   /* copy IS values to rows, and sort them */
2196   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2197   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2198   if (baij->keepnonzeropattern) {
2199     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2200     bs_max = is_n;
2201     A->same_nonzero = PETSC_TRUE;
2202   } else {
2203     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2204     A->same_nonzero = PETSC_FALSE;
2205   }
2206 
2207   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2208     row   = rows[j];
2209     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2210     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2211     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2212     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2213       if (diag != 0.0) {
2214         if (baij->ilen[row/bs] > 0) {
2215           baij->ilen[row/bs]       = 1;
2216           baij->j[baij->i[row/bs]] = row/bs;
2217           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2218         }
2219         /* Now insert all the diagonal values for this bs */
2220         for (k=0; k<bs; k++) {
2221           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2222         }
2223       } else { /* (diag == 0.0) */
2224         baij->ilen[row/bs] = 0;
2225       } /* end (diag == 0.0) */
2226     } else { /* (sizes[i] != bs) */
2227 #if defined (PETSC_USE_DEBUG)
2228       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2229 #endif
2230       for (k=0; k<count; k++) {
2231         aa[0] =  zero;
2232         aa    += bs;
2233       }
2234       if (diag != 0.0) {
2235         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2236       }
2237     }
2238   }
2239 
2240   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2241   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2247 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2248 {
2249   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2250   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2251   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2252   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2253   PetscErrorCode ierr;
2254   PetscInt       ridx,cidx,bs2=a->bs2;
2255   PetscBool      roworiented=a->roworiented;
2256   MatScalar      *ap,value,*aa=a->a,*bap;
2257 
2258   PetscFunctionBegin;
2259   if (v) PetscValidScalarPointer(v,6);
2260   for (k=0; k<m; k++) { /* loop over added rows */
2261     row  = im[k];
2262     brow = row/bs;
2263     if (row < 0) continue;
2264 #if defined(PETSC_USE_DEBUG)
2265     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2266 #endif
2267     rp   = aj + ai[brow];
2268     ap   = aa + bs2*ai[brow];
2269     rmax = imax[brow];
2270     nrow = ailen[brow];
2271     low  = 0;
2272     high = nrow;
2273     for (l=0; l<n; l++) { /* loop over added columns */
2274       if (in[l] < 0) continue;
2275 #if defined(PETSC_USE_DEBUG)
2276       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2277 #endif
2278       col = in[l]; bcol = col/bs;
2279       ridx = row % bs; cidx = col % bs;
2280       if (roworiented) {
2281         value = v[l + k*n];
2282       } else {
2283         value = v[k + l*m];
2284       }
2285       if (col <= lastcol) low = 0; else high = nrow;
2286       lastcol = col;
2287       while (high-low > 7) {
2288         t = (low+high)/2;
2289         if (rp[t] > bcol) high = t;
2290         else              low  = t;
2291       }
2292       for (i=low; i<high; i++) {
2293         if (rp[i] > bcol) break;
2294         if (rp[i] == bcol) {
2295           bap  = ap +  bs2*i + bs*cidx + ridx;
2296           if (is == ADD_VALUES) *bap += value;
2297           else                  *bap  = value;
2298           goto noinsert1;
2299         }
2300       }
2301       if (nonew == 1) goto noinsert1;
2302       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2303       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2304       N = nrow++ - 1; high++;
2305       /* shift up all the later entries in this row */
2306       for (ii=N; ii>=i; ii--) {
2307         rp[ii+1] = rp[ii];
2308         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2309       }
2310       if (N>=i) {
2311         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2312       }
2313       rp[i]                      = bcol;
2314       ap[bs2*i + bs*cidx + ridx] = value;
2315       a->nz++;
2316       noinsert1:;
2317       low = i;
2318     }
2319     ailen[brow] = nrow;
2320   }
2321   A->same_nonzero = PETSC_FALSE;
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2327 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2328 {
2329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2330   Mat            outA;
2331   PetscErrorCode ierr;
2332   PetscBool      row_identity,col_identity;
2333 
2334   PetscFunctionBegin;
2335   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2336   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2337   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2338   if (!row_identity || !col_identity) {
2339     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2340   }
2341 
2342   outA            = inA;
2343   inA->factortype = MAT_FACTOR_LU;
2344 
2345   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2346 
2347   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2348   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2349   a->row = row;
2350   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2351   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2352   a->col = col;
2353 
2354   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2355   if (a->icol) {
2356     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2357   }
2358   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2359   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2360 
2361   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool )(row_identity && col_identity));CHKERRQ(ierr);
2362   if (!a->solve_work) {
2363     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2364     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2365   }
2366   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2367 
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 EXTERN_C_BEGIN
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2374 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2375 {
2376   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2377   PetscInt    i,nz,mbs;
2378 
2379   PetscFunctionBegin;
2380   nz  = baij->maxnz;
2381   mbs = baij->mbs;
2382   for (i=0; i<nz; i++) {
2383     baij->j[i] = indices[i];
2384   }
2385   baij->nz = nz;
2386   for (i=0; i<mbs; i++) {
2387     baij->ilen[i] = baij->imax[i];
2388   }
2389   PetscFunctionReturn(0);
2390 }
2391 EXTERN_C_END
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2395 /*@
2396     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2397        in the matrix.
2398 
2399   Input Parameters:
2400 +  mat - the SeqBAIJ matrix
2401 -  indices - the column indices
2402 
2403   Level: advanced
2404 
2405   Notes:
2406     This can be called if you have precomputed the nonzero structure of the
2407   matrix and want to provide it to the matrix object to improve the performance
2408   of the MatSetValues() operation.
2409 
2410     You MUST have set the correct numbers of nonzeros per row in the call to
2411   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2412 
2413     MUST be called before any calls to MatSetValues();
2414 
2415 @*/
2416 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2417 {
2418   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2419 
2420   PetscFunctionBegin;
2421   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2422   PetscValidPointer(indices,2);
2423   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2424   if (f) {
2425     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2426   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2432 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2433 {
2434   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2435   PetscErrorCode ierr;
2436   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2437   PetscReal      atmp;
2438   PetscScalar    *x,zero = 0.0;
2439   MatScalar      *aa;
2440   PetscInt       ncols,brow,krow,kcol;
2441 
2442   PetscFunctionBegin;
2443   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2444   bs   = A->rmap->bs;
2445   aa   = a->a;
2446   ai   = a->i;
2447   aj   = a->j;
2448   mbs  = a->mbs;
2449 
2450   ierr = VecSet(v,zero);CHKERRQ(ierr);
2451   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2452   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2453   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2454   for (i=0; i<mbs; i++) {
2455     ncols = ai[1] - ai[0]; ai++;
2456     brow  = bs*i;
2457     for (j=0; j<ncols; j++){
2458       for (kcol=0; kcol<bs; kcol++){
2459         for (krow=0; krow<bs; krow++){
2460           atmp = PetscAbsScalar(*aa);aa++;
2461           row = brow + krow;    /* row index */
2462           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2463           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2464         }
2465       }
2466       aj++;
2467     }
2468   }
2469   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatCopy_SeqBAIJ"
2475 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2476 {
2477   PetscErrorCode ierr;
2478 
2479   PetscFunctionBegin;
2480   /* If the two matrices have the same copy implementation, use fast copy. */
2481   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2482     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2483     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2484     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2485 
2486     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2487     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2488     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2489   } else {
2490     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2491   }
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 #undef __FUNCT__
2496 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2497 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2498 {
2499   PetscErrorCode ierr;
2500 
2501   PetscFunctionBegin;
2502   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2508 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2509 {
2510   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2511   PetscFunctionBegin;
2512   *array = a->a;
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2518 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2519 {
2520   PetscFunctionBegin;
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2526 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2527 {
2528   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2529   PetscErrorCode ierr;
2530   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2531   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2532 
2533   PetscFunctionBegin;
2534   if (str == SAME_NONZERO_PATTERN) {
2535     PetscScalar alpha = a;
2536     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2537   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2538     if (y->xtoy && y->XtoY != X) {
2539       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2540       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2541     }
2542     if (!y->xtoy) { /* get xtoy */
2543       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2544       y->XtoY = X;
2545       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2546     }
2547     bs2 = bs*bs;
2548     for (i=0; i<x->nz; i++) {
2549       j = 0;
2550       while (j < bs2){
2551         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2552         j++;
2553       }
2554     }
2555     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2556   } else {
2557     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2558   }
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2564 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2565 {
2566   PetscInt rbs,cbs;
2567   PetscErrorCode ierr;
2568 
2569   PetscFunctionBegin;
2570   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2571   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2572   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2573   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 #undef __FUNCT__
2578 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2579 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2580 {
2581   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2582   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2583   MatScalar      *aa = a->a;
2584 
2585   PetscFunctionBegin;
2586   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2592 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2593 {
2594   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2595   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2596   MatScalar      *aa = a->a;
2597 
2598   PetscFunctionBegin;
2599   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2607 /*
2608     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2609 */
2610 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2611 {
2612   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2613   PetscErrorCode ierr;
2614   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2615   PetscInt       nz = a->i[m],row,*jj,mr,col;
2616 
2617   PetscFunctionBegin;
2618   *nn = n;
2619   if (!ia) PetscFunctionReturn(0);
2620   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2621   else {
2622     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2623     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2624     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2625     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2626     jj = a->j;
2627     for (i=0; i<nz; i++) {
2628       collengths[jj[i]]++;
2629     }
2630     cia[0] = oshift;
2631     for (i=0; i<n; i++) {
2632       cia[i+1] = cia[i] + collengths[i];
2633     }
2634     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2635     jj   = a->j;
2636     for (row=0; row<m; row++) {
2637       mr = a->i[row+1] - a->i[row];
2638       for (i=0; i<mr; i++) {
2639         col = *jj++;
2640         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2641       }
2642     }
2643     ierr = PetscFree(collengths);CHKERRQ(ierr);
2644     *ia = cia; *ja = cja;
2645   }
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2651 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2652 {
2653   PetscErrorCode ierr;
2654 
2655   PetscFunctionBegin;
2656   if (!ia) PetscFunctionReturn(0);
2657   ierr = PetscFree(*ia);CHKERRQ(ierr);
2658   ierr = PetscFree(*ja);CHKERRQ(ierr);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2664 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2665 {
2666   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2667   PetscErrorCode ierr;
2668   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2669   PetscScalar    dx,*y,*xx,*w3_array;
2670   PetscScalar    *vscale_array;
2671   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2672   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2673   void           *fctx = coloring->fctx;
2674   PetscBool      flg = PETSC_FALSE;
2675   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2676   Vec            x1_tmp;
2677 
2678   PetscFunctionBegin;
2679   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2680   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2681   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2682   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2683 
2684   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2685   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2686   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2687   if (flg) {
2688     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2689   } else {
2690     PetscBool  assembled;
2691     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2692     if (assembled) {
2693       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2694     }
2695   }
2696 
2697   x1_tmp = x1;
2698   if (!coloring->vscale){
2699     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2700   }
2701 
2702   /*
2703     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2704     coloring->F for the coarser grids from the finest
2705   */
2706   if (coloring->F) {
2707     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2708     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2709     if (m1 != m2) {
2710       coloring->F = 0;
2711       }
2712     }
2713 
2714   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2715     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2716   }
2717   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2718 
2719   /* Set w1 = F(x1) */
2720   if (coloring->F) {
2721     w1          = coloring->F; /* use already computed value of function */
2722     coloring->F = 0;
2723   } else {
2724     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2725     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2726     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2727   }
2728 
2729   if (!coloring->w3) {
2730     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2731     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2732   }
2733   w3 = coloring->w3;
2734 
2735     CHKMEMQ;
2736     /* Compute all the local scale factors, including ghost points */
2737   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2738   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2739   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2740   if (ctype == IS_COLORING_GHOSTED){
2741     col_start = 0; col_end = N;
2742   } else if (ctype == IS_COLORING_GLOBAL){
2743     xx = xx - start;
2744     vscale_array = vscale_array - start;
2745     col_start = start; col_end = N + start;
2746   }    CHKMEMQ;
2747   for (col=col_start; col<col_end; col++){
2748     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2749     if (coloring->htype[0] == 'w') {
2750       dx = 1.0 + unorm;
2751     } else {
2752       dx  = xx[col];
2753     }
2754     if (dx == 0.0) dx = 1.0;
2755 #if !defined(PETSC_USE_COMPLEX)
2756     if (dx < umin && dx >= 0.0)      dx = umin;
2757     else if (dx < 0.0 && dx > -umin) dx = -umin;
2758 #else
2759     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2760     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2761 #endif
2762     dx               *= epsilon;
2763     vscale_array[col] = 1.0/dx;
2764   }     CHKMEMQ;
2765   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2766   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2767   if (ctype == IS_COLORING_GLOBAL){
2768     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2769     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2770   }
2771   CHKMEMQ;
2772   if (coloring->vscaleforrow) {
2773     vscaleforrow = coloring->vscaleforrow;
2774   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2775 
2776   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2777   /*
2778     Loop over each color
2779   */
2780   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2781   for (k=0; k<coloring->ncolors; k++) {
2782     coloring->currentcolor = k;
2783     for (i=0; i<bs; i++) {
2784       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2785       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2786       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2787       /*
2788 	Loop over each column associated with color
2789 	adding the perturbation to the vector w3.
2790       */
2791       for (l=0; l<coloring->ncolumns[k]; l++) {
2792 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2793 	if (coloring->htype[0] == 'w') {
2794 	  dx = 1.0 + unorm;
2795 	} else {
2796 	  dx  = xx[col];
2797 	}
2798 	if (dx == 0.0) dx = 1.0;
2799 #if !defined(PETSC_USE_COMPLEX)
2800 	if (dx < umin && dx >= 0.0)      dx = umin;
2801 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2802 #else
2803 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2804 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2805 #endif
2806 	dx            *= epsilon;
2807 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2808 	w3_array[col] += dx;
2809       }
2810       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2811       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2812 
2813       /*
2814 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2815 	w2 = F(x1 + dx) - F(x1)
2816       */
2817       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2818       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2819       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2820       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2821 
2822       /*
2823 	Loop over rows of vector, putting results into Jacobian matrix
2824       */
2825       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2826       for (l=0; l<coloring->nrows[k]; l++) {
2827 	row    = bs*coloring->rows[k][l];             /* local row index */
2828 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2829         for (j=0; j<bs; j++) {
2830   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2831           srows[j]  = row + start + j;
2832         }
2833 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2834       }
2835       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2836     }
2837   } /* endof for each color */
2838   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2839   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2840   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2841   ierr = PetscFree(srows);CHKERRQ(ierr);
2842 
2843   coloring->currentcolor = -1;
2844   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2845   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2846   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 /* -------------------------------------------------------------------*/
2851 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2852        MatGetRow_SeqBAIJ,
2853        MatRestoreRow_SeqBAIJ,
2854        MatMult_SeqBAIJ_N,
2855 /* 4*/ MatMultAdd_SeqBAIJ_N,
2856        MatMultTranspose_SeqBAIJ,
2857        MatMultTransposeAdd_SeqBAIJ,
2858        0,
2859        0,
2860        0,
2861 /*10*/ 0,
2862        MatLUFactor_SeqBAIJ,
2863        0,
2864        0,
2865        MatTranspose_SeqBAIJ,
2866 /*15*/ MatGetInfo_SeqBAIJ,
2867        MatEqual_SeqBAIJ,
2868        MatGetDiagonal_SeqBAIJ,
2869        MatDiagonalScale_SeqBAIJ,
2870        MatNorm_SeqBAIJ,
2871 /*20*/ 0,
2872        MatAssemblyEnd_SeqBAIJ,
2873        MatSetOption_SeqBAIJ,
2874        MatZeroEntries_SeqBAIJ,
2875 /*24*/ MatZeroRows_SeqBAIJ,
2876        0,
2877        0,
2878        0,
2879        0,
2880 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2881        0,
2882        0,
2883        MatGetArray_SeqBAIJ,
2884        MatRestoreArray_SeqBAIJ,
2885 /*34*/ MatDuplicate_SeqBAIJ,
2886        0,
2887        0,
2888        MatILUFactor_SeqBAIJ,
2889        0,
2890 /*39*/ MatAXPY_SeqBAIJ,
2891        MatGetSubMatrices_SeqBAIJ,
2892        MatIncreaseOverlap_SeqBAIJ,
2893        MatGetValues_SeqBAIJ,
2894        MatCopy_SeqBAIJ,
2895 /*44*/ 0,
2896        MatScale_SeqBAIJ,
2897        0,
2898        0,
2899        0,
2900 /*49*/ MatSetBlockSize_SeqBAIJ,
2901        MatGetRowIJ_SeqBAIJ,
2902        MatRestoreRowIJ_SeqBAIJ,
2903        MatGetColumnIJ_SeqBAIJ,
2904        MatRestoreColumnIJ_SeqBAIJ,
2905 /*54*/ MatFDColoringCreate_SeqAIJ,
2906        0,
2907        0,
2908        0,
2909        MatSetValuesBlocked_SeqBAIJ,
2910 /*59*/ MatGetSubMatrix_SeqBAIJ,
2911        MatDestroy_SeqBAIJ,
2912        MatView_SeqBAIJ,
2913        0,
2914        0,
2915 /*64*/ 0,
2916        0,
2917        0,
2918        0,
2919        0,
2920 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2921        0,
2922        MatConvert_Basic,
2923        0,
2924        0,
2925 /*74*/ 0,
2926        MatFDColoringApply_BAIJ,
2927        0,
2928        0,
2929        0,
2930 /*79*/ 0,
2931        0,
2932        0,
2933        0,
2934        MatLoad_SeqBAIJ,
2935 /*84*/ 0,
2936        0,
2937        0,
2938        0,
2939        0,
2940 /*89*/ 0,
2941        0,
2942        0,
2943        0,
2944        0,
2945 /*94*/ 0,
2946        0,
2947        0,
2948        0,
2949        0,
2950 /*99*/0,
2951        0,
2952        0,
2953        0,
2954        0,
2955 /*104*/0,
2956        MatRealPart_SeqBAIJ,
2957        MatImaginaryPart_SeqBAIJ,
2958        0,
2959        0,
2960 /*109*/0,
2961        0,
2962        0,
2963        0,
2964        MatMissingDiagonal_SeqBAIJ,
2965 /*114*/0,
2966        0,
2967        0,
2968        0,
2969        0,
2970 /*119*/0,
2971        0,
2972        MatMultHermitianTranspose_SeqBAIJ,
2973        MatMultHermitianTransposeAdd_SeqBAIJ,
2974        0,
2975 };
2976 
2977 EXTERN_C_BEGIN
2978 #undef __FUNCT__
2979 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2980 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2981 {
2982   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2983   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2988 
2989   /* allocate space for values if not already there */
2990   if (!aij->saved_values) {
2991     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2992     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2993   }
2994 
2995   /* copy values over */
2996   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2997   PetscFunctionReturn(0);
2998 }
2999 EXTERN_C_END
3000 
3001 EXTERN_C_BEGIN
3002 #undef __FUNCT__
3003 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3004 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
3005 {
3006   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3007   PetscErrorCode ierr;
3008   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3009 
3010   PetscFunctionBegin;
3011   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3012   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3013 
3014   /* copy values over */
3015   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3016   PetscFunctionReturn(0);
3017 }
3018 EXTERN_C_END
3019 
3020 EXTERN_C_BEGIN
3021 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3022 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3023 EXTERN_C_END
3024 
3025 EXTERN_C_BEGIN
3026 #undef __FUNCT__
3027 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3028 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3029 {
3030   Mat_SeqBAIJ    *b;
3031   PetscErrorCode ierr;
3032   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3033   PetscBool      flg,skipallocation = PETSC_FALSE;
3034 
3035   PetscFunctionBegin;
3036 
3037   if (nz == MAT_SKIP_ALLOCATION) {
3038     skipallocation = PETSC_TRUE;
3039     nz             = 0;
3040   }
3041 
3042   if (bs < 0) {
3043     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3044       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3045     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3046     bs   = PetscAbs(bs);
3047   }
3048   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3049   bs   = newbs;
3050 
3051   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3052   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3053   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3054   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3055 
3056   B->preallocated = PETSC_TRUE;
3057 
3058   mbs  = B->rmap->n/bs;
3059   nbs  = B->cmap->n/bs;
3060   bs2  = bs*bs;
3061 
3062   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3063 
3064   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3065   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3066   if (nnz) {
3067     for (i=0; i<mbs; i++) {
3068       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3069       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3070     }
3071   }
3072 
3073   b       = (Mat_SeqBAIJ*)B->data;
3074   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3075     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3076   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3077 
3078   if (!flg) {
3079     switch (bs) {
3080     case 1:
3081       B->ops->mult            = MatMult_SeqBAIJ_1;
3082       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3083       B->ops->sor             = MatSOR_SeqBAIJ_1;
3084       break;
3085     case 2:
3086       B->ops->mult            = MatMult_SeqBAIJ_2;
3087       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3088       B->ops->sor             = MatSOR_SeqBAIJ_2;
3089       break;
3090     case 3:
3091       B->ops->mult            = MatMult_SeqBAIJ_3;
3092       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3093       B->ops->sor             = MatSOR_SeqBAIJ_3;
3094       break;
3095     case 4:
3096       B->ops->mult            = MatMult_SeqBAIJ_4;
3097       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3098       B->ops->sor             = MatSOR_SeqBAIJ_4;
3099       break;
3100     case 5:
3101       B->ops->mult            = MatMult_SeqBAIJ_5;
3102       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3103       B->ops->sor             = MatSOR_SeqBAIJ_5;
3104       break;
3105     case 6:
3106       B->ops->mult            = MatMult_SeqBAIJ_6;
3107       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3108       B->ops->sor             = MatSOR_SeqBAIJ_6;
3109       break;
3110     case 7:
3111       B->ops->mult            = MatMult_SeqBAIJ_7;
3112       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3113       B->ops->sor             = MatSOR_SeqBAIJ_7;
3114       break;
3115     case 15:
3116       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3117       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3118       B->ops->sor             = MatSOR_SeqBAIJ_N;
3119       break;
3120     default:
3121       B->ops->mult            = MatMult_SeqBAIJ_N;
3122       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3123       B->ops->sor             = MatSOR_SeqBAIJ_N;
3124       break;
3125     }
3126   }
3127   B->rmap->bs  = bs;
3128   b->mbs       = mbs;
3129   b->nbs       = nbs;
3130   if (!skipallocation) {
3131     if (!b->imax) {
3132       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3133       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3134       b->free_imax_ilen = PETSC_TRUE;
3135     }
3136     /* b->ilen will count nonzeros in each block row so far. */
3137     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3138     if (!nnz) {
3139       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3140       else if (nz < 0) nz = 1;
3141       for (i=0; i<mbs; i++) b->imax[i] = nz;
3142       nz = nz*mbs;
3143     } else {
3144       nz = 0;
3145       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3146     }
3147 
3148     /* allocate the matrix space */
3149     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3150     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3151     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3152     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3153     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3154     b->singlemalloc = PETSC_TRUE;
3155     b->i[0] = 0;
3156     for (i=1; i<mbs+1; i++) {
3157       b->i[i] = b->i[i-1] + b->imax[i-1];
3158     }
3159     b->free_a     = PETSC_TRUE;
3160     b->free_ij    = PETSC_TRUE;
3161   } else {
3162     b->free_a     = PETSC_FALSE;
3163     b->free_ij    = PETSC_FALSE;
3164   }
3165 
3166   B->rmap->bs          = bs;
3167   b->bs2              = bs2;
3168   b->mbs              = mbs;
3169   b->nz               = 0;
3170   b->maxnz            = nz;
3171   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3172   PetscFunctionReturn(0);
3173 }
3174 EXTERN_C_END
3175 
3176 EXTERN_C_BEGIN
3177 #undef __FUNCT__
3178 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3179 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3180 {
3181   PetscInt       i,m,nz,nz_max=0,*nnz;
3182   PetscScalar    *values=0;
3183   PetscErrorCode ierr;
3184 
3185   PetscFunctionBegin;
3186   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3187   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3188   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3189   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3190   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3191   m = B->rmap->n/bs;
3192 
3193   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3194   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3195   for(i=0; i<m; i++) {
3196     nz = ii[i+1]- ii[i];
3197     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3198     nz_max = PetscMax(nz_max, nz);
3199     nnz[i] = nz;
3200   }
3201   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3202   ierr = PetscFree(nnz);CHKERRQ(ierr);
3203 
3204   values = (PetscScalar*)V;
3205   if (!values) {
3206     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3207     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3208   }
3209   for (i=0; i<m; i++) {
3210     PetscInt          ncols  = ii[i+1] - ii[i];
3211     const PetscInt    *icols = jj + ii[i];
3212     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3213     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3214   }
3215   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3216   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3217   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3218 
3219   PetscFunctionReturn(0);
3220 }
3221 EXTERN_C_END
3222 
3223 
3224 EXTERN_C_BEGIN
3225 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3226 #if defined(PETSC_HAVE_MUMPS)
3227 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3228 #endif
3229 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3230 EXTERN_C_END
3231 
3232 /*MC
3233    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3234    block sparse compressed row format.
3235 
3236    Options Database Keys:
3237 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3238 
3239   Level: beginner
3240 
3241 .seealso: MatCreateSeqBAIJ()
3242 M*/
3243 
3244 
3245 EXTERN_C_BEGIN
3246 #undef __FUNCT__
3247 #define __FUNCT__ "MatCreate_SeqBAIJ"
3248 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3249 {
3250   PetscErrorCode ierr;
3251   PetscMPIInt    size;
3252   Mat_SeqBAIJ    *b;
3253 
3254   PetscFunctionBegin;
3255   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3256   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3257 
3258   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3259   B->data = (void*)b;
3260   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3261   B->mapping               = 0;
3262   b->row                   = 0;
3263   b->col                   = 0;
3264   b->icol                  = 0;
3265   b->reallocs              = 0;
3266   b->saved_values          = 0;
3267 
3268   b->roworiented           = PETSC_TRUE;
3269   b->nonew                 = 0;
3270   b->diag                  = 0;
3271   b->solve_work            = 0;
3272   b->mult_work             = 0;
3273   B->spptr                 = 0;
3274   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3275   b->keepnonzeropattern    = PETSC_FALSE;
3276   b->xtoy                  = 0;
3277   b->XtoY                  = 0;
3278   b->compressedrow.use     = PETSC_FALSE;
3279   b->compressedrow.nrows   = 0;
3280   b->compressedrow.i       = PETSC_NULL;
3281   b->compressedrow.rindex  = PETSC_NULL;
3282   b->compressedrow.checked = PETSC_FALSE;
3283   B->same_nonzero          = PETSC_FALSE;
3284 
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3286                                      "MatGetFactorAvailable_seqbaij_petsc",
3287                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3289                                      "MatGetFactor_seqbaij_petsc",
3290                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3291 #if defined(PETSC_HAVE_MUMPS)
3292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3293 #endif
3294   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3295                                      "MatInvertBlockDiagonal_SeqBAIJ",
3296                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3297   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3298                                      "MatStoreValues_SeqBAIJ",
3299                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3300   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3301                                      "MatRetrieveValues_SeqBAIJ",
3302                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3303   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3304                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3305                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3306   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3307                                      "MatConvert_SeqBAIJ_SeqAIJ",
3308                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3309   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3310                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3311                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3312   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3313                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3314                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3315   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3316                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3317                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3318   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3319   PetscFunctionReturn(0);
3320 }
3321 EXTERN_C_END
3322 
3323 #undef __FUNCT__
3324 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3325 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3326 {
3327   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3328   PetscErrorCode ierr;
3329   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3330 
3331   PetscFunctionBegin;
3332   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3333 
3334   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3335     c->imax = a->imax;
3336     c->ilen = a->ilen;
3337     c->free_imax_ilen = PETSC_FALSE;
3338   } else {
3339     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3340     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3341     for (i=0; i<mbs; i++) {
3342       c->imax[i] = a->imax[i];
3343       c->ilen[i] = a->ilen[i];
3344     }
3345     c->free_imax_ilen = PETSC_TRUE;
3346   }
3347 
3348   /* allocate the matrix space */
3349   if (mallocmatspace){
3350     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3351       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3352       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3353       c->singlemalloc = PETSC_FALSE;
3354       c->free_ij      = PETSC_FALSE;
3355       c->i            = a->i;
3356       c->j            = a->j;
3357       c->parent       = A;
3358       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3359       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3360       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3361     } else {
3362       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3363       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3364       c->singlemalloc = PETSC_TRUE;
3365       c->free_ij      = PETSC_TRUE;
3366       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3367       if (mbs > 0) {
3368 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3369 	if (cpvalues == MAT_COPY_VALUES) {
3370 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3371 	} else {
3372 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3373 	}
3374       }
3375     }
3376   }
3377 
3378   c->roworiented = a->roworiented;
3379   c->nonew       = a->nonew;
3380   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3381   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3382   c->bs2         = a->bs2;
3383   c->mbs         = a->mbs;
3384   c->nbs         = a->nbs;
3385 
3386   if (a->diag) {
3387     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3388       c->diag      = a->diag;
3389       c->free_diag = PETSC_FALSE;
3390     } else {
3391       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3392       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3393       for (i=0; i<mbs; i++) {
3394         c->diag[i] = a->diag[i];
3395       }
3396       c->free_diag = PETSC_TRUE;
3397     }
3398   } else c->diag        = 0;
3399   c->nz                 = a->nz;
3400   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3401   c->solve_work         = 0;
3402   c->mult_work          = 0;
3403   c->free_a             = PETSC_TRUE;
3404   c->free_ij            = PETSC_TRUE;
3405   C->preallocated       = PETSC_TRUE;
3406   C->assembled          = PETSC_TRUE;
3407 
3408   c->compressedrow.use     = a->compressedrow.use;
3409   c->compressedrow.nrows   = a->compressedrow.nrows;
3410   c->compressedrow.checked = a->compressedrow.checked;
3411   if (a->compressedrow.checked && a->compressedrow.use){
3412     i = a->compressedrow.nrows;
3413     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3414     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3415     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3416     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3417   } else {
3418     c->compressedrow.use    = PETSC_FALSE;
3419     c->compressedrow.i      = PETSC_NULL;
3420     c->compressedrow.rindex = PETSC_NULL;
3421   }
3422   C->same_nonzero = A->same_nonzero;
3423   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3424   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3425   PetscFunctionReturn(0);
3426 }
3427 
3428 #undef __FUNCT__
3429 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3430 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3431 {
3432     PetscErrorCode ierr;
3433 
3434   PetscFunctionBegin;
3435   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3436   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3437   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3438   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3439   PetscFunctionReturn(0);
3440 }
3441 
3442 #undef __FUNCT__
3443 #define __FUNCT__ "MatLoad_SeqBAIJ"
3444 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3445 {
3446   Mat_SeqBAIJ    *a;
3447   PetscErrorCode ierr;
3448   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3449   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3450   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3451   PetscInt       *masked,nmask,tmp,bs2,ishift;
3452   PetscMPIInt    size;
3453   int            fd;
3454   PetscScalar    *aa;
3455   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3456 
3457   PetscFunctionBegin;
3458   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3459   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3460   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3461   bs2  = bs*bs;
3462 
3463   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3464   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3465   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3466   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3467   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3468   M = header[1]; N = header[2]; nz = header[3];
3469 
3470   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3471   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3472 
3473   /*
3474      This code adds extra rows to make sure the number of rows is
3475     divisible by the blocksize
3476   */
3477   mbs        = M/bs;
3478   extra_rows = bs - M + bs*(mbs);
3479   if (extra_rows == bs) extra_rows = 0;
3480   else                  mbs++;
3481   if (extra_rows) {
3482     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3483   }
3484 
3485   /* Set global sizes if not already set */
3486   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3487     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3488   } else { /* Check if the matrix global sizes are correct */
3489     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3490     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3491   }
3492 
3493   /* read in row lengths */
3494   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3495   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3496   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3497 
3498   /* read in column indices */
3499   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3500   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3501   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3502 
3503   /* loop over row lengths determining block row lengths */
3504   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3505   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3506   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3507   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3508   rowcount = 0;
3509   nzcount = 0;
3510   for (i=0; i<mbs; i++) {
3511     nmask = 0;
3512     for (j=0; j<bs; j++) {
3513       kmax = rowlengths[rowcount];
3514       for (k=0; k<kmax; k++) {
3515         tmp = jj[nzcount++]/bs;
3516         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3517       }
3518       rowcount++;
3519     }
3520     browlengths[i] += nmask;
3521     /* zero out the mask elements we set */
3522     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3523   }
3524 
3525   /* Do preallocation  */
3526   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3527   a = (Mat_SeqBAIJ*)newmat->data;
3528 
3529   /* set matrix "i" values */
3530   a->i[0] = 0;
3531   for (i=1; i<= mbs; i++) {
3532     a->i[i]      = a->i[i-1] + browlengths[i-1];
3533     a->ilen[i-1] = browlengths[i-1];
3534   }
3535   a->nz         = 0;
3536   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3537 
3538   /* read in nonzero values */
3539   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3540   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3541   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3542 
3543   /* set "a" and "j" values into matrix */
3544   nzcount = 0; jcount = 0;
3545   for (i=0; i<mbs; i++) {
3546     nzcountb = nzcount;
3547     nmask    = 0;
3548     for (j=0; j<bs; j++) {
3549       kmax = rowlengths[i*bs+j];
3550       for (k=0; k<kmax; k++) {
3551         tmp = jj[nzcount++]/bs;
3552 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3553       }
3554     }
3555     /* sort the masked values */
3556     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3557 
3558     /* set "j" values into matrix */
3559     maskcount = 1;
3560     for (j=0; j<nmask; j++) {
3561       a->j[jcount++]  = masked[j];
3562       mask[masked[j]] = maskcount++;
3563     }
3564     /* set "a" values into matrix */
3565     ishift = bs2*a->i[i];
3566     for (j=0; j<bs; j++) {
3567       kmax = rowlengths[i*bs+j];
3568       for (k=0; k<kmax; k++) {
3569         tmp       = jj[nzcountb]/bs ;
3570         block     = mask[tmp] - 1;
3571         point     = jj[nzcountb] - bs*tmp;
3572         idx       = ishift + bs2*block + j + bs*point;
3573         a->a[idx] = (MatScalar)aa[nzcountb++];
3574       }
3575     }
3576     /* zero out the mask elements we set */
3577     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3578   }
3579   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3580 
3581   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3582   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3583   ierr = PetscFree(aa);CHKERRQ(ierr);
3584   ierr = PetscFree(jj);CHKERRQ(ierr);
3585   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3586 
3587   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3588   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3589   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3590   PetscFunctionReturn(0);
3591 }
3592 
3593 #undef __FUNCT__
3594 #define __FUNCT__ "MatCreateSeqBAIJ"
3595 /*@C
3596    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3597    compressed row) format.  For good matrix assembly performance the
3598    user should preallocate the matrix storage by setting the parameter nz
3599    (or the array nnz).  By setting these parameters accurately, performance
3600    during matrix assembly can be increased by more than a factor of 50.
3601 
3602    Collective on MPI_Comm
3603 
3604    Input Parameters:
3605 +  comm - MPI communicator, set to PETSC_COMM_SELF
3606 .  bs - size of block
3607 .  m - number of rows
3608 .  n - number of columns
3609 .  nz - number of nonzero blocks  per block row (same for all rows)
3610 -  nnz - array containing the number of nonzero blocks in the various block rows
3611          (possibly different for each block row) or PETSC_NULL
3612 
3613    Output Parameter:
3614 .  A - the matrix
3615 
3616    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3617    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3618    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3619 
3620    Options Database Keys:
3621 .   -mat_no_unroll - uses code that does not unroll the loops in the
3622                      block calculations (much slower)
3623 .    -mat_block_size - size of the blocks to use
3624 
3625    Level: intermediate
3626 
3627    Notes:
3628    The number of rows and columns must be divisible by blocksize.
3629 
3630    If the nnz parameter is given then the nz parameter is ignored
3631 
3632    A nonzero block is any block that as 1 or more nonzeros in it
3633 
3634    The block AIJ format is fully compatible with standard Fortran 77
3635    storage.  That is, the stored row and column indices can begin at
3636    either one (as in Fortran) or zero.  See the users' manual for details.
3637 
3638    Specify the preallocated storage with either nz or nnz (not both).
3639    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3640    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3641    matrices.
3642 
3643 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3644 @*/
3645 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3646 {
3647   PetscErrorCode ierr;
3648 
3649   PetscFunctionBegin;
3650   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3651   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3652   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3653   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3654   PetscFunctionReturn(0);
3655 }
3656 
3657 #undef __FUNCT__
3658 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3659 /*@C
3660    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3661    per row in the matrix. For good matrix assembly performance the
3662    user should preallocate the matrix storage by setting the parameter nz
3663    (or the array nnz).  By setting these parameters accurately, performance
3664    during matrix assembly can be increased by more than a factor of 50.
3665 
3666    Collective on MPI_Comm
3667 
3668    Input Parameters:
3669 +  A - the matrix
3670 .  bs - size of block
3671 .  nz - number of block nonzeros per block row (same for all rows)
3672 -  nnz - array containing the number of block nonzeros in the various block rows
3673          (possibly different for each block row) or PETSC_NULL
3674 
3675    Options Database Keys:
3676 .   -mat_no_unroll - uses code that does not unroll the loops in the
3677                      block calculations (much slower)
3678 .    -mat_block_size - size of the blocks to use
3679 
3680    Level: intermediate
3681 
3682    Notes:
3683    If the nnz parameter is given then the nz parameter is ignored
3684 
3685    You can call MatGetInfo() to get information on how effective the preallocation was;
3686    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3687    You can also run with the option -info and look for messages with the string
3688    malloc in them to see if additional memory allocation was needed.
3689 
3690    The block AIJ format is fully compatible with standard Fortran 77
3691    storage.  That is, the stored row and column indices can begin at
3692    either one (as in Fortran) or zero.  See the users' manual for details.
3693 
3694    Specify the preallocated storage with either nz or nnz (not both).
3695    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3696    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3697 
3698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3699 @*/
3700 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3701 {
3702   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3703 
3704   PetscFunctionBegin;
3705   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3706   if (f) {
3707     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3708   }
3709   PetscFunctionReturn(0);
3710 }
3711 
3712 #undef __FUNCT__
3713 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3714 /*@C
3715    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3716    (the default sequential PETSc format).
3717 
3718    Collective on MPI_Comm
3719 
3720    Input Parameters:
3721 +  A - the matrix
3722 .  i - the indices into j for the start of each local row (starts with zero)
3723 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3724 -  v - optional values in the matrix
3725 
3726    Level: developer
3727 
3728 .keywords: matrix, aij, compressed row, sparse
3729 
3730 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3731 @*/
3732 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3733 {
3734   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3735 
3736   PetscFunctionBegin;
3737   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3738   if (f) {
3739     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3740   }
3741   PetscFunctionReturn(0);
3742 }
3743 
3744 
3745 #undef __FUNCT__
3746 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3747 /*@
3748      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3749 
3750      Collective on MPI_Comm
3751 
3752    Input Parameters:
3753 +  comm - must be an MPI communicator of size 1
3754 .  bs - size of block
3755 .  m - number of rows
3756 .  n - number of columns
3757 .  i - row indices
3758 .  j - column indices
3759 -  a - matrix values
3760 
3761    Output Parameter:
3762 .  mat - the matrix
3763 
3764    Level: advanced
3765 
3766    Notes:
3767        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3768     once the matrix is destroyed
3769 
3770        You cannot set new nonzero locations into this matrix, that will generate an error.
3771 
3772        The i and j indices are 0 based
3773 
3774        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3775 
3776 
3777 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3778 
3779 @*/
3780 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3781 {
3782   PetscErrorCode ierr;
3783   PetscInt       ii;
3784   Mat_SeqBAIJ    *baij;
3785 
3786   PetscFunctionBegin;
3787   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3788   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3789 
3790   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3791   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3792   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3793   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3794   baij = (Mat_SeqBAIJ*)(*mat)->data;
3795   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3796   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3797 
3798   baij->i = i;
3799   baij->j = j;
3800   baij->a = a;
3801   baij->singlemalloc = PETSC_FALSE;
3802   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3803   baij->free_a       = PETSC_FALSE;
3804   baij->free_ij       = PETSC_FALSE;
3805 
3806   for (ii=0; ii<m; ii++) {
3807     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3808 #if defined(PETSC_USE_DEBUG)
3809     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3810 #endif
3811   }
3812 #if defined(PETSC_USE_DEBUG)
3813   for (ii=0; ii<baij->i[m]; ii++) {
3814     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3815     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3816   }
3817 #endif
3818 
3819   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3820   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3821   PetscFunctionReturn(0);
3822 }
3823