xref: /petsc/src/mat/impls/baij/seq/baij.c (revision db609ea75e24d1bf22e421c6099bd64ada4af056)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269 	break;
1270       }
1271     }
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       i,j,m = a->mbs;
1283 
1284   PetscFunctionBegin;
1285   if (!a->diag) {
1286     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1287     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1288     a->free_diag = PETSC_TRUE;
1289   }
1290   for (i=0; i<m; i++) {
1291     a->diag[i] = a->i[i+1];
1292     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293       if (a->j[j] == i) {
1294         a->diag[i] = j;
1295         break;
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1312   PetscInt       *tia, *tja;
1313 
1314   PetscFunctionBegin;
1315   *nn = n;
1316   if (!ia) PetscFunctionReturn(0);
1317   if (symmetric) {
1318     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1319     nz   = tia[n];
1320   } else {
1321     tia = a->i; tja = a->j;
1322   }
1323 
1324   if (!blockcompressed && bs > 1) {
1325     (*nn) *= bs;
1326     /* malloc & create the natural set of indices */
1327     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1328     if (n) {
1329       (*ia)[0] = 0;
1330       for (j=1; j<bs; j++) {
1331         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332       }
1333     }
1334 
1335     for (i=1; i<n; i++) {
1336       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337       for (j=1; j<bs; j++) {
1338         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339       }
1340     }
1341     if (n) {
1342       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343     }
1344 
1345     if (ja) {
1346       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1347       cnt = 0;
1348       for (i=0; i<n; i++) {
1349         for (j=0; j<bs; j++) {
1350           for (k=tia[i]; k<tia[i+1]; k++) {
1351             for (l=0; l<bs; l++) {
1352               (*ja)[cnt++] = bs*tja[k] + l;
1353 	    }
1354           }
1355         }
1356       }
1357     }
1358 
1359     n     *= bs;
1360     nz *= bs*bs;
1361     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1362       ierr = PetscFree(tia);CHKERRQ(ierr);
1363       ierr = PetscFree(tja);CHKERRQ(ierr);
1364     }
1365   } else if (oshift == 1) {
1366     if (symmetric) {
1367       PetscInt nz = tia[A->rmap->n/bs];
1368       /*  add 1 to i and j indices */
1369       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1370       *ia = tia;
1371       if (ja) {
1372 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1373         *ja = tja;
1374       }
1375     } else {
1376       PetscInt nz = a->i[A->rmap->n/bs];
1377       /* malloc space and  add 1 to i and j indices */
1378       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1379       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1380       if (ja) {
1381 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1382 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1383       }
1384     }
1385   } else {
1386     *ia = tia;
1387     if (ja) *ja = tja;
1388   }
1389 
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1395 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1396 {
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   if (!ia) PetscFunctionReturn(0);
1401   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1402     ierr = PetscFree(*ia);CHKERRQ(ierr);
1403     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1410 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1411 {
1412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416 #if defined(PETSC_USE_LOG)
1417   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1418 #endif
1419   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1420   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1421   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1422   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1423   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1424   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1425   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1426   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1427   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1428   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1429   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1430   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1431   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1432 
1433   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1434   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1435   ierr = PetscFree(A->data);CHKERRQ(ierr);
1436 
1437   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1445   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1446   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1447   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1453 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1454 {
1455   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1456   PetscErrorCode ierr;
1457 
1458   PetscFunctionBegin;
1459   switch (op) {
1460   case MAT_ROW_ORIENTED:
1461     a->roworiented    = flg;
1462     break;
1463   case MAT_KEEP_NONZERO_PATTERN:
1464     a->keepnonzeropattern = flg;
1465     break;
1466   case MAT_NEW_NONZERO_LOCATIONS:
1467     a->nonew          = (flg ? 0 : 1);
1468     break;
1469   case MAT_NEW_NONZERO_LOCATION_ERR:
1470     a->nonew          = (flg ? -1 : 0);
1471     break;
1472   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473     a->nonew          = (flg ? -2 : 0);
1474     break;
1475   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1476     a->nounused       = (flg ? -1 : 0);
1477     break;
1478   case MAT_CHECK_COMPRESSED_ROW:
1479     a->compressedrow.check = flg;
1480     break;
1481   case MAT_NEW_DIAGONALS:
1482   case MAT_IGNORE_OFF_PROC_ENTRIES:
1483   case MAT_USE_HASH_TABLE:
1484     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1485     break;
1486   case MAT_SYMMETRIC:
1487   case MAT_STRUCTURALLY_SYMMETRIC:
1488   case MAT_HERMITIAN:
1489   case MAT_SYMMETRY_ETERNAL:
1490     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1491     break;
1492   default:
1493     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1500 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1501 {
1502   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1503   PetscErrorCode ierr;
1504   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1505   MatScalar      *aa,*aa_i;
1506   PetscScalar    *v_i;
1507 
1508   PetscFunctionBegin;
1509   bs  = A->rmap->bs;
1510   ai  = a->i;
1511   aj  = a->j;
1512   aa  = a->a;
1513   bs2 = a->bs2;
1514 
1515   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1516 
1517   bn  = row/bs;   /* Block number */
1518   bp  = row % bs; /* Block Position */
1519   M   = ai[bn+1] - ai[bn];
1520   *nz = bs*M;
1521 
1522   if (v) {
1523     *v = 0;
1524     if (*nz) {
1525       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1526       for (i=0; i<M; i++) { /* for each block in the block row */
1527         v_i  = *v + i*bs;
1528         aa_i = aa + bs2*(ai[bn] + i);
1529         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1530       }
1531     }
1532   }
1533 
1534   if (idx) {
1535     *idx = 0;
1536     if (*nz) {
1537       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1538       for (i=0; i<M; i++) { /* for each block in the block row */
1539         idx_i = *idx + i*bs;
1540         itmp  = bs*aj[ai[bn] + i];
1541         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1542       }
1543     }
1544   }
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1550 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1551 {
1552   PetscErrorCode ierr;
1553 
1554   PetscFunctionBegin;
1555   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1556   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1564 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1565 {
1566   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1567   Mat            C;
1568   PetscErrorCode ierr;
1569   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1570   PetscInt       *rows,*cols,bs2=a->bs2;
1571   MatScalar      *array;
1572 
1573   PetscFunctionBegin;
1574   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1576     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1577     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1578 
1579     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1580     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1581     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1582     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1583     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1584     ierr = PetscFree(col);CHKERRQ(ierr);
1585   } else {
1586     C = *B;
1587   }
1588 
1589   array = a->a;
1590   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1591   for (i=0; i<mbs; i++) {
1592     cols[0] = i*bs;
1593     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1594     len = ai[i+1] - ai[i];
1595     for (j=0; j<len; j++) {
1596       rows[0] = (*aj++)*bs;
1597       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1598       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1599       array += bs2;
1600     }
1601   }
1602   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1603 
1604   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1605   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606 
1607   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1608     *B = C;
1609   } else {
1610     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1611   }
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1618 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1619 {
1620   PetscErrorCode ierr;
1621   Mat            Btrans;
1622 
1623   PetscFunctionBegin;
1624   *f = PETSC_FALSE;
1625   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1626   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1627   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1628   PetscFunctionReturn(0);
1629 }
1630 EXTERN_C_END
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1634 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1635 {
1636   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1637   PetscErrorCode ierr;
1638   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1639   int            fd;
1640   PetscScalar    *aa;
1641   FILE           *file;
1642 
1643   PetscFunctionBegin;
1644   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1645   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1646   col_lens[0] = MAT_FILE_CLASSID;
1647 
1648   col_lens[1] = A->rmap->N;
1649   col_lens[2] = A->cmap->n;
1650   col_lens[3] = a->nz*bs2;
1651 
1652   /* store lengths of each row and write (including header) to file */
1653   count = 0;
1654   for (i=0; i<a->mbs; i++) {
1655     for (j=0; j<bs; j++) {
1656       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1657     }
1658   }
1659   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1660   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1661 
1662   /* store column indices (zero start index) */
1663   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1664   count = 0;
1665   for (i=0; i<a->mbs; i++) {
1666     for (j=0; j<bs; j++) {
1667       for (k=a->i[i]; k<a->i[i+1]; k++) {
1668         for (l=0; l<bs; l++) {
1669           jj[count++] = bs*a->j[k] + l;
1670         }
1671       }
1672     }
1673   }
1674   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1675   ierr = PetscFree(jj);CHKERRQ(ierr);
1676 
1677   /* store nonzero values */
1678   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1679   count = 0;
1680   for (i=0; i<a->mbs; i++) {
1681     for (j=0; j<bs; j++) {
1682       for (k=a->i[i]; k<a->i[i+1]; k++) {
1683         for (l=0; l<bs; l++) {
1684           aa[count++] = a->a[bs2*k + l*bs + j];
1685         }
1686       }
1687     }
1688   }
1689   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1690   ierr = PetscFree(aa);CHKERRQ(ierr);
1691 
1692   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1693   if (file) {
1694     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1701 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1702 {
1703   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1704   PetscErrorCode    ierr;
1705   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1706   PetscViewerFormat format;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1710   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1711     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1712   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1713     Mat aij;
1714     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1715     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1716     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1717   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1718      PetscFunctionReturn(0);
1719   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1720     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1721     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1722     for (i=0; i<a->mbs; i++) {
1723       for (j=0; j<bs; j++) {
1724         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1725         for (k=a->i[i]; k<a->i[i+1]; k++) {
1726           for (l=0; l<bs; l++) {
1727 #if defined(PETSC_USE_COMPLEX)
1728             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1729               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1730                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1731             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1732               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1733                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1736             }
1737 #else
1738             if (a->a[bs2*k + l*bs + j] != 0.0) {
1739               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1740             }
1741 #endif
1742           }
1743         }
1744         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1745       }
1746     }
1747     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1748   } else {
1749     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1750     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1751     for (i=0; i<a->mbs; i++) {
1752       for (j=0; j<bs; j++) {
1753         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1754         for (k=a->i[i]; k<a->i[i+1]; k++) {
1755           for (l=0; l<bs; l++) {
1756 #if defined(PETSC_USE_COMPLEX)
1757             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1758               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1759                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1760             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1761               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1762                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             } else {
1764               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1765             }
1766 #else
1767             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1768 #endif
1769           }
1770         }
1771         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1772       }
1773     }
1774     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1775   }
1776   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1782 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1783 {
1784   Mat            A = (Mat) Aa;
1785   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1786   PetscErrorCode ierr;
1787   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1788   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1789   MatScalar      *aa;
1790   PetscViewer    viewer;
1791   PetscViewerFormat format;
1792 
1793   PetscFunctionBegin;
1794   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1795   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1796 
1797   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1798 
1799   /* loop over matrix elements drawing boxes */
1800 
1801   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1802     color = PETSC_DRAW_BLUE;
1803     for (i=0,row=0; i<mbs; i++,row+=bs) {
1804       for (j=a->i[i]; j<a->i[i+1]; j++) {
1805         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1806         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1807         aa = a->a + j*bs2;
1808         for (k=0; k<bs; k++) {
1809           for (l=0; l<bs; l++) {
1810             if (PetscRealPart(*aa++) >=  0.) continue;
1811             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1812           }
1813         }
1814       }
1815     }
1816     color = PETSC_DRAW_CYAN;
1817     for (i=0,row=0; i<mbs; i++,row+=bs) {
1818       for (j=a->i[i]; j<a->i[i+1]; j++) {
1819         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1820         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1821         aa = a->a + j*bs2;
1822         for (k=0; k<bs; k++) {
1823           for (l=0; l<bs; l++) {
1824             if (PetscRealPart(*aa++) != 0.) continue;
1825             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1826           }
1827         }
1828       }
1829     }
1830     color = PETSC_DRAW_RED;
1831     for (i=0,row=0; i<mbs; i++,row+=bs) {
1832       for (j=a->i[i]; j<a->i[i+1]; j++) {
1833         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835         aa = a->a + j*bs2;
1836         for (k=0; k<bs; k++) {
1837           for (l=0; l<bs; l++) {
1838             if (PetscRealPart(*aa++) <= 0.) continue;
1839             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1840           }
1841         }
1842       }
1843     }
1844   } else {
1845     /* use contour shading to indicate magnitude of values */
1846     /* first determine max of all nonzero values */
1847     PetscDraw   popup;
1848     PetscReal scale,maxv = 0.0;
1849 
1850     for (i=0; i<a->nz*a->bs2; i++) {
1851       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1852     }
1853     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1854     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1855     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1856     for (i=0,row=0; i<mbs; i++,row+=bs) {
1857       for (j=a->i[i]; j<a->i[i+1]; j++) {
1858         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1859         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1860         aa = a->a + j*bs2;
1861         for (k=0; k<bs; k++) {
1862           for (l=0; l<bs; l++) {
1863             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1864             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1865           }
1866         }
1867       }
1868     }
1869   }
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1875 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1876 {
1877   PetscErrorCode ierr;
1878   PetscReal      xl,yl,xr,yr,w,h;
1879   PetscDraw      draw;
1880   PetscBool      isnull;
1881 
1882   PetscFunctionBegin;
1883 
1884   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1885   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1886 
1887   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1888   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1889   xr += w;    yr += h;  xl = -w;     yl = -h;
1890   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1891   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1892   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatView_SeqBAIJ"
1898 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1899 {
1900   PetscErrorCode ierr;
1901   PetscBool      iascii,isbinary,isdraw;
1902 
1903   PetscFunctionBegin;
1904   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1905   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1906   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1907   if (iascii){
1908     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1909   } else if (isbinary) {
1910     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1911   } else if (isdraw) {
1912     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1913   } else {
1914     Mat B;
1915     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1916     ierr = MatView(B,viewer);CHKERRQ(ierr);
1917     ierr = MatDestroy(&B);CHKERRQ(ierr);
1918   }
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1925 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1926 {
1927   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1928   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1929   PetscInt    *ai = a->i,*ailen = a->ilen;
1930   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1931   MatScalar   *ap,*aa = a->a;
1932 
1933   PetscFunctionBegin;
1934   for (k=0; k<m; k++) { /* loop over rows */
1935     row  = im[k]; brow = row/bs;
1936     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1937     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1938     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1939     nrow = ailen[brow];
1940     for (l=0; l<n; l++) { /* loop over columns */
1941       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1942       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1943       col  = in[l] ;
1944       bcol = col/bs;
1945       cidx = col%bs;
1946       ridx = row%bs;
1947       high = nrow;
1948       low  = 0; /* assume unsorted */
1949       while (high-low > 5) {
1950         t = (low+high)/2;
1951         if (rp[t] > bcol) high = t;
1952         else             low  = t;
1953       }
1954       for (i=low; i<high; i++) {
1955         if (rp[i] > bcol) break;
1956         if (rp[i] == bcol) {
1957           *v++ = ap[bs2*i+bs*cidx+ridx];
1958           goto finished;
1959         }
1960       }
1961       *v++ = 0.0;
1962       finished:;
1963     }
1964   }
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1970 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1971 {
1972   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1973   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1974   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1975   PetscErrorCode    ierr;
1976   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1977   PetscBool         roworiented=a->roworiented;
1978   const PetscScalar *value = v;
1979   MatScalar         *ap,*aa = a->a,*bap;
1980 
1981   PetscFunctionBegin;
1982   if (roworiented) {
1983     stepval = (n-1)*bs;
1984   } else {
1985     stepval = (m-1)*bs;
1986   }
1987   for (k=0; k<m; k++) { /* loop over added rows */
1988     row  = im[k];
1989     if (row < 0) continue;
1990 #if defined(PETSC_USE_DEBUG)
1991     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1992 #endif
1993     rp   = aj + ai[row];
1994     ap   = aa + bs2*ai[row];
1995     rmax = imax[row];
1996     nrow = ailen[row];
1997     low  = 0;
1998     high = nrow;
1999     for (l=0; l<n; l++) { /* loop over added columns */
2000       if (in[l] < 0) continue;
2001 #if defined(PETSC_USE_DEBUG)
2002       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2003 #endif
2004       col = in[l];
2005       if (roworiented) {
2006         value = v + (k*(stepval+bs) + l)*bs;
2007       } else {
2008         value = v + (l*(stepval+bs) + k)*bs;
2009       }
2010       if (col <= lastcol) low = 0; else high = nrow;
2011       lastcol = col;
2012       while (high-low > 7) {
2013         t = (low+high)/2;
2014         if (rp[t] > col) high = t;
2015         else             low  = t;
2016       }
2017       for (i=low; i<high; i++) {
2018         if (rp[i] > col) break;
2019         if (rp[i] == col) {
2020           bap  = ap +  bs2*i;
2021           if (roworiented) {
2022             if (is == ADD_VALUES) {
2023               for (ii=0; ii<bs; ii++,value+=stepval) {
2024                 for (jj=ii; jj<bs2; jj+=bs) {
2025                   bap[jj] += *value++;
2026                 }
2027               }
2028             } else {
2029               for (ii=0; ii<bs; ii++,value+=stepval) {
2030                 for (jj=ii; jj<bs2; jj+=bs) {
2031                   bap[jj] = *value++;
2032                 }
2033               }
2034             }
2035           } else {
2036             if (is == ADD_VALUES) {
2037               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2038                 for (jj=0; jj<bs; jj++) {
2039                   bap[jj] += value[jj];
2040                 }
2041                 bap += bs;
2042               }
2043             } else {
2044               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2045                 for (jj=0; jj<bs; jj++) {
2046                   bap[jj]  = value[jj];
2047                 }
2048                 bap += bs;
2049               }
2050             }
2051           }
2052           goto noinsert2;
2053         }
2054       }
2055       if (nonew == 1) goto noinsert2;
2056       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2057       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2058       N = nrow++ - 1; high++;
2059       /* shift up all the later entries in this row */
2060       for (ii=N; ii>=i; ii--) {
2061         rp[ii+1] = rp[ii];
2062         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2063       }
2064       if (N >= i) {
2065         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2066       }
2067       rp[i] = col;
2068       bap   = ap +  bs2*i;
2069       if (roworiented) {
2070         for (ii=0; ii<bs; ii++,value+=stepval) {
2071           for (jj=ii; jj<bs2; jj+=bs) {
2072             bap[jj] = *value++;
2073           }
2074         }
2075       } else {
2076         for (ii=0; ii<bs; ii++,value+=stepval) {
2077           for (jj=0; jj<bs; jj++) {
2078             *bap++  = *value++;
2079           }
2080         }
2081       }
2082       noinsert2:;
2083       low = i;
2084     }
2085     ailen[row] = nrow;
2086   }
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2092 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2093 {
2094   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2095   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2096   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2097   PetscErrorCode ierr;
2098   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2099   MatScalar      *aa = a->a,*ap;
2100   PetscReal      ratio=0.6;
2101 
2102   PetscFunctionBegin;
2103   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2104 
2105   if (m) rmax = ailen[0];
2106   for (i=1; i<mbs; i++) {
2107     /* move each row back by the amount of empty slots (fshift) before it*/
2108     fshift += imax[i-1] - ailen[i-1];
2109     rmax   = PetscMax(rmax,ailen[i]);
2110     if (fshift) {
2111       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2112       N = ailen[i];
2113       for (j=0; j<N; j++) {
2114         ip[j-fshift] = ip[j];
2115         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2116       }
2117     }
2118     ai[i] = ai[i-1] + ailen[i-1];
2119   }
2120   if (mbs) {
2121     fshift += imax[mbs-1] - ailen[mbs-1];
2122     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2123   }
2124   /* reset ilen and imax for each row */
2125   for (i=0; i<mbs; i++) {
2126     ailen[i] = imax[i] = ai[i+1] - ai[i];
2127   }
2128   a->nz = ai[mbs];
2129 
2130   /* diagonals may have moved, so kill the diagonal pointers */
2131   a->idiagvalid = PETSC_FALSE;
2132   if (fshift && a->diag) {
2133     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2134     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2135     a->diag = 0;
2136   }
2137   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2138   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2139   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2140   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2141   A->info.mallocs     += a->reallocs;
2142   a->reallocs          = 0;
2143   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2144 
2145   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2146   A->same_nonzero = PETSC_TRUE;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /*
2151    This function returns an array of flags which indicate the locations of contiguous
2152    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2153    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2154    Assume: sizes should be long enough to hold all the values.
2155 */
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2158 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2159 {
2160   PetscInt   i,j,k,row;
2161   PetscBool  flg;
2162 
2163   PetscFunctionBegin;
2164   for (i=0,j=0; i<n; j++) {
2165     row = idx[i];
2166     if (row%bs!=0) { /* Not the begining of a block */
2167       sizes[j] = 1;
2168       i++;
2169     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2170       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2171       i++;
2172     } else { /* Begining of the block, so check if the complete block exists */
2173       flg = PETSC_TRUE;
2174       for (k=1; k<bs; k++) {
2175         if (row+k != idx[i+k]) { /* break in the block */
2176           flg = PETSC_FALSE;
2177           break;
2178         }
2179       }
2180       if (flg) { /* No break in the bs */
2181         sizes[j] = bs;
2182         i+= bs;
2183       } else {
2184         sizes[j] = 1;
2185         i++;
2186       }
2187     }
2188   }
2189   *bs_max = j;
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2195 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2196 {
2197   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2198   PetscErrorCode    ierr;
2199   PetscInt          i,j,k,count,*rows;
2200   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2201   PetscScalar       zero = 0.0;
2202   MatScalar         *aa;
2203   const PetscScalar *xx;
2204   PetscScalar       *bb;
2205 
2206   PetscFunctionBegin;
2207   /* fix right hand side if needed */
2208   if (x && b) {
2209     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2210     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2211     for (i=0; i<is_n; i++) {
2212       bb[is_idx[i]] = diag*xx[is_idx[i]];
2213     }
2214     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2215     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2216   }
2217 
2218   /* Make a copy of the IS and  sort it */
2219   /* allocate memory for rows,sizes */
2220   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2221 
2222   /* copy IS values to rows, and sort them */
2223   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2224   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2225 
2226   if (baij->keepnonzeropattern) {
2227     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2228     bs_max = is_n;
2229     A->same_nonzero = PETSC_TRUE;
2230   } else {
2231     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2232     A->same_nonzero = PETSC_FALSE;
2233   }
2234 
2235   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2236     row   = rows[j];
2237     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2238     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2239     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2240     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2241       if (diag != (PetscScalar)0.0) {
2242         if (baij->ilen[row/bs] > 0) {
2243           baij->ilen[row/bs]       = 1;
2244           baij->j[baij->i[row/bs]] = row/bs;
2245           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2246         }
2247         /* Now insert all the diagonal values for this bs */
2248         for (k=0; k<bs; k++) {
2249           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2250         }
2251       } else { /* (diag == 0.0) */
2252         baij->ilen[row/bs] = 0;
2253       } /* end (diag == 0.0) */
2254     } else { /* (sizes[i] != bs) */
2255 #if defined (PETSC_USE_DEBUG)
2256       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2257 #endif
2258       for (k=0; k<count; k++) {
2259         aa[0] =  zero;
2260         aa    += bs;
2261       }
2262       if (diag != (PetscScalar)0.0) {
2263         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2264       }
2265     }
2266   }
2267 
2268   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2269   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2275 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2276 {
2277   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2278   PetscErrorCode    ierr;
2279   PetscInt          i,j,k,count;
2280   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2281   PetscScalar       zero = 0.0;
2282   MatScalar         *aa;
2283   const PetscScalar *xx;
2284   PetscScalar       *bb;
2285   PetscBool         *zeroed,vecs = PETSC_FALSE;
2286 
2287   PetscFunctionBegin;
2288   /* fix right hand side if needed */
2289   if (x && b) {
2290     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2291     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2292     vecs = PETSC_TRUE;
2293   }
2294   A->same_nonzero = PETSC_TRUE;
2295 
2296   /* zero the columns */
2297   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2298   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2299   for (i=0; i<is_n; i++) {
2300     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2301     zeroed[is_idx[i]] = PETSC_TRUE;
2302   }
2303   for (i=0; i<A->rmap->N; i++) {
2304     if (!zeroed[i]) {
2305       row = i/bs;
2306       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2307         for (k=0; k<bs; k++) {
2308           col = bs*baij->j[j] + k;
2309 	  if (zeroed[col]) {
2310 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2311             if (vecs) bb[i] -= aa[0]*xx[col];
2312             aa[0] = 0.0;
2313           }
2314         }
2315       }
2316     } else if (vecs) bb[i] = diag*xx[i];
2317   }
2318   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2319   if (vecs) {
2320     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2321     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2322   }
2323 
2324   /* zero the rows */
2325   for (i=0; i<is_n; i++) {
2326     row   = is_idx[i];
2327     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2328     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2329     for (k=0; k<count; k++) {
2330       aa[0] =  zero;
2331       aa    += bs;
2332     }
2333     if (diag != (PetscScalar)0.0) {
2334       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2335     }
2336   }
2337   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2343 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2344 {
2345   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2346   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2347   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2348   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2349   PetscErrorCode ierr;
2350   PetscInt       ridx,cidx,bs2=a->bs2;
2351   PetscBool      roworiented=a->roworiented;
2352   MatScalar      *ap,value,*aa=a->a,*bap;
2353 
2354   PetscFunctionBegin;
2355   if (v) PetscValidScalarPointer(v,6);
2356   for (k=0; k<m; k++) { /* loop over added rows */
2357     row  = im[k];
2358     brow = row/bs;
2359     if (row < 0) continue;
2360 #if defined(PETSC_USE_DEBUG)
2361     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2362 #endif
2363     rp   = aj + ai[brow];
2364     ap   = aa + bs2*ai[brow];
2365     rmax = imax[brow];
2366     nrow = ailen[brow];
2367     low  = 0;
2368     high = nrow;
2369     for (l=0; l<n; l++) { /* loop over added columns */
2370       if (in[l] < 0) continue;
2371 #if defined(PETSC_USE_DEBUG)
2372       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2373 #endif
2374       col = in[l]; bcol = col/bs;
2375       ridx = row % bs; cidx = col % bs;
2376       if (roworiented) {
2377         value = v[l + k*n];
2378       } else {
2379         value = v[k + l*m];
2380       }
2381       if (col <= lastcol) low = 0; else high = nrow;
2382       lastcol = col;
2383       while (high-low > 7) {
2384         t = (low+high)/2;
2385         if (rp[t] > bcol) high = t;
2386         else              low  = t;
2387       }
2388       for (i=low; i<high; i++) {
2389         if (rp[i] > bcol) break;
2390         if (rp[i] == bcol) {
2391           bap  = ap +  bs2*i + bs*cidx + ridx;
2392           if (is == ADD_VALUES) *bap += value;
2393           else                  *bap  = value;
2394           goto noinsert1;
2395         }
2396       }
2397       if (nonew == 1) goto noinsert1;
2398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2399       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2400       N = nrow++ - 1; high++;
2401       /* shift up all the later entries in this row */
2402       for (ii=N; ii>=i; ii--) {
2403         rp[ii+1] = rp[ii];
2404         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2405       }
2406       if (N>=i) {
2407         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2408       }
2409       rp[i]                      = bcol;
2410       ap[bs2*i + bs*cidx + ridx] = value;
2411       a->nz++;
2412       noinsert1:;
2413       low = i;
2414     }
2415     ailen[brow] = nrow;
2416   }
2417   A->same_nonzero = PETSC_FALSE;
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2423 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2424 {
2425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2426   Mat            outA;
2427   PetscErrorCode ierr;
2428   PetscBool      row_identity,col_identity;
2429 
2430   PetscFunctionBegin;
2431   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2432   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2433   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2434   if (!row_identity || !col_identity) {
2435     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2436   }
2437 
2438   outA            = inA;
2439   inA->factortype = MAT_FACTOR_LU;
2440 
2441   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2442 
2443   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2444   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2445   a->row = row;
2446   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2447   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2448   a->col = col;
2449 
2450   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2451   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2452    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2453   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2454 
2455   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2456   if (!a->solve_work) {
2457     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2458     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2459   }
2460   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2461 
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 EXTERN_C_BEGIN
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2468 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2469 {
2470   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2471   PetscInt    i,nz,mbs;
2472 
2473   PetscFunctionBegin;
2474   nz  = baij->maxnz;
2475   mbs = baij->mbs;
2476   for (i=0; i<nz; i++) {
2477     baij->j[i] = indices[i];
2478   }
2479   baij->nz = nz;
2480   for (i=0; i<mbs; i++) {
2481     baij->ilen[i] = baij->imax[i];
2482   }
2483   PetscFunctionReturn(0);
2484 }
2485 EXTERN_C_END
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2489 /*@
2490     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2491        in the matrix.
2492 
2493   Input Parameters:
2494 +  mat - the SeqBAIJ matrix
2495 -  indices - the column indices
2496 
2497   Level: advanced
2498 
2499   Notes:
2500     This can be called if you have precomputed the nonzero structure of the
2501   matrix and want to provide it to the matrix object to improve the performance
2502   of the MatSetValues() operation.
2503 
2504     You MUST have set the correct numbers of nonzeros per row in the call to
2505   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2506 
2507     MUST be called before any calls to MatSetValues();
2508 
2509 @*/
2510 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2511 {
2512   PetscErrorCode ierr;
2513 
2514   PetscFunctionBegin;
2515   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2516   PetscValidPointer(indices,2);
2517   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2523 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2524 {
2525   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2526   PetscErrorCode ierr;
2527   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2528   PetscReal      atmp;
2529   PetscScalar    *x,zero = 0.0;
2530   MatScalar      *aa;
2531   PetscInt       ncols,brow,krow,kcol;
2532 
2533   PetscFunctionBegin;
2534   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2535   bs   = A->rmap->bs;
2536   aa   = a->a;
2537   ai   = a->i;
2538   aj   = a->j;
2539   mbs  = a->mbs;
2540 
2541   ierr = VecSet(v,zero);CHKERRQ(ierr);
2542   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2543   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2544   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2545   for (i=0; i<mbs; i++) {
2546     ncols = ai[1] - ai[0]; ai++;
2547     brow  = bs*i;
2548     for (j=0; j<ncols; j++){
2549       for (kcol=0; kcol<bs; kcol++){
2550         for (krow=0; krow<bs; krow++){
2551           atmp = PetscAbsScalar(*aa);aa++;
2552           row = brow + krow;    /* row index */
2553           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2554           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2555         }
2556       }
2557       aj++;
2558     }
2559   }
2560   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatCopy_SeqBAIJ"
2566 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2567 {
2568   PetscErrorCode ierr;
2569 
2570   PetscFunctionBegin;
2571   /* If the two matrices have the same copy implementation, use fast copy. */
2572   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2573     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2574     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2575     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2576 
2577     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2578     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2579     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2580   } else {
2581     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2582   }
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2588 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2589 {
2590   PetscErrorCode ierr;
2591 
2592   PetscFunctionBegin;
2593   if (A->rmap->bs < 0) {
2594     ierr = PetscLayoutSetBlockSize(A->rmap,1);CHKERRQ(ierr);
2595     ierr = PetscLayoutSetBlockSize(A->cmap,1);CHKERRQ(ierr);
2596   }
2597   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2603 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2604 {
2605   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2606   PetscFunctionBegin;
2607   *array = a->a;
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2613 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2614 {
2615   PetscFunctionBegin;
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 #undef __FUNCT__
2620 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2621 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2622 {
2623   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2624   PetscErrorCode ierr;
2625   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2626   PetscBLASInt   one=1;
2627 
2628   PetscFunctionBegin;
2629   if (str == SAME_NONZERO_PATTERN) {
2630     PetscScalar alpha = a;
2631     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2632     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2633   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2634     if (y->xtoy && y->XtoY != X) {
2635       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2636       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2637     }
2638     if (!y->xtoy) { /* get xtoy */
2639       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2640       y->XtoY = X;
2641       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2642     }
2643     for (i=0; i<x->nz; i++) {
2644       j = 0;
2645       while (j < bs2){
2646         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2647         j++;
2648       }
2649     }
2650     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2651   } else {
2652     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2653   }
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 #undef __FUNCT__
2658 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2659 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2660 {
2661   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2662   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2663   MatScalar      *aa = a->a;
2664 
2665   PetscFunctionBegin;
2666   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2672 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2673 {
2674   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2675   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2676   MatScalar      *aa = a->a;
2677 
2678   PetscFunctionBegin;
2679   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2680   PetscFunctionReturn(0);
2681 }
2682 
2683 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2684 
2685 #undef __FUNCT__
2686 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2687 /*
2688     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2689 */
2690 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2691 {
2692   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2693   PetscErrorCode ierr;
2694   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2695   PetscInt       nz = a->i[m],row,*jj,mr,col;
2696 
2697   PetscFunctionBegin;
2698   *nn = n;
2699   if (!ia) PetscFunctionReturn(0);
2700   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2701   else {
2702     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2703     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2704     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2705     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2706     jj = a->j;
2707     for (i=0; i<nz; i++) {
2708       collengths[jj[i]]++;
2709     }
2710     cia[0] = oshift;
2711     for (i=0; i<n; i++) {
2712       cia[i+1] = cia[i] + collengths[i];
2713     }
2714     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2715     jj   = a->j;
2716     for (row=0; row<m; row++) {
2717       mr = a->i[row+1] - a->i[row];
2718       for (i=0; i<mr; i++) {
2719         col = *jj++;
2720         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2721       }
2722     }
2723     ierr = PetscFree(collengths);CHKERRQ(ierr);
2724     *ia = cia; *ja = cja;
2725   }
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2731 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2732 {
2733   PetscErrorCode ierr;
2734 
2735   PetscFunctionBegin;
2736   if (!ia) PetscFunctionReturn(0);
2737   ierr = PetscFree(*ia);CHKERRQ(ierr);
2738   ierr = PetscFree(*ja);CHKERRQ(ierr);
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2744 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2745 {
2746   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2747   PetscErrorCode ierr;
2748   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2749   PetscScalar    dx,*y,*xx,*w3_array;
2750   PetscScalar    *vscale_array;
2751   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2752   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2753   void           *fctx = coloring->fctx;
2754   PetscBool      flg = PETSC_FALSE;
2755   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2756   Vec            x1_tmp;
2757 
2758   PetscFunctionBegin;
2759   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2760   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2761   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2762   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2763 
2764   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2765   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2766   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2767   if (flg) {
2768     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2769   } else {
2770     PetscBool  assembled;
2771     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2772     if (assembled) {
2773       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2774     }
2775   }
2776 
2777   x1_tmp = x1;
2778   if (!coloring->vscale){
2779     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2780   }
2781 
2782   /*
2783     This is a horrible, horrible, hack.
2784   */
2785   if (coloring->F) {
2786     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2787     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2788     if (m1 != m2) {
2789       coloring->F = 0;
2790       }
2791     }
2792 
2793   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2794     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2795   }
2796   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2797 
2798   /* Set w1 = F(x1) */
2799   if (coloring->F) {
2800     w1          = coloring->F; /* use already computed value of function */
2801     coloring->F = 0;
2802   } else {
2803     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2804     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2805     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2806   }
2807 
2808   if (!coloring->w3) {
2809     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2810     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2811   }
2812   w3 = coloring->w3;
2813 
2814     CHKMEMQ;
2815     /* Compute all the local scale factors, including ghost points */
2816   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2817   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2818   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2819   if (ctype == IS_COLORING_GHOSTED){
2820     col_start = 0; col_end = N;
2821   } else if (ctype == IS_COLORING_GLOBAL){
2822     xx = xx - start;
2823     vscale_array = vscale_array - start;
2824     col_start = start; col_end = N + start;
2825   }    CHKMEMQ;
2826   for (col=col_start; col<col_end; col++){
2827     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2828     if (coloring->htype[0] == 'w') {
2829       dx = 1.0 + unorm;
2830     } else {
2831       dx  = xx[col];
2832     }
2833     if (dx == (PetscScalar)0.0) dx = 1.0;
2834 #if !defined(PETSC_USE_COMPLEX)
2835     if (dx < umin && dx >= 0.0)      dx = umin;
2836     else if (dx < 0.0 && dx > -umin) dx = -umin;
2837 #else
2838     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2839     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2840 #endif
2841     dx               *= epsilon;
2842     vscale_array[col] = (PetscScalar)1.0/dx;
2843   }     CHKMEMQ;
2844   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2845   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2846   if (ctype == IS_COLORING_GLOBAL){
2847     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2848     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2849   }
2850   CHKMEMQ;
2851   if (coloring->vscaleforrow) {
2852     vscaleforrow = coloring->vscaleforrow;
2853   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2854 
2855   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2856   /*
2857     Loop over each color
2858   */
2859   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2860   for (k=0; k<coloring->ncolors; k++) {
2861     coloring->currentcolor = k;
2862     for (i=0; i<bs; i++) {
2863       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2864       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2865       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2866       /*
2867 	Loop over each column associated with color
2868 	adding the perturbation to the vector w3.
2869       */
2870       for (l=0; l<coloring->ncolumns[k]; l++) {
2871 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2872 	if (coloring->htype[0] == 'w') {
2873 	  dx = 1.0 + unorm;
2874 	} else {
2875 	  dx  = xx[col];
2876 	}
2877 	if (dx == (PetscScalar)0.0) dx = 1.0;
2878 #if !defined(PETSC_USE_COMPLEX)
2879 	if (dx < umin && dx >= 0.0)      dx = umin;
2880 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2881 #else
2882 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2883 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2884 #endif
2885 	dx            *= epsilon;
2886 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2887 	w3_array[col] += dx;
2888       }
2889       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2890       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2891 
2892       /*
2893 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2894 	w2 = F(x1 + dx) - F(x1)
2895       */
2896       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2897       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2898       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2899       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2900 
2901       /*
2902 	Loop over rows of vector, putting results into Jacobian matrix
2903       */
2904       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2905       for (l=0; l<coloring->nrows[k]; l++) {
2906 	row    = bs*coloring->rows[k][l];             /* local row index */
2907 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2908         for (j=0; j<bs; j++) {
2909   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2910           srows[j]  = row + start + j;
2911         }
2912 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2913       }
2914       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2915     }
2916   } /* endof for each color */
2917   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2918   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2919   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2920   ierr = PetscFree(srows);CHKERRQ(ierr);
2921 
2922   coloring->currentcolor = -1;
2923   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2924   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2925   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 /* -------------------------------------------------------------------*/
2930 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2931        MatGetRow_SeqBAIJ,
2932        MatRestoreRow_SeqBAIJ,
2933        MatMult_SeqBAIJ_N,
2934 /* 4*/ MatMultAdd_SeqBAIJ_N,
2935        MatMultTranspose_SeqBAIJ,
2936        MatMultTransposeAdd_SeqBAIJ,
2937        0,
2938        0,
2939        0,
2940 /*10*/ 0,
2941        MatLUFactor_SeqBAIJ,
2942        0,
2943        0,
2944        MatTranspose_SeqBAIJ,
2945 /*15*/ MatGetInfo_SeqBAIJ,
2946        MatEqual_SeqBAIJ,
2947        MatGetDiagonal_SeqBAIJ,
2948        MatDiagonalScale_SeqBAIJ,
2949        MatNorm_SeqBAIJ,
2950 /*20*/ 0,
2951        MatAssemblyEnd_SeqBAIJ,
2952        MatSetOption_SeqBAIJ,
2953        MatZeroEntries_SeqBAIJ,
2954 /*24*/ MatZeroRows_SeqBAIJ,
2955        0,
2956        0,
2957        0,
2958        0,
2959 /*29*/ MatSetUp_SeqBAIJ,
2960        0,
2961        0,
2962        MatGetArray_SeqBAIJ,
2963        MatRestoreArray_SeqBAIJ,
2964 /*34*/ MatDuplicate_SeqBAIJ,
2965        0,
2966        0,
2967        MatILUFactor_SeqBAIJ,
2968        0,
2969 /*39*/ MatAXPY_SeqBAIJ,
2970        MatGetSubMatrices_SeqBAIJ,
2971        MatIncreaseOverlap_SeqBAIJ,
2972        MatGetValues_SeqBAIJ,
2973        MatCopy_SeqBAIJ,
2974 /*44*/ 0,
2975        MatScale_SeqBAIJ,
2976        0,
2977        0,
2978        MatZeroRowsColumns_SeqBAIJ,
2979 /*49*/ 0,
2980        MatGetRowIJ_SeqBAIJ,
2981        MatRestoreRowIJ_SeqBAIJ,
2982        MatGetColumnIJ_SeqBAIJ,
2983        MatRestoreColumnIJ_SeqBAIJ,
2984 /*54*/ MatFDColoringCreate_SeqAIJ,
2985        0,
2986        0,
2987        0,
2988        MatSetValuesBlocked_SeqBAIJ,
2989 /*59*/ MatGetSubMatrix_SeqBAIJ,
2990        MatDestroy_SeqBAIJ,
2991        MatView_SeqBAIJ,
2992        0,
2993        0,
2994 /*64*/ 0,
2995        0,
2996        0,
2997        0,
2998        0,
2999 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
3000        0,
3001        MatConvert_Basic,
3002        0,
3003        0,
3004 /*74*/ 0,
3005        MatFDColoringApply_BAIJ,
3006        0,
3007        0,
3008        0,
3009 /*79*/ 0,
3010        0,
3011        0,
3012        0,
3013        MatLoad_SeqBAIJ,
3014 /*84*/ 0,
3015        0,
3016        0,
3017        0,
3018        0,
3019 /*89*/ 0,
3020        0,
3021        0,
3022        0,
3023        0,
3024 /*94*/ 0,
3025        0,
3026        0,
3027        0,
3028        0,
3029 /*99*/0,
3030        0,
3031        0,
3032        0,
3033        0,
3034 /*104*/0,
3035        MatRealPart_SeqBAIJ,
3036        MatImaginaryPart_SeqBAIJ,
3037        0,
3038        0,
3039 /*109*/0,
3040        0,
3041        0,
3042        0,
3043        MatMissingDiagonal_SeqBAIJ,
3044 /*114*/0,
3045        0,
3046        0,
3047        0,
3048        0,
3049 /*119*/0,
3050        0,
3051        MatMultHermitianTranspose_SeqBAIJ,
3052        MatMultHermitianTransposeAdd_SeqBAIJ,
3053        0,
3054 /*124*/0,
3055        0,
3056        MatInvertBlockDiagonal_SeqBAIJ
3057 };
3058 
3059 EXTERN_C_BEGIN
3060 #undef __FUNCT__
3061 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3062 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3063 {
3064   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3065   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3066   PetscErrorCode ierr;
3067 
3068   PetscFunctionBegin;
3069   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3070 
3071   /* allocate space for values if not already there */
3072   if (!aij->saved_values) {
3073     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3074     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3075   }
3076 
3077   /* copy values over */
3078   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3079   PetscFunctionReturn(0);
3080 }
3081 EXTERN_C_END
3082 
3083 EXTERN_C_BEGIN
3084 #undef __FUNCT__
3085 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3086 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3087 {
3088   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3089   PetscErrorCode ierr;
3090   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3091 
3092   PetscFunctionBegin;
3093   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3094   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3095 
3096   /* copy values over */
3097   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3098   PetscFunctionReturn(0);
3099 }
3100 EXTERN_C_END
3101 
3102 EXTERN_C_BEGIN
3103 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3104 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3105 EXTERN_C_END
3106 
3107 EXTERN_C_BEGIN
3108 #undef __FUNCT__
3109 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3110 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3111 {
3112   Mat_SeqBAIJ    *b;
3113   PetscErrorCode ierr;
3114   PetscInt       i,mbs,nbs,bs2;
3115   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3116 
3117   PetscFunctionBegin;
3118   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3119   if (nz == MAT_SKIP_ALLOCATION) {
3120     skipallocation = PETSC_TRUE;
3121     nz             = 0;
3122   }
3123 
3124   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3125   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3126   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3127   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3128 
3129   B->preallocated = PETSC_TRUE;
3130 
3131   mbs  = B->rmap->n/bs;
3132   nbs  = B->cmap->n/bs;
3133   bs2  = bs*bs;
3134 
3135   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3136 
3137   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3138   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3139   if (nnz) {
3140     for (i=0; i<mbs; i++) {
3141       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3142       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3143     }
3144   }
3145 
3146   b       = (Mat_SeqBAIJ*)B->data;
3147   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3148     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3149   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3150 
3151   if (!flg) {
3152     switch (bs) {
3153     case 1:
3154       B->ops->mult            = MatMult_SeqBAIJ_1;
3155       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3156       B->ops->sor             = MatSOR_SeqBAIJ_1;
3157       break;
3158     case 2:
3159       B->ops->mult            = MatMult_SeqBAIJ_2;
3160       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3161       B->ops->sor             = MatSOR_SeqBAIJ_2;
3162       break;
3163     case 3:
3164       B->ops->mult            = MatMult_SeqBAIJ_3;
3165       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3166       B->ops->sor             = MatSOR_SeqBAIJ_3;
3167       break;
3168     case 4:
3169       B->ops->mult            = MatMult_SeqBAIJ_4;
3170       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3171       B->ops->sor             = MatSOR_SeqBAIJ_4;
3172       break;
3173     case 5:
3174       B->ops->mult            = MatMult_SeqBAIJ_5;
3175       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3176       B->ops->sor             = MatSOR_SeqBAIJ_5;
3177       break;
3178     case 6:
3179       B->ops->mult            = MatMult_SeqBAIJ_6;
3180       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3181       B->ops->sor             = MatSOR_SeqBAIJ_6;
3182       break;
3183     case 7:
3184       B->ops->mult            = MatMult_SeqBAIJ_7;
3185       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3186       B->ops->sor             = MatSOR_SeqBAIJ_7;
3187       break;
3188     case 15:
3189       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3190       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3191       B->ops->sor             = MatSOR_SeqBAIJ_N;
3192       break;
3193     default:
3194       B->ops->mult            = MatMult_SeqBAIJ_N;
3195       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3196       B->ops->sor             = MatSOR_SeqBAIJ_N;
3197       break;
3198     }
3199   }
3200   b->mbs       = mbs;
3201   b->nbs       = nbs;
3202   if (!skipallocation) {
3203     if (!b->imax) {
3204       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3205       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3206       b->free_imax_ilen = PETSC_TRUE;
3207     }
3208     /* b->ilen will count nonzeros in each block row so far. */
3209     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3210     if (!nnz) {
3211       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3212       else if (nz < 0) nz = 1;
3213       for (i=0; i<mbs; i++) b->imax[i] = nz;
3214       nz = nz*mbs;
3215     } else {
3216       nz = 0;
3217       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3218     }
3219 
3220     /* allocate the matrix space */
3221     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3222     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3223     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3224     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3225     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3226     b->singlemalloc = PETSC_TRUE;
3227     b->i[0] = 0;
3228     for (i=1; i<mbs+1; i++) {
3229       b->i[i] = b->i[i-1] + b->imax[i-1];
3230     }
3231     b->free_a     = PETSC_TRUE;
3232     b->free_ij    = PETSC_TRUE;
3233   } else {
3234     b->free_a     = PETSC_FALSE;
3235     b->free_ij    = PETSC_FALSE;
3236   }
3237 
3238   b->bs2              = bs2;
3239   b->mbs              = mbs;
3240   b->nz               = 0;
3241   b->maxnz            = nz;
3242   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3243   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3244   PetscFunctionReturn(0);
3245 }
3246 EXTERN_C_END
3247 
3248 EXTERN_C_BEGIN
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3251 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3252 {
3253   PetscInt       i,m,nz,nz_max=0,*nnz;
3254   PetscScalar    *values=0;
3255   PetscErrorCode ierr;
3256 
3257   PetscFunctionBegin;
3258   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3259   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3260   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3261   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3262   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3263   m = B->rmap->n/bs;
3264 
3265   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3266   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3267   for(i=0; i<m; i++) {
3268     nz = ii[i+1]- ii[i];
3269     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3270     nz_max = PetscMax(nz_max, nz);
3271     nnz[i] = nz;
3272   }
3273   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3274   ierr = PetscFree(nnz);CHKERRQ(ierr);
3275 
3276   values = (PetscScalar*)V;
3277   if (!values) {
3278     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3279     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3280   }
3281   for (i=0; i<m; i++) {
3282     PetscInt          ncols  = ii[i+1] - ii[i];
3283     const PetscInt    *icols = jj + ii[i];
3284     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3285     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3286   }
3287   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3288   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3290   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3291   PetscFunctionReturn(0);
3292 }
3293 EXTERN_C_END
3294 
3295 
3296 EXTERN_C_BEGIN
3297 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3298 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3299 #if defined(PETSC_HAVE_MUMPS)
3300 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3301 #endif
3302 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3303 EXTERN_C_END
3304 
3305 /*MC
3306    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3307    block sparse compressed row format.
3308 
3309    Options Database Keys:
3310 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3311 
3312   Level: beginner
3313 
3314 .seealso: MatCreateSeqBAIJ()
3315 M*/
3316 
3317 EXTERN_C_BEGIN
3318 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3319 EXTERN_C_END
3320 
3321 EXTERN_C_BEGIN
3322 #undef __FUNCT__
3323 #define __FUNCT__ "MatCreate_SeqBAIJ"
3324 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3325 {
3326   PetscErrorCode ierr;
3327   PetscMPIInt    size;
3328   Mat_SeqBAIJ    *b;
3329 
3330   PetscFunctionBegin;
3331   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3332   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3333 
3334   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3335   B->data = (void*)b;
3336   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3337   b->row                   = 0;
3338   b->col                   = 0;
3339   b->icol                  = 0;
3340   b->reallocs              = 0;
3341   b->saved_values          = 0;
3342 
3343   b->roworiented           = PETSC_TRUE;
3344   b->nonew                 = 0;
3345   b->diag                  = 0;
3346   b->solve_work            = 0;
3347   b->mult_work             = 0;
3348   B->spptr                 = 0;
3349   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3350   b->keepnonzeropattern    = PETSC_FALSE;
3351   b->xtoy                  = 0;
3352   b->XtoY                  = 0;
3353   B->same_nonzero          = PETSC_FALSE;
3354 
3355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3356                                      "MatGetFactorAvailable_seqbaij_petsc",
3357                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3359                                      "MatGetFactor_seqbaij_petsc",
3360                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3362                                      "MatGetFactor_seqbaij_bstrm",
3363                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3364 #if defined(PETSC_HAVE_MUMPS)
3365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3366 #endif
3367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3368                                      "MatInvertBlockDiagonal_SeqBAIJ",
3369                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3371                                      "MatStoreValues_SeqBAIJ",
3372                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3374                                      "MatRetrieveValues_SeqBAIJ",
3375                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3376   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3377                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3378                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3380                                      "MatConvert_SeqBAIJ_SeqAIJ",
3381                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3383                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3384                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3386                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3387                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3389                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3390                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3391   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3392                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3393                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3394   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3395                                      "MatIsTranspose_SeqBAIJ",
3396                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3397   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3398   PetscFunctionReturn(0);
3399 }
3400 EXTERN_C_END
3401 
3402 #undef __FUNCT__
3403 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3404 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3405 {
3406   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3407   PetscErrorCode ierr;
3408   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3409 
3410   PetscFunctionBegin;
3411   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3412 
3413   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3414     c->imax = a->imax;
3415     c->ilen = a->ilen;
3416     c->free_imax_ilen = PETSC_FALSE;
3417   } else {
3418     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3419     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3420     for (i=0; i<mbs; i++) {
3421       c->imax[i] = a->imax[i];
3422       c->ilen[i] = a->ilen[i];
3423     }
3424     c->free_imax_ilen = PETSC_TRUE;
3425   }
3426 
3427   /* allocate the matrix space */
3428   if (mallocmatspace){
3429     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3430       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3431       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3432       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3433       c->i            = a->i;
3434       c->j            = a->j;
3435       c->singlemalloc = PETSC_FALSE;
3436       c->free_a       = PETSC_TRUE;
3437       c->free_ij      = PETSC_FALSE;
3438       c->parent       = A;
3439       C->preallocated = PETSC_TRUE;
3440       C->assembled    = PETSC_TRUE;
3441       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3442       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3443       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3444     } else {
3445       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3446       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3447       c->singlemalloc = PETSC_TRUE;
3448       c->free_a       = PETSC_TRUE;
3449       c->free_ij      = PETSC_TRUE;
3450       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3451       if (mbs > 0) {
3452 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3453 	if (cpvalues == MAT_COPY_VALUES) {
3454 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3455 	} else {
3456 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3457 	}
3458       }
3459       C->preallocated = PETSC_TRUE;
3460       C->assembled    = PETSC_TRUE;
3461     }
3462   }
3463 
3464   c->roworiented = a->roworiented;
3465   c->nonew       = a->nonew;
3466   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3467   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3468   c->bs2         = a->bs2;
3469   c->mbs         = a->mbs;
3470   c->nbs         = a->nbs;
3471 
3472   if (a->diag) {
3473     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3474       c->diag      = a->diag;
3475       c->free_diag = PETSC_FALSE;
3476     } else {
3477       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3478       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3479       for (i=0; i<mbs; i++) {
3480         c->diag[i] = a->diag[i];
3481       }
3482       c->free_diag = PETSC_TRUE;
3483     }
3484   } else c->diag        = 0;
3485   c->nz                 = a->nz;
3486   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3487   c->solve_work         = 0;
3488   c->mult_work          = 0;
3489 
3490   c->compressedrow.use     = a->compressedrow.use;
3491   c->compressedrow.nrows   = a->compressedrow.nrows;
3492   c->compressedrow.check   = a->compressedrow.check;
3493   if (a->compressedrow.use){
3494     i = a->compressedrow.nrows;
3495     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3496     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3497     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3498     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3499   } else {
3500     c->compressedrow.use    = PETSC_FALSE;
3501     c->compressedrow.i      = PETSC_NULL;
3502     c->compressedrow.rindex = PETSC_NULL;
3503   }
3504   C->same_nonzero = A->same_nonzero;
3505   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3506   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3507   PetscFunctionReturn(0);
3508 }
3509 
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3512 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3513 {
3514     PetscErrorCode ierr;
3515 
3516   PetscFunctionBegin;
3517   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3518   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3519   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3520   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3521   PetscFunctionReturn(0);
3522 }
3523 
3524 #undef __FUNCT__
3525 #define __FUNCT__ "MatLoad_SeqBAIJ"
3526 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3527 {
3528   Mat_SeqBAIJ    *a;
3529   PetscErrorCode ierr;
3530   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3531   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3532   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3533   PetscInt       *masked,nmask,tmp,bs2,ishift;
3534   PetscMPIInt    size;
3535   int            fd;
3536   PetscScalar    *aa;
3537   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3538 
3539   PetscFunctionBegin;
3540   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3541   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3542   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3543   bs2  = bs*bs;
3544 
3545   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3546   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3547   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3548   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3549   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3550   M = header[1]; N = header[2]; nz = header[3];
3551 
3552   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3553   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3554 
3555   /*
3556      This code adds extra rows to make sure the number of rows is
3557     divisible by the blocksize
3558   */
3559   mbs        = M/bs;
3560   extra_rows = bs - M + bs*(mbs);
3561   if (extra_rows == bs) extra_rows = 0;
3562   else                  mbs++;
3563   if (extra_rows) {
3564     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3565   }
3566 
3567   /* Set global sizes if not already set */
3568   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3569     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3570   } else { /* Check if the matrix global sizes are correct */
3571     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3572     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3573       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3574     }
3575     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3576   }
3577 
3578   /* read in row lengths */
3579   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3580   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3581   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3582 
3583   /* read in column indices */
3584   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3585   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3586   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3587 
3588   /* loop over row lengths determining block row lengths */
3589   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3590   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3591   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3592   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3593   rowcount = 0;
3594   nzcount = 0;
3595   for (i=0; i<mbs; i++) {
3596     nmask = 0;
3597     for (j=0; j<bs; j++) {
3598       kmax = rowlengths[rowcount];
3599       for (k=0; k<kmax; k++) {
3600         tmp = jj[nzcount++]/bs;
3601         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3602       }
3603       rowcount++;
3604     }
3605     browlengths[i] += nmask;
3606     /* zero out the mask elements we set */
3607     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3608   }
3609 
3610   /* Do preallocation  */
3611   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3612   a = (Mat_SeqBAIJ*)newmat->data;
3613 
3614   /* set matrix "i" values */
3615   a->i[0] = 0;
3616   for (i=1; i<= mbs; i++) {
3617     a->i[i]      = a->i[i-1] + browlengths[i-1];
3618     a->ilen[i-1] = browlengths[i-1];
3619   }
3620   a->nz         = 0;
3621   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3622 
3623   /* read in nonzero values */
3624   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3625   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3626   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3627 
3628   /* set "a" and "j" values into matrix */
3629   nzcount = 0; jcount = 0;
3630   for (i=0; i<mbs; i++) {
3631     nzcountb = nzcount;
3632     nmask    = 0;
3633     for (j=0; j<bs; j++) {
3634       kmax = rowlengths[i*bs+j];
3635       for (k=0; k<kmax; k++) {
3636         tmp = jj[nzcount++]/bs;
3637 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3638       }
3639     }
3640     /* sort the masked values */
3641     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3642 
3643     /* set "j" values into matrix */
3644     maskcount = 1;
3645     for (j=0; j<nmask; j++) {
3646       a->j[jcount++]  = masked[j];
3647       mask[masked[j]] = maskcount++;
3648     }
3649     /* set "a" values into matrix */
3650     ishift = bs2*a->i[i];
3651     for (j=0; j<bs; j++) {
3652       kmax = rowlengths[i*bs+j];
3653       for (k=0; k<kmax; k++) {
3654         tmp       = jj[nzcountb]/bs ;
3655         block     = mask[tmp] - 1;
3656         point     = jj[nzcountb] - bs*tmp;
3657         idx       = ishift + bs2*block + j + bs*point;
3658         a->a[idx] = (MatScalar)aa[nzcountb++];
3659       }
3660     }
3661     /* zero out the mask elements we set */
3662     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3663   }
3664   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3665 
3666   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3667   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3668   ierr = PetscFree(aa);CHKERRQ(ierr);
3669   ierr = PetscFree(jj);CHKERRQ(ierr);
3670   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3671 
3672   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3673   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3674   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3675   PetscFunctionReturn(0);
3676 }
3677 
3678 #undef __FUNCT__
3679 #define __FUNCT__ "MatCreateSeqBAIJ"
3680 /*@C
3681    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3682    compressed row) format.  For good matrix assembly performance the
3683    user should preallocate the matrix storage by setting the parameter nz
3684    (or the array nnz).  By setting these parameters accurately, performance
3685    during matrix assembly can be increased by more than a factor of 50.
3686 
3687    Collective on MPI_Comm
3688 
3689    Input Parameters:
3690 +  comm - MPI communicator, set to PETSC_COMM_SELF
3691 .  bs - size of block
3692 .  m - number of rows
3693 .  n - number of columns
3694 .  nz - number of nonzero blocks  per block row (same for all rows)
3695 -  nnz - array containing the number of nonzero blocks in the various block rows
3696          (possibly different for each block row) or PETSC_NULL
3697 
3698    Output Parameter:
3699 .  A - the matrix
3700 
3701    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3702    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3703    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3704 
3705    Options Database Keys:
3706 .   -mat_no_unroll - uses code that does not unroll the loops in the
3707                      block calculations (much slower)
3708 .    -mat_block_size - size of the blocks to use
3709 
3710    Level: intermediate
3711 
3712    Notes:
3713    The number of rows and columns must be divisible by blocksize.
3714 
3715    If the nnz parameter is given then the nz parameter is ignored
3716 
3717    A nonzero block is any block that as 1 or more nonzeros in it
3718 
3719    The block AIJ format is fully compatible with standard Fortran 77
3720    storage.  That is, the stored row and column indices can begin at
3721    either one (as in Fortran) or zero.  See the users' manual for details.
3722 
3723    Specify the preallocated storage with either nz or nnz (not both).
3724    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3725    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3726    matrices.
3727 
3728 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3729 @*/
3730 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3731 {
3732   PetscErrorCode ierr;
3733 
3734   PetscFunctionBegin;
3735   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3736   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3737   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3738   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3739   PetscFunctionReturn(0);
3740 }
3741 
3742 #undef __FUNCT__
3743 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3744 /*@C
3745    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3746    per row in the matrix. For good matrix assembly performance the
3747    user should preallocate the matrix storage by setting the parameter nz
3748    (or the array nnz).  By setting these parameters accurately, performance
3749    during matrix assembly can be increased by more than a factor of 50.
3750 
3751    Collective on MPI_Comm
3752 
3753    Input Parameters:
3754 +  A - the matrix
3755 .  bs - size of block
3756 .  nz - number of block nonzeros per block row (same for all rows)
3757 -  nnz - array containing the number of block nonzeros in the various block rows
3758          (possibly different for each block row) or PETSC_NULL
3759 
3760    Options Database Keys:
3761 .   -mat_no_unroll - uses code that does not unroll the loops in the
3762                      block calculations (much slower)
3763 .    -mat_block_size - size of the blocks to use
3764 
3765    Level: intermediate
3766 
3767    Notes:
3768    If the nnz parameter is given then the nz parameter is ignored
3769 
3770    You can call MatGetInfo() to get information on how effective the preallocation was;
3771    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3772    You can also run with the option -info and look for messages with the string
3773    malloc in them to see if additional memory allocation was needed.
3774 
3775    The block AIJ format is fully compatible with standard Fortran 77
3776    storage.  That is, the stored row and column indices can begin at
3777    either one (as in Fortran) or zero.  See the users' manual for details.
3778 
3779    Specify the preallocated storage with either nz or nnz (not both).
3780    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3781    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3782 
3783 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3784 @*/
3785 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3786 {
3787   PetscErrorCode ierr;
3788 
3789   PetscFunctionBegin;
3790   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3791   PetscValidType(B,1);
3792   PetscValidLogicalCollectiveInt(B,bs,2);
3793   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3794   PetscFunctionReturn(0);
3795 }
3796 
3797 #undef __FUNCT__
3798 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3799 /*@C
3800    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3801    (the default sequential PETSc format).
3802 
3803    Collective on MPI_Comm
3804 
3805    Input Parameters:
3806 +  A - the matrix
3807 .  i - the indices into j for the start of each local row (starts with zero)
3808 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3809 -  v - optional values in the matrix
3810 
3811    Level: developer
3812 
3813 .keywords: matrix, aij, compressed row, sparse
3814 
3815 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3816 @*/
3817 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3818 {
3819   PetscErrorCode ierr;
3820 
3821   PetscFunctionBegin;
3822   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3823   PetscValidType(B,1);
3824   PetscValidLogicalCollectiveInt(B,bs,2);
3825   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3826   PetscFunctionReturn(0);
3827 }
3828 
3829 
3830 #undef __FUNCT__
3831 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3832 /*@
3833      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3834 
3835      Collective on MPI_Comm
3836 
3837    Input Parameters:
3838 +  comm - must be an MPI communicator of size 1
3839 .  bs - size of block
3840 .  m - number of rows
3841 .  n - number of columns
3842 .  i - row indices
3843 .  j - column indices
3844 -  a - matrix values
3845 
3846    Output Parameter:
3847 .  mat - the matrix
3848 
3849    Level: advanced
3850 
3851    Notes:
3852        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3853     once the matrix is destroyed
3854 
3855        You cannot set new nonzero locations into this matrix, that will generate an error.
3856 
3857        The i and j indices are 0 based
3858 
3859        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3860 
3861 
3862 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3863 
3864 @*/
3865 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3866 {
3867   PetscErrorCode ierr;
3868   PetscInt       ii;
3869   Mat_SeqBAIJ    *baij;
3870 
3871   PetscFunctionBegin;
3872   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3873   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3874 
3875   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3876   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3877   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3878   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3879   baij = (Mat_SeqBAIJ*)(*mat)->data;
3880   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3881   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3882 
3883   baij->i = i;
3884   baij->j = j;
3885   baij->a = a;
3886   baij->singlemalloc = PETSC_FALSE;
3887   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3888   baij->free_a       = PETSC_FALSE;
3889   baij->free_ij      = PETSC_FALSE;
3890 
3891   for (ii=0; ii<m; ii++) {
3892     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3893 #if defined(PETSC_USE_DEBUG)
3894     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3895 #endif
3896   }
3897 #if defined(PETSC_USE_DEBUG)
3898   for (ii=0; ii<baij->i[m]; ii++) {
3899     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3900     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3901   }
3902 #endif
3903 
3904   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3905   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908