xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a1a67e7b79bc5bc3be574f0bedd8e065f4f5a5eb)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 1:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 1*diag_offset[i];
68         diag[0]  = odiag[0];
69         mdiag[0] = odiag[0];
70         diag[0]  = 1.0 / (diag[0] + shift);
71         diag    += 1;
72         mdiag   += 1;
73       }
74       break;
75     case 2:
76       for (i=0; i<mbs; i++) {
77         odiag   = v + 4*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
80 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
81 	diag    += 4;
82 	mdiag   += 4;
83       }
84       break;
85     case 3:
86       for (i=0; i<mbs; i++) {
87         odiag    = v + 9*diag_offset[i];
88         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
89         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
90         diag[8]  = odiag[8];
91         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
92         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
93         mdiag[8] = odiag[8];
94 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
95 	diag    += 9;
96 	mdiag   += 9;
97       }
98       break;
99     case 4:
100       for (i=0; i<mbs; i++) {
101         odiag  = v + 16*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
105 	diag  += 16;
106 	mdiag += 16;
107       }
108       break;
109     case 5:
110       for (i=0; i<mbs; i++) {
111         odiag = v + 25*diag_offset[i];
112         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
114 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
115 	diag  += 25;
116 	mdiag += 25;
117       }
118       break;
119     case 6:
120       for (i=0; i<mbs; i++) {
121         odiag = v + 36*diag_offset[i];
122         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
124 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
125 	diag  += 36;
126 	mdiag += 36;
127       }
128       break;
129     default:
130       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 EXTERN_C_END
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
139 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
140 {
141   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
142   PetscScalar        *x,x1,s1;
143   const PetscScalar  *b;
144   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
145   PetscErrorCode     ierr;
146   PetscInt           m = a->mbs,i,i2,nz,j;
147   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
148 
149   PetscFunctionBegin;
150   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
151   its = its*lits;
152   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
153   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
154   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
155   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
156   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
157 
158   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
159 
160   diag  = a->diag;
161   idiag = a->idiag;
162   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
163   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
164 
165   if (flag & SOR_ZERO_INITIAL_GUESS) {
166     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
167       x[0] = b[0]*idiag[0];
168       i2     = 1;
169       idiag += 1;
170       for (i=1; i<m; i++) {
171         v     = aa + ai[i];
172         vi    = aj + ai[i];
173         nz    = diag[i] - ai[i];
174         s1    = b[i2];
175         for (j=0; j<nz; j++) {
176           s1 -= v[j]*x[vi[j]];
177         }
178         x[i2]   = idiag[0]*s1;
179         idiag   += 1;
180         i2      += 1;
181       }
182       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
183       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
184     }
185     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
186         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
187       i2    = 0;
188       mdiag = a->idiag+a->mbs;
189       for (i=0; i<m; i++) {
190         x1      = x[i2];
191         x[i2]   = mdiag[0]*x1;
192         mdiag  += 1;
193         i2     += 1;
194       }
195       ierr = PetscLogFlops(m);CHKERRQ(ierr);
196     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
197       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
198     }
199     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
200       idiag   = a->idiag+a->mbs - 1;
201       i2      = m - 1;
202       x1      = x[i2];
203       x[i2]   = idiag[0]*x1;
204       idiag -= 1;
205       i2    -= 1;
206       for (i=m-2; i>=0; i--) {
207         v     = aa + (diag[i]+1);
208         vi    = aj + diag[i] + 1;
209         nz    = ai[i+1] - diag[i] - 1;
210         s1    = x[i2];
211         for (j=0; j<nz; j++) {
212           s1 -= v[j]*x[vi[j]];
213         }
214         x[i2]   = idiag[0]*s1;
215         idiag   -= 1;
216         i2      -= 1;
217       }
218       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
219     }
220   } else {
221     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
222   }
223   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
224   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 #undef __FUNCT__
229 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
230 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
231 {
232   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
233   PetscScalar        *x,x1,x2,s1,s2;
234   const PetscScalar  *b;
235   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
236   PetscErrorCode     ierr;
237   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
238   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
239 
240   PetscFunctionBegin;
241   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
242   its = its*lits;
243   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
244   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
245   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
246   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
247   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
248 
249   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
250 
251   diag  = a->diag;
252   idiag = a->idiag;
253   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
254   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
255 
256   if (flag & SOR_ZERO_INITIAL_GUESS) {
257     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
258       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
259       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
260       i2     = 2;
261       idiag += 4;
262       for (i=1; i<m; i++) {
263 	v     = aa + 4*ai[i];
264 	vi    = aj + ai[i];
265 	nz    = diag[i] - ai[i];
266 	s1    = b[i2]; s2 = b[i2+1];
267         for (j=0; j<nz; j++) {
268 	  idx  = 2*vi[j];
269           it   = 4*j;
270 	  x1   = x[idx]; x2 = x[1+idx];
271 	  s1  -= v[it]*x1 + v[it+2]*x2;
272 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
273 	}
274 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
275 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
276         idiag   += 4;
277         i2      += 2;
278       }
279       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
280       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
281     }
282     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
283         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
284       i2    = 0;
285       mdiag = a->idiag+4*a->mbs;
286       for (i=0; i<m; i++) {
287         x1      = x[i2]; x2 = x[i2+1];
288         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
289         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
290         mdiag  += 4;
291         i2     += 2;
292       }
293       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
294     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
295       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
296     }
297     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
298       idiag   = a->idiag+4*a->mbs - 4;
299       i2      = 2*m - 2;
300       x1      = x[i2]; x2 = x[i2+1];
301       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
302       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
303       idiag -= 4;
304       i2    -= 2;
305       for (i=m-2; i>=0; i--) {
306 	v     = aa + 4*(diag[i]+1);
307 	vi    = aj + diag[i] + 1;
308 	nz    = ai[i+1] - diag[i] - 1;
309 	s1    = x[i2]; s2 = x[i2+1];
310         for (j=0; j<nz; j++) {
311  	  idx  = 2*vi[j];
312           it   = 4*j;
313 	  x1   = x[idx]; x2 = x[1+idx];
314 	  s1  -= v[it]*x1 + v[it+2]*x2;
315 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
316 	}
317 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
318 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
319         idiag   -= 4;
320         i2      -= 2;
321       }
322       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
323     }
324   } else {
325     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
326   }
327   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
328   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
329   PetscFunctionReturn(0);
330 }
331 
332 #undef __FUNCT__
333 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
334 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
335 {
336   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
337   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
338   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
339   const PetscScalar  *b;
340   PetscErrorCode     ierr;
341   PetscInt           m = a->mbs,i,i2,nz,idx;
342   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
343 
344   PetscFunctionBegin;
345   its = its*lits;
346   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
347   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
348   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
349   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
350   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
351   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
352 
353   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
354 
355   diag  = a->diag;
356   idiag = a->idiag;
357   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
358   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
359 
360   if (flag & SOR_ZERO_INITIAL_GUESS) {
361     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
362       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
363       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
364       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
365       i2     = 3;
366       idiag += 9;
367       for (i=1; i<m; i++) {
368         v     = aa + 9*ai[i];
369         vi    = aj + ai[i];
370         nz    = diag[i] - ai[i];
371         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
372         while (nz--) {
373           idx  = 3*(*vi++);
374           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
375           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
376           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
377           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
378           v   += 9;
379         }
380         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
381         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
382         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
383         idiag   += 9;
384         i2      += 3;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
388     }
389     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
390         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
391       i2    = 0;
392       mdiag = a->idiag+9*a->mbs;
393       for (i=0; i<m; i++) {
394         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
396         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
397         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
398         mdiag  += 9;
399         i2     += 3;
400       }
401       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
402     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
403       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
404     }
405     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
406       idiag   = a->idiag+9*a->mbs - 9;
407       i2      = 3*m - 3;
408       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
409       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
410       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
411       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
412       idiag -= 9;
413       i2    -= 3;
414       for (i=m-2; i>=0; i--) {
415         v     = aa + 9*(diag[i]+1);
416         vi    = aj + diag[i] + 1;
417         nz    = ai[i+1] - diag[i] - 1;
418         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
419         while (nz--) {
420           idx  = 3*(*vi++);
421           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
422           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
423           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
424           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
425           v   += 9;
426         }
427         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
428         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
429         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
430         idiag   -= 9;
431         i2      -= 3;
432       }
433       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
445 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
449   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
450   const PetscScalar  *b;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
457   its = its*lits;
458   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
459   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
460   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
461   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
462   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
463 
464   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
465 
466   diag  = a->diag;
467   idiag = a->idiag;
468   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
469   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
470 
471   if (flag & SOR_ZERO_INITIAL_GUESS) {
472     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
473       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
474       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
475       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
476       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
477       i2     = 4;
478       idiag += 16;
479       for (i=1; i<m; i++) {
480 	v     = aa + 16*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
484 	while (nz--) {
485 	  idx  = 4*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
487 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
488 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
489 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
490 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
491 	  v   += 16;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
494 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
495 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
496 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
497         idiag   += 16;
498         i2      += 4;
499       }
500       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
501       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
502     }
503     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
504         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
505       i2    = 0;
506       mdiag = a->idiag+16*a->mbs;
507       for (i=0; i<m; i++) {
508         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
509         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
510         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
511         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
512         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
513         mdiag  += 16;
514         i2     += 4;
515       }
516       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
517     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
518       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
519     }
520     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
521       idiag   = a->idiag+16*a->mbs - 16;
522       i2      = 4*m - 4;
523       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
524       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
525       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
526       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
527       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
528       idiag -= 16;
529       i2    -= 4;
530       for (i=m-2; i>=0; i--) {
531 	v     = aa + 16*(diag[i]+1);
532 	vi    = aj + diag[i] + 1;
533 	nz    = ai[i+1] - diag[i] - 1;
534 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
535 	while (nz--) {
536 	  idx  = 4*(*vi++);
537 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
538 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
539 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
540 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
541 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
542 	  v   += 16;
543 	}
544 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
545 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
546 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
547 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
548         idiag   -= 16;
549         i2      -= 4;
550       }
551       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
552     }
553   } else {
554     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
555   }
556   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
557   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
563 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
564 {
565   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
566   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
567   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
568   const PetscScalar  *b;
569   PetscErrorCode     ierr;
570   PetscInt           m = a->mbs,i,i2,nz,idx;
571   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
572 
573   PetscFunctionBegin;
574   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
575   its = its*lits;
576   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
577   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
578   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
579   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
580   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
581 
582   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
583 
584   diag  = a->diag;
585   idiag = a->idiag;
586   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
587   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
588 
589   if (flag & SOR_ZERO_INITIAL_GUESS) {
590     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
591       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
592       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
593       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
594       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
595       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
596       i2     = 5;
597       idiag += 25;
598       for (i=1; i<m; i++) {
599 	v     = aa + 25*ai[i];
600 	vi    = aj + ai[i];
601 	nz    = diag[i] - ai[i];
602 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
603 	while (nz--) {
604 	  idx  = 5*(*vi++);
605 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
606 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
607 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
608 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
609 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
610 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
611 	  v   += 25;
612 	}
613 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
614 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
615 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
616 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
617 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
618         idiag   += 25;
619         i2      += 5;
620       }
621       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
622       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
623     }
624     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
625         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
626       i2    = 0;
627       mdiag = a->idiag+25*a->mbs;
628       for (i=0; i<m; i++) {
629         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
630         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
631         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
632         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
633         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
634         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
635         mdiag  += 25;
636         i2     += 5;
637       }
638       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
639     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
640       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
641     }
642     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
643       idiag   = a->idiag+25*a->mbs - 25;
644       i2      = 5*m - 5;
645       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
646       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
647       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
648       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
649       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
650       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
651       idiag -= 25;
652       i2    -= 5;
653       for (i=m-2; i>=0; i--) {
654 	v     = aa + 25*(diag[i]+1);
655 	vi    = aj + diag[i] + 1;
656 	nz    = ai[i+1] - diag[i] - 1;
657 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
658 	while (nz--) {
659 	  idx  = 5*(*vi++);
660 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
661 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
662 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
663 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
664 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
665 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
666 	  v   += 25;
667 	}
668 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
669 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
670 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
671 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
672 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
673         idiag   -= 25;
674         i2      -= 5;
675       }
676       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
677     }
678   } else {
679     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
680   }
681   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
682   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
688 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
689 {
690   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
691   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
692   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
693   const PetscScalar  *b;
694   PetscErrorCode     ierr;
695   PetscInt           m = a->mbs,i,i2,nz,idx;
696   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
697 
698   PetscFunctionBegin;
699   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
700   its = its*lits;
701   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
702   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
703   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
704   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
705   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
706 
707   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
708 
709   diag  = a->diag;
710   idiag = a->idiag;
711   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
712   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
713 
714   if (flag & SOR_ZERO_INITIAL_GUESS) {
715     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
716       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
717       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
718       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
719       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
720       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
721       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
722       i2     = 6;
723       idiag += 36;
724       for (i=1; i<m; i++) {
725         v     = aa + 36*ai[i];
726         vi    = aj + ai[i];
727         nz    = diag[i] - ai[i];
728         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
729         while (nz--) {
730           idx  = 6*(*vi++);
731           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
732           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
733           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
734           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
735           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
736           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
737           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
738           v   += 36;
739         }
740         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
741         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
742         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
743         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
744         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
745         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
746         idiag   += 36;
747         i2      += 6;
748       }
749       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
750       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
751     }
752     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
753         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
754       i2    = 0;
755       mdiag = a->idiag+36*a->mbs;
756       for (i=0; i<m; i++) {
757         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
758         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
759         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
760         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
761         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
762         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
763         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
764         mdiag  += 36;
765         i2     += 6;
766       }
767       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
768     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
769       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
770     }
771     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
772       idiag   = a->idiag+36*a->mbs - 36;
773       i2      = 6*m - 6;
774       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
775       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
776       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
777       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
778       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
779       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
780       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
781       idiag -= 36;
782       i2    -= 6;
783       for (i=m-2; i>=0; i--) {
784         v     = aa + 36*(diag[i]+1);
785         vi    = aj + diag[i] + 1;
786         nz    = ai[i+1] - diag[i] - 1;
787         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
788         while (nz--) {
789           idx  = 6*(*vi++);
790           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
791           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
792           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
793           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
794           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
795           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
796           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
797           v   += 36;
798         }
799         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
800         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
801         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
802         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
803         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
804         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
805         idiag   -= 36;
806         i2      -= 6;
807       }
808       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
809     }
810   } else {
811     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
812   }
813   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
814   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
815   PetscFunctionReturn(0);
816 }
817 
818 #undef __FUNCT__
819 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
820 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
821 {
822   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
823   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
824   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
825   const PetscScalar  *b;
826   PetscErrorCode     ierr;
827   PetscInt           m = a->mbs,i,i2,nz,idx;
828   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
829 
830   PetscFunctionBegin;
831   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
832   its = its*lits;
833   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
834   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
835   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
836   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
837   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
838 
839   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
840 
841   diag  = a->diag;
842   idiag = a->idiag;
843   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
844   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
845 
846   if (flag & SOR_ZERO_INITIAL_GUESS) {
847     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
848       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
849       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
850       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
851       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
852       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
853       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
854       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
855       i2     = 7;
856       idiag += 49;
857       for (i=1; i<m; i++) {
858         v     = aa + 49*ai[i];
859         vi    = aj + ai[i];
860         nz    = diag[i] - ai[i];
861         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
862         while (nz--) {
863           idx  = 7*(*vi++);
864           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
865           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
866           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
867           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
868           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
869           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
870           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
871           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
872           v   += 49;
873         }
874         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
875         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
876         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
877         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
878         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
879         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
880         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
881         idiag   += 49;
882         i2      += 7;
883       }
884       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
885       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
886     }
887     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
888         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
889       i2    = 0;
890       mdiag = a->idiag+49*a->mbs;
891       for (i=0; i<m; i++) {
892         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
893         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
894         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
895         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
896         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
897         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
898         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
899         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
900         mdiag  += 36;
901         i2     += 6;
902       }
903       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
904     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
905       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
906     }
907     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
908       idiag   = a->idiag+49*a->mbs - 49;
909       i2      = 7*m - 7;
910       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
911       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
912       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
913       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
914       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
915       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
916       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
917       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
918       idiag -= 49;
919       i2    -= 7;
920       for (i=m-2; i>=0; i--) {
921         v     = aa + 49*(diag[i]+1);
922         vi    = aj + diag[i] + 1;
923         nz    = ai[i+1] - diag[i] - 1;
924         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
925         while (nz--) {
926           idx  = 7*(*vi++);
927           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
928           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
929           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
930           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
931           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
932           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
933           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
934           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
935           v   += 49;
936         }
937         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
938         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
939         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
940         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
941         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
942         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
943         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
944         idiag   -= 49;
945         i2      -= 7;
946       }
947       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
948     }
949   } else {
950     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
951   }
952   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
953   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
954   PetscFunctionReturn(0);
955 }
956 
957 /*
958     Special version for direct calls from Fortran (Used in PETSc-fun3d)
959 */
960 #if defined(PETSC_HAVE_FORTRAN_CAPS)
961 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
962 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
963 #define matsetvaluesblocked4_ matsetvaluesblocked4
964 #endif
965 
966 EXTERN_C_BEGIN
967 #undef __FUNCT__
968 #define __FUNCT__ "matsetvaluesblocked4_"
969 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
970 {
971   Mat               A = *AA;
972   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
973   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
974   PetscInt          *ai=a->i,*ailen=a->ilen;
975   PetscInt          *aj=a->j,stepval,lastcol = -1;
976   const PetscScalar *value = v;
977   MatScalar         *ap,*aa = a->a,*bap;
978 
979   PetscFunctionBegin;
980   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
981   stepval = (n-1)*4;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k];
984     rp   = aj + ai[row];
985     ap   = aa + 16*ai[row];
986     nrow = ailen[row];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col = in[l];
991       if (col <= lastcol) low = 0; else high = nrow;
992       lastcol = col;
993       value = v + k*(stepval+4 + l)*4;
994       while (high-low > 7) {
995         t = (low+high)/2;
996         if (rp[t] > col) high = t;
997         else             low  = t;
998       }
999       for (i=low; i<high; i++) {
1000         if (rp[i] > col) break;
1001         if (rp[i] == col) {
1002           bap  = ap +  16*i;
1003           for (ii=0; ii<4; ii++,value+=stepval) {
1004             for (jj=ii; jj<16; jj+=4) {
1005               bap[jj] += *value++;
1006             }
1007           }
1008           goto noinsert2;
1009         }
1010       }
1011       N = nrow++ - 1;
1012       high++; /* added new column index thus must search to one higher than before */
1013       /* shift up all the later entries in this row */
1014       for (ii=N; ii>=i; ii--) {
1015         rp[ii+1] = rp[ii];
1016         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1017       }
1018       if (N >= i) {
1019         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1020       }
1021       rp[i] = col;
1022       bap   = ap +  16*i;
1023       for (ii=0; ii<4; ii++,value+=stepval) {
1024         for (jj=ii; jj<16; jj+=4) {
1025           bap[jj] = *value++;
1026         }
1027       }
1028       noinsert2:;
1029       low = i;
1030     }
1031     ailen[row] = nrow;
1032   }
1033   PetscFunctionReturnVoid();
1034 }
1035 EXTERN_C_END
1036 
1037 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1038 #define matsetvalues4_ MATSETVALUES4
1039 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1040 #define matsetvalues4_ matsetvalues4
1041 #endif
1042 
1043 EXTERN_C_BEGIN
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatSetValues4_"
1046 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1047 {
1048   Mat         A = *AA;
1049   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1050   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1051   PetscInt    *ai=a->i,*ailen=a->ilen;
1052   PetscInt    *aj=a->j,brow,bcol;
1053   PetscInt    ridx,cidx,lastcol = -1;
1054   MatScalar   *ap,value,*aa=a->a,*bap;
1055 
1056   PetscFunctionBegin;
1057   for (k=0; k<m; k++) { /* loop over added rows */
1058     row  = im[k]; brow = row/4;
1059     rp   = aj + ai[brow];
1060     ap   = aa + 16*ai[brow];
1061     nrow = ailen[brow];
1062     low  = 0;
1063     high = nrow;
1064     for (l=0; l<n; l++) { /* loop over added columns */
1065       col = in[l]; bcol = col/4;
1066       ridx = row % 4; cidx = col % 4;
1067       value = v[l + k*n];
1068       if (col <= lastcol) low = 0; else high = nrow;
1069       lastcol = col;
1070       while (high-low > 7) {
1071         t = (low+high)/2;
1072         if (rp[t] > bcol) high = t;
1073         else              low  = t;
1074       }
1075       for (i=low; i<high; i++) {
1076         if (rp[i] > bcol) break;
1077         if (rp[i] == bcol) {
1078           bap  = ap +  16*i + 4*cidx + ridx;
1079           *bap += value;
1080           goto noinsert1;
1081         }
1082       }
1083       N = nrow++ - 1;
1084       high++; /* added new column thus must search to one higher than before */
1085       /* shift up all the later entries in this row */
1086       for (ii=N; ii>=i; ii--) {
1087         rp[ii+1] = rp[ii];
1088         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1089       }
1090       if (N>=i) {
1091         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1092       }
1093       rp[i]                    = bcol;
1094       ap[16*i + 4*cidx + ridx] = value;
1095       noinsert1:;
1096       low = i;
1097     }
1098     ailen[brow] = nrow;
1099   }
1100   PetscFunctionReturnVoid();
1101 }
1102 EXTERN_C_END
1103 
1104 /*
1105      Checks for missing diagonals
1106 */
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1109 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1110 {
1111   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113   PetscInt       *diag,*jj = a->j,i;
1114 
1115   PetscFunctionBegin;
1116   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1117   *missing = PETSC_FALSE;
1118   if (A->rmap->n > 0 && !jj) {
1119     *missing  = PETSC_TRUE;
1120     if (d) *d = 0;
1121     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1122   } else {
1123     diag     = a->diag;
1124     for (i=0; i<a->mbs; i++) {
1125       if (jj[diag[i]] != i) {
1126         *missing  = PETSC_TRUE;
1127         if (d) *d = i;
1128         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1129       }
1130     }
1131   }
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1137 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1138 {
1139   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1140   PetscErrorCode ierr;
1141   PetscInt       i,j,m = a->mbs;
1142 
1143   PetscFunctionBegin;
1144   if (!a->diag) {
1145     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1146     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1147     a->free_diag = PETSC_TRUE;
1148   }
1149   for (i=0; i<m; i++) {
1150     a->diag[i] = a->i[i+1];
1151     for (j=a->i[i]; j<a->i[i+1]; j++) {
1152       if (a->j[j] == i) {
1153         a->diag[i] = j;
1154         break;
1155       }
1156     }
1157   }
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 
1162 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1166 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1167 {
1168   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1169   PetscErrorCode ierr;
1170   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1171   PetscInt       *tia, *tja;
1172 
1173   PetscFunctionBegin;
1174   *nn = n;
1175   if (!ia) PetscFunctionReturn(0);
1176   if (symmetric) {
1177     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1178     nz   = tia[n];
1179   } else {
1180     tia = a->i; tja = a->j;
1181   }
1182 
1183   if (!blockcompressed && bs > 1) {
1184     (*nn) *= bs;
1185     nbs    = bs;
1186     /* malloc & create the natural set of indices */
1187     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1188     if (n) {
1189       (*ia)[0] = 0;
1190       for (j=1; j<bs; j++) {
1191         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1192       }
1193     }
1194 
1195     for (i=1; i<n; i++) {
1196       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1197       for (j=1; j<bs; j++) {
1198         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1199       }
1200     }
1201     if (n) {
1202       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1203     }
1204 
1205     if (ja) {
1206       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1207       cnt = 0;
1208       for (i=0; i<n; i++) {
1209         for (j=0; j<bs; j++) {
1210           for (k=tia[i]; k<tia[i+1]; k++) {
1211             for (l=0; l<bs; l++) {
1212               (*ja)[cnt++] = bs*tja[k] + l;
1213 	    }
1214           }
1215         }
1216       }
1217     }
1218 
1219     n     *= bs;
1220     nz *= bs*bs;
1221     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1222       ierr = PetscFree(tia);CHKERRQ(ierr);
1223       ierr = PetscFree(tja);CHKERRQ(ierr);
1224     }
1225   } else if (oshift == 1) {
1226     if (symmetric) {
1227       PetscInt nz = tia[A->rmap->n/bs];
1228       /*  add 1 to i and j indices */
1229       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1230       *ia = tia;
1231       if (ja) {
1232 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1233         *ja = tja;
1234       }
1235     } else {
1236       PetscInt nz = a->i[A->rmap->n/bs];
1237       /* malloc space and  add 1 to i and j indices */
1238       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1239       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1240       if (ja) {
1241 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1242 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1243       }
1244     }
1245   } else {
1246     *ia = tia;
1247     if (ja) *ja = tja;
1248   }
1249 
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1255 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1256 {
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   if (!ia) PetscFunctionReturn(0);
1261   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1262     ierr = PetscFree(*ia);CHKERRQ(ierr);
1263     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1264   }
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1270 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1271 {
1272   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1278 #endif
1279   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1280   if (a->row) {
1281     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1282   }
1283   if (a->col) {
1284     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1285   }
1286   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1287   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1288   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1289   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1290   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1291   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1292   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1293   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1294   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1295 
1296   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1297   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1298   ierr = PetscFree(a);CHKERRQ(ierr);
1299 
1300   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1314 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1315 {
1316   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   switch (op) {
1321   case MAT_ROW_ORIENTED:
1322     a->roworiented    = flg;
1323     break;
1324   case MAT_KEEP_NONZERO_PATTERN:
1325     a->keepnonzeropattern = flg;
1326     break;
1327   case MAT_NEW_NONZERO_LOCATIONS:
1328     a->nonew          = (flg ? 0 : 1);
1329     break;
1330   case MAT_NEW_NONZERO_LOCATION_ERR:
1331     a->nonew          = (flg ? -1 : 0);
1332     break;
1333   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1334     a->nonew          = (flg ? -2 : 0);
1335     break;
1336   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1337     a->nounused       = (flg ? -1 : 0);
1338     break;
1339   case MAT_NEW_DIAGONALS:
1340   case MAT_IGNORE_OFF_PROC_ENTRIES:
1341   case MAT_USE_HASH_TABLE:
1342     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1343     break;
1344   case MAT_SYMMETRIC:
1345   case MAT_STRUCTURALLY_SYMMETRIC:
1346   case MAT_HERMITIAN:
1347   case MAT_SYMMETRY_ETERNAL:
1348     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1349     break;
1350   default:
1351     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1358 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1359 {
1360   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1361   PetscErrorCode ierr;
1362   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1363   MatScalar      *aa,*aa_i;
1364   PetscScalar    *v_i;
1365 
1366   PetscFunctionBegin;
1367   bs  = A->rmap->bs;
1368   ai  = a->i;
1369   aj  = a->j;
1370   aa  = a->a;
1371   bs2 = a->bs2;
1372 
1373   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1374 
1375   bn  = row/bs;   /* Block number */
1376   bp  = row % bs; /* Block Position */
1377   M   = ai[bn+1] - ai[bn];
1378   *nz = bs*M;
1379 
1380   if (v) {
1381     *v = 0;
1382     if (*nz) {
1383       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1384       for (i=0; i<M; i++) { /* for each block in the block row */
1385         v_i  = *v + i*bs;
1386         aa_i = aa + bs2*(ai[bn] + i);
1387         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1388       }
1389     }
1390   }
1391 
1392   if (idx) {
1393     *idx = 0;
1394     if (*nz) {
1395       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1396       for (i=0; i<M; i++) { /* for each block in the block row */
1397         idx_i = *idx + i*bs;
1398         itmp  = bs*aj[ai[bn] + i];
1399         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1400       }
1401     }
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1408 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1409 {
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1414   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1422 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1423 {
1424   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1425   Mat            C;
1426   PetscErrorCode ierr;
1427   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1428   PetscInt       *rows,*cols,bs2=a->bs2;
1429   MatScalar      *array;
1430 
1431   PetscFunctionBegin;
1432   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1433   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1434     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1435     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1436 
1437     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1438     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1439     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1440     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1441     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1442     ierr = PetscFree(col);CHKERRQ(ierr);
1443   } else {
1444     C = *B;
1445   }
1446 
1447   array = a->a;
1448   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1449   for (i=0; i<mbs; i++) {
1450     cols[0] = i*bs;
1451     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1452     len = ai[i+1] - ai[i];
1453     for (j=0; j<len; j++) {
1454       rows[0] = (*aj++)*bs;
1455       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1456       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1457       array += bs2;
1458     }
1459   }
1460   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1461 
1462   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1463   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1464 
1465   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1466     *B = C;
1467   } else {
1468     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1475 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1476 {
1477   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1478   PetscErrorCode ierr;
1479   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1480   int            fd;
1481   PetscScalar    *aa;
1482   FILE           *file;
1483 
1484   PetscFunctionBegin;
1485   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1486   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1487   col_lens[0] = MAT_FILE_COOKIE;
1488 
1489   col_lens[1] = A->rmap->N;
1490   col_lens[2] = A->cmap->n;
1491   col_lens[3] = a->nz*bs2;
1492 
1493   /* store lengths of each row and write (including header) to file */
1494   count = 0;
1495   for (i=0; i<a->mbs; i++) {
1496     for (j=0; j<bs; j++) {
1497       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1498     }
1499   }
1500   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1501   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1502 
1503   /* store column indices (zero start index) */
1504   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1505   count = 0;
1506   for (i=0; i<a->mbs; i++) {
1507     for (j=0; j<bs; j++) {
1508       for (k=a->i[i]; k<a->i[i+1]; k++) {
1509         for (l=0; l<bs; l++) {
1510           jj[count++] = bs*a->j[k] + l;
1511         }
1512       }
1513     }
1514   }
1515   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1516   ierr = PetscFree(jj);CHKERRQ(ierr);
1517 
1518   /* store nonzero values */
1519   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1520   count = 0;
1521   for (i=0; i<a->mbs; i++) {
1522     for (j=0; j<bs; j++) {
1523       for (k=a->i[i]; k<a->i[i+1]; k++) {
1524         for (l=0; l<bs; l++) {
1525           aa[count++] = a->a[bs2*k + l*bs + j];
1526         }
1527       }
1528     }
1529   }
1530   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1531   ierr = PetscFree(aa);CHKERRQ(ierr);
1532 
1533   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1534   if (file) {
1535     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1542 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1543 {
1544   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1545   PetscErrorCode    ierr;
1546   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1547   PetscViewerFormat format;
1548 
1549   PetscFunctionBegin;
1550   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1551   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1552     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1553   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1554     Mat aij;
1555     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1556     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1557     ierr = MatDestroy(aij);CHKERRQ(ierr);
1558   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1559      PetscFunctionReturn(0);
1560   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1561     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1562     for (i=0; i<a->mbs; i++) {
1563       for (j=0; j<bs; j++) {
1564         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1565         for (k=a->i[i]; k<a->i[i+1]; k++) {
1566           for (l=0; l<bs; l++) {
1567 #if defined(PETSC_USE_COMPLEX)
1568             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1570                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1572               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1573                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1575               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1576             }
1577 #else
1578             if (a->a[bs2*k + l*bs + j] != 0.0) {
1579               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1580             }
1581 #endif
1582           }
1583         }
1584         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1585       }
1586     }
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1588   } else {
1589     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1590     for (i=0; i<a->mbs; i++) {
1591       for (j=0; j<bs; j++) {
1592         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1593         for (k=a->i[i]; k<a->i[i+1]; k++) {
1594           for (l=0; l<bs; l++) {
1595 #if defined(PETSC_USE_COMPLEX)
1596             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1597               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1598                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1599             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1600               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1601                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             } else {
1603               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1604             }
1605 #else
1606             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1607 #endif
1608           }
1609         }
1610         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1611       }
1612     }
1613     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1614   }
1615   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 #undef __FUNCT__
1620 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1621 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1622 {
1623   Mat            A = (Mat) Aa;
1624   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1625   PetscErrorCode ierr;
1626   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1627   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1628   MatScalar      *aa;
1629   PetscViewer    viewer;
1630 
1631   PetscFunctionBegin;
1632 
1633   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1634   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1635 
1636   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1637 
1638   /* loop over matrix elements drawing boxes */
1639   color = PETSC_DRAW_BLUE;
1640   for (i=0,row=0; i<mbs; i++,row+=bs) {
1641     for (j=a->i[i]; j<a->i[i+1]; j++) {
1642       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1643       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1644       aa = a->a + j*bs2;
1645       for (k=0; k<bs; k++) {
1646         for (l=0; l<bs; l++) {
1647           if (PetscRealPart(*aa++) >=  0.) continue;
1648           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1649         }
1650       }
1651     }
1652   }
1653   color = PETSC_DRAW_CYAN;
1654   for (i=0,row=0; i<mbs; i++,row+=bs) {
1655     for (j=a->i[i]; j<a->i[i+1]; j++) {
1656       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1657       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1658       aa = a->a + j*bs2;
1659       for (k=0; k<bs; k++) {
1660         for (l=0; l<bs; l++) {
1661           if (PetscRealPart(*aa++) != 0.) continue;
1662           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1663         }
1664       }
1665     }
1666   }
1667 
1668   color = PETSC_DRAW_RED;
1669   for (i=0,row=0; i<mbs; i++,row+=bs) {
1670     for (j=a->i[i]; j<a->i[i+1]; j++) {
1671       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1672       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1673       aa = a->a + j*bs2;
1674       for (k=0; k<bs; k++) {
1675         for (l=0; l<bs; l++) {
1676           if (PetscRealPart(*aa++) <= 0.) continue;
1677           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1678         }
1679       }
1680     }
1681   }
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1687 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1688 {
1689   PetscErrorCode ierr;
1690   PetscReal      xl,yl,xr,yr,w,h;
1691   PetscDraw      draw;
1692   PetscTruth     isnull;
1693 
1694   PetscFunctionBegin;
1695 
1696   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1697   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1698 
1699   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1700   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1701   xr += w;    yr += h;  xl = -w;     yl = -h;
1702   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1703   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatView_SeqBAIJ"
1710 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1711 {
1712   PetscErrorCode ierr;
1713   PetscTruth     iascii,isbinary,isdraw;
1714 
1715   PetscFunctionBegin;
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1717   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1718   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1719   if (iascii){
1720     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1721   } else if (isbinary) {
1722     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1723   } else if (isdraw) {
1724     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1725   } else {
1726     Mat B;
1727     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1728     ierr = MatView(B,viewer);CHKERRQ(ierr);
1729     ierr = MatDestroy(B);CHKERRQ(ierr);
1730   }
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1737 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1738 {
1739   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1740   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1741   PetscInt    *ai = a->i,*ailen = a->ilen;
1742   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1743   MatScalar   *ap,*aa = a->a;
1744 
1745   PetscFunctionBegin;
1746   for (k=0; k<m; k++) { /* loop over rows */
1747     row  = im[k]; brow = row/bs;
1748     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1749     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1750     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1751     nrow = ailen[brow];
1752     for (l=0; l<n; l++) { /* loop over columns */
1753       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1754       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1755       col  = in[l] ;
1756       bcol = col/bs;
1757       cidx = col%bs;
1758       ridx = row%bs;
1759       high = nrow;
1760       low  = 0; /* assume unsorted */
1761       while (high-low > 5) {
1762         t = (low+high)/2;
1763         if (rp[t] > bcol) high = t;
1764         else             low  = t;
1765       }
1766       for (i=low; i<high; i++) {
1767         if (rp[i] > bcol) break;
1768         if (rp[i] == bcol) {
1769           *v++ = ap[bs2*i+bs*cidx+ridx];
1770           goto finished;
1771         }
1772       }
1773       *v++ = 0.0;
1774       finished:;
1775     }
1776   }
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #define CHUNKSIZE 10
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1783 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1784 {
1785   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1786   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1787   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1788   PetscErrorCode    ierr;
1789   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1790   PetscTruth        roworiented=a->roworiented;
1791   const PetscScalar *value = v;
1792   MatScalar         *ap,*aa = a->a,*bap;
1793 
1794   PetscFunctionBegin;
1795   if (roworiented) {
1796     stepval = (n-1)*bs;
1797   } else {
1798     stepval = (m-1)*bs;
1799   }
1800   for (k=0; k<m; k++) { /* loop over added rows */
1801     row  = im[k];
1802     if (row < 0) continue;
1803 #if defined(PETSC_USE_DEBUG)
1804     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1805 #endif
1806     rp   = aj + ai[row];
1807     ap   = aa + bs2*ai[row];
1808     rmax = imax[row];
1809     nrow = ailen[row];
1810     low  = 0;
1811     high = nrow;
1812     for (l=0; l<n; l++) { /* loop over added columns */
1813       if (in[l] < 0) continue;
1814 #if defined(PETSC_USE_DEBUG)
1815       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1816 #endif
1817       col = in[l];
1818       if (roworiented) {
1819         value = v + k*(stepval+bs)*bs + l*bs;
1820       } else {
1821         value = v + l*(stepval+bs)*bs + k*bs;
1822       }
1823       if (col <= lastcol) low = 0; else high = nrow;
1824       lastcol = col;
1825       while (high-low > 7) {
1826         t = (low+high)/2;
1827         if (rp[t] > col) high = t;
1828         else             low  = t;
1829       }
1830       for (i=low; i<high; i++) {
1831         if (rp[i] > col) break;
1832         if (rp[i] == col) {
1833           bap  = ap +  bs2*i;
1834           if (roworiented) {
1835             if (is == ADD_VALUES) {
1836               for (ii=0; ii<bs; ii++,value+=stepval) {
1837                 for (jj=ii; jj<bs2; jj+=bs) {
1838                   bap[jj] += *value++;
1839                 }
1840               }
1841             } else {
1842               for (ii=0; ii<bs; ii++,value+=stepval) {
1843                 for (jj=ii; jj<bs2; jj+=bs) {
1844                   bap[jj] = *value++;
1845                 }
1846               }
1847             }
1848           } else {
1849             if (is == ADD_VALUES) {
1850               for (ii=0; ii<bs; ii++,value+=stepval) {
1851                 for (jj=0; jj<bs; jj++) {
1852                   *bap++ += *value++;
1853                 }
1854               }
1855             } else {
1856               for (ii=0; ii<bs; ii++,value+=stepval) {
1857                 for (jj=0; jj<bs; jj++) {
1858                   *bap++  = *value++;
1859                 }
1860               }
1861             }
1862           }
1863           goto noinsert2;
1864         }
1865       }
1866       if (nonew == 1) goto noinsert2;
1867       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1868       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1869       N = nrow++ - 1; high++;
1870       /* shift up all the later entries in this row */
1871       for (ii=N; ii>=i; ii--) {
1872         rp[ii+1] = rp[ii];
1873         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1874       }
1875       if (N >= i) {
1876         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1877       }
1878       rp[i] = col;
1879       bap   = ap +  bs2*i;
1880       if (roworiented) {
1881         for (ii=0; ii<bs; ii++,value+=stepval) {
1882           for (jj=ii; jj<bs2; jj+=bs) {
1883             bap[jj] = *value++;
1884           }
1885         }
1886       } else {
1887         for (ii=0; ii<bs; ii++,value+=stepval) {
1888           for (jj=0; jj<bs; jj++) {
1889             *bap++  = *value++;
1890           }
1891         }
1892       }
1893       noinsert2:;
1894       low = i;
1895     }
1896     ailen[row] = nrow;
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1903 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1904 {
1905   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1906   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1907   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1908   PetscErrorCode ierr;
1909   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1910   MatScalar      *aa = a->a,*ap;
1911   PetscReal      ratio=0.6;
1912 
1913   PetscFunctionBegin;
1914   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1915 
1916   if (m) rmax = ailen[0];
1917   for (i=1; i<mbs; i++) {
1918     /* move each row back by the amount of empty slots (fshift) before it*/
1919     fshift += imax[i-1] - ailen[i-1];
1920     rmax   = PetscMax(rmax,ailen[i]);
1921     if (fshift) {
1922       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1923       N = ailen[i];
1924       for (j=0; j<N; j++) {
1925         ip[j-fshift] = ip[j];
1926         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1927       }
1928     }
1929     ai[i] = ai[i-1] + ailen[i-1];
1930   }
1931   if (mbs) {
1932     fshift += imax[mbs-1] - ailen[mbs-1];
1933     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1934   }
1935   /* reset ilen and imax for each row */
1936   for (i=0; i<mbs; i++) {
1937     ailen[i] = imax[i] = ai[i+1] - ai[i];
1938   }
1939   a->nz = ai[mbs];
1940 
1941   /* diagonals may have moved, so kill the diagonal pointers */
1942   a->idiagvalid = PETSC_FALSE;
1943   if (fshift && a->diag) {
1944     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1945     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1946     a->diag = 0;
1947   }
1948   if (fshift && a->nounused == -1) {
1949     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1950   }
1951   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1952   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1953   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1954   a->reallocs          = 0;
1955   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1956 
1957   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1958   if (a->compressedrow.use){
1959     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1960   }
1961 
1962   A->same_nonzero = PETSC_TRUE;
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 /*
1967    This function returns an array of flags which indicate the locations of contiguous
1968    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1969    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1970    Assume: sizes should be long enough to hold all the values.
1971 */
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1974 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1975 {
1976   PetscInt   i,j,k,row;
1977   PetscTruth flg;
1978 
1979   PetscFunctionBegin;
1980   for (i=0,j=0; i<n; j++) {
1981     row = idx[i];
1982     if (row%bs!=0) { /* Not the begining of a block */
1983       sizes[j] = 1;
1984       i++;
1985     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1986       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1987       i++;
1988     } else { /* Begining of the block, so check if the complete block exists */
1989       flg = PETSC_TRUE;
1990       for (k=1; k<bs; k++) {
1991         if (row+k != idx[i+k]) { /* break in the block */
1992           flg = PETSC_FALSE;
1993           break;
1994         }
1995       }
1996       if (flg) { /* No break in the bs */
1997         sizes[j] = bs;
1998         i+= bs;
1999       } else {
2000         sizes[j] = 1;
2001         i++;
2002       }
2003     }
2004   }
2005   *bs_max = j;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2011 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2012 {
2013   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2014   PetscErrorCode ierr;
2015   PetscInt       i,j,k,count,*rows;
2016   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2017   PetscScalar    zero = 0.0;
2018   MatScalar      *aa;
2019 
2020   PetscFunctionBegin;
2021   /* Make a copy of the IS and  sort it */
2022   /* allocate memory for rows,sizes */
2023   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2024 
2025   /* copy IS values to rows, and sort them */
2026   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2027   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2028   if (baij->keepnonzeropattern) {
2029     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2030     bs_max = is_n;
2031     A->same_nonzero = PETSC_TRUE;
2032   } else {
2033     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2034     A->same_nonzero = PETSC_FALSE;
2035   }
2036 
2037   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2038     row   = rows[j];
2039     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2040     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2041     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2042     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2043       if (diag != 0.0) {
2044         if (baij->ilen[row/bs] > 0) {
2045           baij->ilen[row/bs]       = 1;
2046           baij->j[baij->i[row/bs]] = row/bs;
2047           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2048         }
2049         /* Now insert all the diagonal values for this bs */
2050         for (k=0; k<bs; k++) {
2051           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2052         }
2053       } else { /* (diag == 0.0) */
2054         baij->ilen[row/bs] = 0;
2055       } /* end (diag == 0.0) */
2056     } else { /* (sizes[i] != bs) */
2057 #if defined (PETSC_USE_DEBUG)
2058       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2059 #endif
2060       for (k=0; k<count; k++) {
2061         aa[0] =  zero;
2062         aa    += bs;
2063       }
2064       if (diag != 0.0) {
2065         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2066       }
2067     }
2068   }
2069 
2070   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2071   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2077 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2078 {
2079   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2080   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2081   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2082   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2083   PetscErrorCode ierr;
2084   PetscInt       ridx,cidx,bs2=a->bs2;
2085   PetscTruth     roworiented=a->roworiented;
2086   MatScalar      *ap,value,*aa=a->a,*bap;
2087 
2088   PetscFunctionBegin;
2089   if (v) PetscValidScalarPointer(v,6);
2090   for (k=0; k<m; k++) { /* loop over added rows */
2091     row  = im[k];
2092     brow = row/bs;
2093     if (row < 0) continue;
2094 #if defined(PETSC_USE_DEBUG)
2095     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2096 #endif
2097     rp   = aj + ai[brow];
2098     ap   = aa + bs2*ai[brow];
2099     rmax = imax[brow];
2100     nrow = ailen[brow];
2101     low  = 0;
2102     high = nrow;
2103     for (l=0; l<n; l++) { /* loop over added columns */
2104       if (in[l] < 0) continue;
2105 #if defined(PETSC_USE_DEBUG)
2106       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2107 #endif
2108       col = in[l]; bcol = col/bs;
2109       ridx = row % bs; cidx = col % bs;
2110       if (roworiented) {
2111         value = v[l + k*n];
2112       } else {
2113         value = v[k + l*m];
2114       }
2115       if (col <= lastcol) low = 0; else high = nrow;
2116       lastcol = col;
2117       while (high-low > 7) {
2118         t = (low+high)/2;
2119         if (rp[t] > bcol) high = t;
2120         else              low  = t;
2121       }
2122       for (i=low; i<high; i++) {
2123         if (rp[i] > bcol) break;
2124         if (rp[i] == bcol) {
2125           bap  = ap +  bs2*i + bs*cidx + ridx;
2126           if (is == ADD_VALUES) *bap += value;
2127           else                  *bap  = value;
2128           goto noinsert1;
2129         }
2130       }
2131       if (nonew == 1) goto noinsert1;
2132       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2133       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2134       N = nrow++ - 1; high++;
2135       /* shift up all the later entries in this row */
2136       for (ii=N; ii>=i; ii--) {
2137         rp[ii+1] = rp[ii];
2138         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2139       }
2140       if (N>=i) {
2141         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2142       }
2143       rp[i]                      = bcol;
2144       ap[bs2*i + bs*cidx + ridx] = value;
2145       a->nz++;
2146       noinsert1:;
2147       low = i;
2148     }
2149     ailen[brow] = nrow;
2150   }
2151   A->same_nonzero = PETSC_FALSE;
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2156 
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2159 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2160 {
2161   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2162   Mat            outA;
2163   PetscErrorCode ierr;
2164   PetscTruth     row_identity,col_identity;
2165 
2166   PetscFunctionBegin;
2167   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2168   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2169   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2170   if (!row_identity || !col_identity) {
2171     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2172   }
2173 
2174   outA          = inA;
2175   inA->factor   = MAT_FACTOR_LU;
2176 
2177   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2178 
2179   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2180   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2181   a->row = row;
2182   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2183   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2184   a->col = col;
2185 
2186   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2187   if (a->icol) {
2188     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2189   }
2190   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2191   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2192 
2193   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2194   if (!a->solve_work) {
2195     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2196     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2197   }
2198   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2199 
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 EXTERN_C_BEGIN
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2206 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2207 {
2208   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2209   PetscInt    i,nz,mbs;
2210 
2211   PetscFunctionBegin;
2212   nz  = baij->maxnz/baij->bs2;
2213   mbs = baij->mbs;
2214   for (i=0; i<nz; i++) {
2215     baij->j[i] = indices[i];
2216   }
2217   baij->nz = nz;
2218   for (i=0; i<mbs; i++) {
2219     baij->ilen[i] = baij->imax[i];
2220   }
2221   PetscFunctionReturn(0);
2222 }
2223 EXTERN_C_END
2224 
2225 #undef __FUNCT__
2226 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2227 /*@
2228     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2229        in the matrix.
2230 
2231   Input Parameters:
2232 +  mat - the SeqBAIJ matrix
2233 -  indices - the column indices
2234 
2235   Level: advanced
2236 
2237   Notes:
2238     This can be called if you have precomputed the nonzero structure of the
2239   matrix and want to provide it to the matrix object to improve the performance
2240   of the MatSetValues() operation.
2241 
2242     You MUST have set the correct numbers of nonzeros per row in the call to
2243   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2244 
2245     MUST be called before any calls to MatSetValues();
2246 
2247 @*/
2248 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2249 {
2250   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2251 
2252   PetscFunctionBegin;
2253   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2254   PetscValidPointer(indices,2);
2255   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2256   if (f) {
2257     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2258   } else {
2259     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2260   }
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2266 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2267 {
2268   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2269   PetscErrorCode ierr;
2270   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2271   PetscReal      atmp;
2272   PetscScalar    *x,zero = 0.0;
2273   MatScalar      *aa;
2274   PetscInt       ncols,brow,krow,kcol;
2275 
2276   PetscFunctionBegin;
2277   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2278   bs   = A->rmap->bs;
2279   aa   = a->a;
2280   ai   = a->i;
2281   aj   = a->j;
2282   mbs  = a->mbs;
2283 
2284   ierr = VecSet(v,zero);CHKERRQ(ierr);
2285   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2286   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2287   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2288   for (i=0; i<mbs; i++) {
2289     ncols = ai[1] - ai[0]; ai++;
2290     brow  = bs*i;
2291     for (j=0; j<ncols; j++){
2292       for (kcol=0; kcol<bs; kcol++){
2293         for (krow=0; krow<bs; krow++){
2294           atmp = PetscAbsScalar(*aa);aa++;
2295           row = brow + krow;    /* row index */
2296           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2297           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2298         }
2299       }
2300       aj++;
2301     }
2302   }
2303   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatCopy_SeqBAIJ"
2309 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2310 {
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   /* If the two matrices have the same copy implementation, use fast copy. */
2315   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2316     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2317     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2318 
2319     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2320       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2321     }
2322     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2323   } else {
2324     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2325   }
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2331 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2332 {
2333   PetscErrorCode ierr;
2334 
2335   PetscFunctionBegin;
2336   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2342 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2343 {
2344   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2345   PetscFunctionBegin;
2346   *array = a->a;
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2352 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2353 {
2354   PetscFunctionBegin;
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #include "petscblaslapack.h"
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2361 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2362 {
2363   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2364   PetscErrorCode ierr;
2365   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2366   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2367 
2368   PetscFunctionBegin;
2369   if (str == SAME_NONZERO_PATTERN) {
2370     PetscScalar alpha = a;
2371     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2372   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2373     if (y->xtoy && y->XtoY != X) {
2374       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2375       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2376     }
2377     if (!y->xtoy) { /* get xtoy */
2378       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2379       y->XtoY = X;
2380       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2381     }
2382     bs2 = bs*bs;
2383     for (i=0; i<x->nz; i++) {
2384       j = 0;
2385       while (j < bs2){
2386         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2387         j++;
2388       }
2389     }
2390     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2391   } else {
2392     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2393   }
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2399 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2400 {
2401   PetscInt rbs,cbs;
2402   PetscErrorCode ierr;
2403 
2404   PetscFunctionBegin;
2405   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2406   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2407   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2408   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2414 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2415 {
2416   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2417   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2418   MatScalar      *aa = a->a;
2419 
2420   PetscFunctionBegin;
2421   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2427 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2428 {
2429   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2430   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2431   MatScalar      *aa = a->a;
2432 
2433   PetscFunctionBegin;
2434   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2442 /*
2443     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2444 */
2445 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2446 {
2447   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2450   PetscInt       nz = a->i[m],row,*jj,mr,col;
2451 
2452   PetscFunctionBegin;
2453   *nn = n;
2454   if (!ia) PetscFunctionReturn(0);
2455   if (symmetric) {
2456     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2457   } else {
2458     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2459     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2460     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2461     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2462     jj = a->j;
2463     for (i=0; i<nz; i++) {
2464       collengths[jj[i]]++;
2465     }
2466     cia[0] = oshift;
2467     for (i=0; i<n; i++) {
2468       cia[i+1] = cia[i] + collengths[i];
2469     }
2470     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2471     jj   = a->j;
2472     for (row=0; row<m; row++) {
2473       mr = a->i[row+1] - a->i[row];
2474       for (i=0; i<mr; i++) {
2475         col = *jj++;
2476         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2477       }
2478     }
2479     ierr = PetscFree(collengths);CHKERRQ(ierr);
2480     *ia = cia; *ja = cja;
2481   }
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2487 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2488 {
2489   PetscErrorCode ierr;
2490 
2491   PetscFunctionBegin;
2492   if (!ia) PetscFunctionReturn(0);
2493   ierr = PetscFree(*ia);CHKERRQ(ierr);
2494   ierr = PetscFree(*ja);CHKERRQ(ierr);
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2500 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2501 {
2502   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2503   PetscErrorCode ierr;
2504   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2505   PetscScalar    dx,*y,*xx,*w3_array;
2506   PetscScalar    *vscale_array;
2507   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2508   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2509   void           *fctx = coloring->fctx;
2510   PetscTruth     flg = PETSC_FALSE;
2511   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2512   Vec            x1_tmp;
2513 
2514   PetscFunctionBegin;
2515   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2516   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2517   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2518   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2519 
2520   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2521   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2522   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2523   if (flg) {
2524     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2525   } else {
2526     PetscTruth assembled;
2527     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2528     if (assembled) {
2529       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2530     }
2531   }
2532 
2533   x1_tmp = x1;
2534   if (!coloring->vscale){
2535     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2536   }
2537 
2538   /*
2539     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2540     coloring->F for the coarser grids from the finest
2541   */
2542   if (coloring->F) {
2543     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2544     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2545     if (m1 != m2) {
2546       coloring->F = 0;
2547       }
2548     }
2549 
2550   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2551     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2552   }
2553   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2554 
2555   /* Set w1 = F(x1) */
2556   if (coloring->F) {
2557     w1          = coloring->F; /* use already computed value of function */
2558     coloring->F = 0;
2559   } else {
2560     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2561     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2562     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2563   }
2564 
2565   if (!coloring->w3) {
2566     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2567     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2568   }
2569   w3 = coloring->w3;
2570 
2571     CHKMEMQ;
2572     /* Compute all the local scale factors, including ghost points */
2573   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2574   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2575   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2576   if (ctype == IS_COLORING_GHOSTED){
2577     col_start = 0; col_end = N;
2578   } else if (ctype == IS_COLORING_GLOBAL){
2579     xx = xx - start;
2580     vscale_array = vscale_array - start;
2581     col_start = start; col_end = N + start;
2582   }    CHKMEMQ;
2583   for (col=col_start; col<col_end; col++){
2584     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2585     if (coloring->htype[0] == 'w') {
2586       dx = 1.0 + unorm;
2587     } else {
2588       dx  = xx[col];
2589     }
2590     if (dx == 0.0) dx = 1.0;
2591 #if !defined(PETSC_USE_COMPLEX)
2592     if (dx < umin && dx >= 0.0)      dx = umin;
2593     else if (dx < 0.0 && dx > -umin) dx = -umin;
2594 #else
2595     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2596     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2597 #endif
2598     dx               *= epsilon;
2599     vscale_array[col] = 1.0/dx;
2600   }     CHKMEMQ;
2601   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2602   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2603   if (ctype == IS_COLORING_GLOBAL){
2604     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2605     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2606   }
2607         CHKMEMQ;
2608   if (coloring->vscaleforrow) {
2609     vscaleforrow = coloring->vscaleforrow;
2610   } else {
2611     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2612   }
2613 
2614 
2615   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2616   /*
2617     Loop over each color
2618   */
2619   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2620   for (k=0; k<coloring->ncolors; k++) {
2621     coloring->currentcolor = k;
2622     for (i=0; i<bs; i++) {
2623       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2624       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2625       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2626       /*
2627 	Loop over each column associated with color
2628 	adding the perturbation to the vector w3.
2629       */
2630       for (l=0; l<coloring->ncolumns[k]; l++) {
2631 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2632 	if (coloring->htype[0] == 'w') {
2633 	  dx = 1.0 + unorm;
2634 	} else {
2635 	  dx  = xx[col];
2636 	}
2637 	if (dx == 0.0) dx = 1.0;
2638 #if !defined(PETSC_USE_COMPLEX)
2639 	if (dx < umin && dx >= 0.0)      dx = umin;
2640 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2641 #else
2642 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2643 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2644 #endif
2645 	dx            *= epsilon;
2646 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2647 	w3_array[col] += dx;
2648       }
2649       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2650       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2651 
2652       /*
2653 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2654 	w2 = F(x1 + dx) - F(x1)
2655       */
2656       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2657       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2658       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2659       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2660 
2661       /*
2662 	Loop over rows of vector, putting results into Jacobian matrix
2663       */
2664       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2665       for (l=0; l<coloring->nrows[k]; l++) {
2666 	row    = bs*coloring->rows[k][l];             /* local row index */
2667 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2668         for (j=0; j<bs; j++) {
2669   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2670           srows[j]  = row + start + j;
2671         }
2672 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2673       }
2674       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2675     }
2676   } /* endof for each color */
2677   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2678   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2679   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2680   ierr = PetscFree(srows);CHKERRQ(ierr);
2681 
2682   coloring->currentcolor = -1;
2683   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2684   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2685   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 /* -------------------------------------------------------------------*/
2690 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2691        MatGetRow_SeqBAIJ,
2692        MatRestoreRow_SeqBAIJ,
2693        MatMult_SeqBAIJ_N,
2694 /* 4*/ MatMultAdd_SeqBAIJ_N,
2695        MatMultTranspose_SeqBAIJ,
2696        MatMultTransposeAdd_SeqBAIJ,
2697        0,
2698        0,
2699        0,
2700 /*10*/ 0,
2701        MatLUFactor_SeqBAIJ,
2702        0,
2703        0,
2704        MatTranspose_SeqBAIJ,
2705 /*15*/ MatGetInfo_SeqBAIJ,
2706        MatEqual_SeqBAIJ,
2707        MatGetDiagonal_SeqBAIJ,
2708        MatDiagonalScale_SeqBAIJ,
2709        MatNorm_SeqBAIJ,
2710 /*20*/ 0,
2711        MatAssemblyEnd_SeqBAIJ,
2712        MatSetOption_SeqBAIJ,
2713        MatZeroEntries_SeqBAIJ,
2714 /*24*/ MatZeroRows_SeqBAIJ,
2715        0,
2716        0,
2717        0,
2718        0,
2719 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2720        0,
2721        0,
2722        MatGetArray_SeqBAIJ,
2723        MatRestoreArray_SeqBAIJ,
2724 /*34*/ MatDuplicate_SeqBAIJ,
2725        0,
2726        0,
2727        MatILUFactor_SeqBAIJ,
2728        0,
2729 /*39*/ MatAXPY_SeqBAIJ,
2730        MatGetSubMatrices_SeqBAIJ,
2731        MatIncreaseOverlap_SeqBAIJ,
2732        MatGetValues_SeqBAIJ,
2733        MatCopy_SeqBAIJ,
2734 /*44*/ 0,
2735        MatScale_SeqBAIJ,
2736        0,
2737        0,
2738        MatILUDTFactor_SeqBAIJ,
2739 /*49*/ MatSetBlockSize_SeqBAIJ,
2740        MatGetRowIJ_SeqBAIJ,
2741        MatRestoreRowIJ_SeqBAIJ,
2742        MatGetColumnIJ_SeqBAIJ,
2743        MatRestoreColumnIJ_SeqBAIJ,
2744 /*54*/ MatFDColoringCreate_SeqAIJ,
2745        0,
2746        0,
2747        0,
2748        MatSetValuesBlocked_SeqBAIJ,
2749 /*59*/ MatGetSubMatrix_SeqBAIJ,
2750        MatDestroy_SeqBAIJ,
2751        MatView_SeqBAIJ,
2752        0,
2753        0,
2754 /*64*/ 0,
2755        0,
2756        0,
2757        0,
2758        0,
2759 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2760        0,
2761        MatConvert_Basic,
2762        0,
2763        0,
2764 /*74*/ 0,
2765        MatFDColoringApply_BAIJ,
2766        0,
2767        0,
2768        0,
2769 /*79*/ 0,
2770        0,
2771        0,
2772        0,
2773        MatLoad_SeqBAIJ,
2774 /*84*/ 0,
2775        0,
2776        0,
2777        0,
2778        0,
2779 /*89*/ 0,
2780        0,
2781        0,
2782        0,
2783        0,
2784 /*94*/ 0,
2785        0,
2786        0,
2787        0,
2788        0,
2789 /*99*/0,
2790        0,
2791        0,
2792        0,
2793        0,
2794 /*104*/0,
2795        MatRealPart_SeqBAIJ,
2796        MatImaginaryPart_SeqBAIJ,
2797        0,
2798        0,
2799 /*109*/0,
2800        0,
2801        0,
2802        0,
2803        MatMissingDiagonal_SeqBAIJ,
2804 /*114*/0,
2805        0,
2806        0,
2807        0,
2808        0,
2809 /*119*/0,
2810        0,
2811        MatMultHermitianTranspose_SeqBAIJ,
2812        MatMultHermitianTransposeAdd_SeqBAIJ
2813 };
2814 
2815 EXTERN_C_BEGIN
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2818 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2819 {
2820   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2821   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2822   PetscErrorCode ierr;
2823 
2824   PetscFunctionBegin;
2825   if (aij->nonew != 1) {
2826     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2827   }
2828 
2829   /* allocate space for values if not already there */
2830   if (!aij->saved_values) {
2831     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2832     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2833   }
2834 
2835   /* copy values over */
2836   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
2839 EXTERN_C_END
2840 
2841 EXTERN_C_BEGIN
2842 #undef __FUNCT__
2843 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2844 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2845 {
2846   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2847   PetscErrorCode ierr;
2848   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2849 
2850   PetscFunctionBegin;
2851   if (aij->nonew != 1) {
2852     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2853   }
2854   if (!aij->saved_values) {
2855     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2856   }
2857 
2858   /* copy values over */
2859   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 EXTERN_C_END
2863 
2864 EXTERN_C_BEGIN
2865 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2866 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2867 EXTERN_C_END
2868 
2869 EXTERN_C_BEGIN
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2872 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2873 {
2874   Mat_SeqBAIJ    *b;
2875   PetscErrorCode ierr;
2876   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2877   PetscTruth     flg,skipallocation = PETSC_FALSE;
2878 
2879   PetscFunctionBegin;
2880 
2881   if (nz == MAT_SKIP_ALLOCATION) {
2882     skipallocation = PETSC_TRUE;
2883     nz             = 0;
2884   }
2885 
2886   if (bs < 0) {
2887     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2888       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2889     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2890     bs   = PetscAbs(bs);
2891   }
2892   if (nnz && newbs != bs) {
2893     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2894   }
2895   bs   = newbs;
2896 
2897   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2898   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2899   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2900   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2901 
2902   B->preallocated = PETSC_TRUE;
2903 
2904   mbs  = B->rmap->n/bs;
2905   nbs  = B->cmap->n/bs;
2906   bs2  = bs*bs;
2907 
2908   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2909     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2910   }
2911 
2912   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2913   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2914   if (nnz) {
2915     for (i=0; i<mbs; i++) {
2916       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2917       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2918     }
2919   }
2920 
2921   b       = (Mat_SeqBAIJ*)B->data;
2922   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2923     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2924   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2925 
2926   if (!flg) {
2927     switch (bs) {
2928     case 1:
2929       B->ops->mult            = MatMult_SeqBAIJ_1;
2930       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2931       B->ops->sor           = MatSOR_SeqBAIJ_1;
2932       break;
2933     case 2:
2934       B->ops->mult            = MatMult_SeqBAIJ_2;
2935       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2936       B->ops->sor           = MatSOR_SeqBAIJ_2;
2937       break;
2938     case 3:
2939       B->ops->mult            = MatMult_SeqBAIJ_3;
2940       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2941       B->ops->sor           = MatSOR_SeqBAIJ_3;
2942       break;
2943     case 4:
2944       B->ops->mult            = MatMult_SeqBAIJ_4;
2945       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2946       B->ops->sor           = MatSOR_SeqBAIJ_4;
2947       break;
2948     case 5:
2949       B->ops->mult            = MatMult_SeqBAIJ_5;
2950       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2951       B->ops->sor           = MatSOR_SeqBAIJ_5;
2952       break;
2953     case 6:
2954       B->ops->mult            = MatMult_SeqBAIJ_6;
2955       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2956       B->ops->sor           = MatSOR_SeqBAIJ_6;
2957       break;
2958     case 7:
2959       B->ops->mult            = MatMult_SeqBAIJ_7;
2960       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2961       B->ops->sor           = MatSOR_SeqBAIJ_7;
2962       break;
2963     default:
2964       B->ops->mult            = MatMult_SeqBAIJ_N;
2965       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2966       break;
2967     }
2968   }
2969   B->rmap->bs      = bs;
2970   b->mbs     = mbs;
2971   b->nbs     = nbs;
2972   if (!skipallocation) {
2973     if (!b->imax) {
2974       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2975       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2976       b->free_imax_ilen = PETSC_TRUE;
2977     }
2978     /* b->ilen will count nonzeros in each block row so far. */
2979     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2980     if (!nnz) {
2981       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2982       else if (nz <= 0)        nz = 1;
2983       for (i=0; i<mbs; i++) b->imax[i] = nz;
2984       nz = nz*mbs;
2985     } else {
2986       nz = 0;
2987       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2988     }
2989 
2990     /* allocate the matrix space */
2991     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2992     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2993     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2994     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2995     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2996     b->singlemalloc = PETSC_TRUE;
2997     b->i[0] = 0;
2998     for (i=1; i<mbs+1; i++) {
2999       b->i[i] = b->i[i-1] + b->imax[i-1];
3000     }
3001     b->free_a     = PETSC_TRUE;
3002     b->free_ij    = PETSC_TRUE;
3003   } else {
3004     b->free_a     = PETSC_FALSE;
3005     b->free_ij    = PETSC_FALSE;
3006   }
3007 
3008   B->rmap->bs          = bs;
3009   b->bs2              = bs2;
3010   b->mbs              = mbs;
3011   b->nz               = 0;
3012   b->maxnz            = nz*bs2;
3013   B->info.nz_unneeded = (PetscReal)b->maxnz;
3014   PetscFunctionReturn(0);
3015 }
3016 EXTERN_C_END
3017 
3018 EXTERN_C_BEGIN
3019 #undef __FUNCT__
3020 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3021 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3022 {
3023   PetscInt       i,m,nz,nz_max=0,*nnz;
3024   PetscScalar    *values=0;
3025   PetscErrorCode ierr;
3026 
3027   PetscFunctionBegin;
3028 
3029   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3030 
3031   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3032   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3033   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3034   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3035   m = B->rmap->n/bs;
3036 
3037   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3038   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3039   for(i=0; i<m; i++) {
3040     nz = ii[i+1]- ii[i];
3041     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3042     nz_max = PetscMax(nz_max, nz);
3043     nnz[i] = nz;
3044   }
3045   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3046   ierr = PetscFree(nnz);CHKERRQ(ierr);
3047 
3048   values = (PetscScalar*)V;
3049   if (!values) {
3050     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3051     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3052   }
3053   for (i=0; i<m; i++) {
3054     PetscInt          ncols  = ii[i+1] - ii[i];
3055     const PetscInt    *icols = jj + ii[i];
3056     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3057     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3058   }
3059   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3060   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3062 
3063   PetscFunctionReturn(0);
3064 }
3065 EXTERN_C_END
3066 
3067 
3068 EXTERN_C_BEGIN
3069 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3070 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3071 EXTERN_C_END
3072 
3073 /*MC
3074    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3075    block sparse compressed row format.
3076 
3077    Options Database Keys:
3078 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3079 
3080   Level: beginner
3081 
3082 .seealso: MatCreateSeqBAIJ()
3083 M*/
3084 
3085 
3086 EXTERN_C_BEGIN
3087 #undef __FUNCT__
3088 #define __FUNCT__ "MatCreate_SeqBAIJ"
3089 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3090 {
3091   PetscErrorCode ierr;
3092   PetscMPIInt    size;
3093   Mat_SeqBAIJ    *b;
3094 
3095   PetscFunctionBegin;
3096   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3097   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3098 
3099   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3100   B->data = (void*)b;
3101   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3102   B->mapping               = 0;
3103   b->row                   = 0;
3104   b->col                   = 0;
3105   b->icol                  = 0;
3106   b->reallocs              = 0;
3107   b->saved_values          = 0;
3108 
3109   b->roworiented           = PETSC_TRUE;
3110   b->nonew                 = 0;
3111   b->diag                  = 0;
3112   b->solve_work            = 0;
3113   b->mult_work             = 0;
3114   B->spptr                 = 0;
3115   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3116   b->keepnonzeropattern    = PETSC_FALSE;
3117   b->xtoy                  = 0;
3118   b->XtoY                  = 0;
3119   b->compressedrow.use     = PETSC_FALSE;
3120   b->compressedrow.nrows   = 0;
3121   b->compressedrow.i       = PETSC_NULL;
3122   b->compressedrow.rindex  = PETSC_NULL;
3123   b->compressedrow.checked = PETSC_FALSE;
3124   B->same_nonzero          = PETSC_FALSE;
3125 
3126   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3127                                      "MatGetFactorAvailable_seqbaij_petsc",
3128                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3129   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3130                                      "MatGetFactor_seqbaij_petsc",
3131                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3132   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3133                                      "MatInvertBlockDiagonal_SeqBAIJ",
3134                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3135   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3136                                      "MatStoreValues_SeqBAIJ",
3137                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3138   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3139                                      "MatRetrieveValues_SeqBAIJ",
3140                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3141   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3142                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3143                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3144   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3145                                      "MatConvert_SeqBAIJ_SeqAIJ",
3146                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3147   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3148                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3149                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3150   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3151                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3152                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3153   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3154                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3155                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3156   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3157   PetscFunctionReturn(0);
3158 }
3159 EXTERN_C_END
3160 
3161 #undef __FUNCT__
3162 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3163 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3164 {
3165   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3166   PetscErrorCode ierr;
3167   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3168 
3169   PetscFunctionBegin;
3170   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3171 
3172   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3173     c->imax = a->imax;
3174     c->ilen = a->ilen;
3175     c->free_imax_ilen = PETSC_FALSE;
3176   } else {
3177     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3178     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3179     for (i=0; i<mbs; i++) {
3180       c->imax[i] = a->imax[i];
3181       c->ilen[i] = a->ilen[i];
3182     }
3183     c->free_imax_ilen = PETSC_TRUE;
3184   }
3185 
3186   /* allocate the matrix space */
3187   if (mallocmatspace){
3188     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3189       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3190       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3191       c->singlemalloc = PETSC_FALSE;
3192       c->free_ij      = PETSC_FALSE;
3193       c->i            = a->i;
3194       c->j            = a->j;
3195       c->parent       = A;
3196       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3197       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3198       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3199     } else {
3200       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3201       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3202       c->singlemalloc = PETSC_TRUE;
3203       c->free_ij      = PETSC_TRUE;
3204       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3205       if (mbs > 0) {
3206 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3207 	if (cpvalues == MAT_COPY_VALUES) {
3208 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3209 	} else {
3210 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3211 	}
3212       }
3213     }
3214   }
3215 
3216   c->roworiented = a->roworiented;
3217   c->nonew       = a->nonew;
3218   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3219   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3220   c->bs2         = a->bs2;
3221   c->mbs         = a->mbs;
3222   c->nbs         = a->nbs;
3223 
3224   if (a->diag) {
3225     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3226       c->diag      = a->diag;
3227       c->free_diag = PETSC_FALSE;
3228     } else {
3229       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3230       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3231       for (i=0; i<mbs; i++) {
3232         c->diag[i] = a->diag[i];
3233       }
3234       c->free_diag = PETSC_TRUE;
3235     }
3236   } else c->diag        = 0;
3237   c->nz                 = a->nz;
3238   c->maxnz              = a->maxnz;
3239   c->solve_work         = 0;
3240   c->mult_work          = 0;
3241   c->free_a             = PETSC_TRUE;
3242   c->free_ij            = PETSC_TRUE;
3243   C->preallocated       = PETSC_TRUE;
3244   C->assembled          = PETSC_TRUE;
3245 
3246   c->compressedrow.use     = a->compressedrow.use;
3247   c->compressedrow.nrows   = a->compressedrow.nrows;
3248   c->compressedrow.checked = a->compressedrow.checked;
3249   if (a->compressedrow.checked && a->compressedrow.use){
3250     i = a->compressedrow.nrows;
3251     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3252     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3253     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3254     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3255   } else {
3256     c->compressedrow.use    = PETSC_FALSE;
3257     c->compressedrow.i      = PETSC_NULL;
3258     c->compressedrow.rindex = PETSC_NULL;
3259   }
3260   C->same_nonzero = A->same_nonzero;
3261   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3267 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3268 {
3269     PetscErrorCode ierr;
3270 
3271   PetscFunctionBegin;
3272   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3273   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3274   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3275   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3276   PetscFunctionReturn(0);
3277 }
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatLoad_SeqBAIJ"
3281 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3282 {
3283   Mat_SeqBAIJ    *a;
3284   Mat            B;
3285   PetscErrorCode ierr;
3286   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3287   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3288   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3289   PetscInt       *masked,nmask,tmp,bs2,ishift;
3290   PetscMPIInt    size;
3291   int            fd;
3292   PetscScalar    *aa;
3293   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3294 
3295   PetscFunctionBegin;
3296   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3297     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3298   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3299   bs2  = bs*bs;
3300 
3301   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3302   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3303   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3304   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3305   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3306   M = header[1]; N = header[2]; nz = header[3];
3307 
3308   if (header[3] < 0) {
3309     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3310   }
3311 
3312   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3313 
3314   /*
3315      This code adds extra rows to make sure the number of rows is
3316     divisible by the blocksize
3317   */
3318   mbs        = M/bs;
3319   extra_rows = bs - M + bs*(mbs);
3320   if (extra_rows == bs) extra_rows = 0;
3321   else                  mbs++;
3322   if (extra_rows) {
3323     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3324   }
3325 
3326   /* read in row lengths */
3327   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3328   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3329   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3330 
3331   /* read in column indices */
3332   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3333   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3334   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3335 
3336   /* loop over row lengths determining block row lengths */
3337   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3338   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3339   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3340   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3341   rowcount = 0;
3342   nzcount = 0;
3343   for (i=0; i<mbs; i++) {
3344     nmask = 0;
3345     for (j=0; j<bs; j++) {
3346       kmax = rowlengths[rowcount];
3347       for (k=0; k<kmax; k++) {
3348         tmp = jj[nzcount++]/bs;
3349         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3350       }
3351       rowcount++;
3352     }
3353     browlengths[i] += nmask;
3354     /* zero out the mask elements we set */
3355     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3356   }
3357 
3358   /* create our matrix */
3359   ierr = MatCreate(comm,&B);
3360   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3361   ierr = MatSetType(B,type);CHKERRQ(ierr);
3362   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3363   a = (Mat_SeqBAIJ*)B->data;
3364 
3365   /* set matrix "i" values */
3366   a->i[0] = 0;
3367   for (i=1; i<= mbs; i++) {
3368     a->i[i]      = a->i[i-1] + browlengths[i-1];
3369     a->ilen[i-1] = browlengths[i-1];
3370   }
3371   a->nz         = 0;
3372   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3373 
3374   /* read in nonzero values */
3375   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3376   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3377   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3378 
3379   /* set "a" and "j" values into matrix */
3380   nzcount = 0; jcount = 0;
3381   for (i=0; i<mbs; i++) {
3382     nzcountb = nzcount;
3383     nmask    = 0;
3384     for (j=0; j<bs; j++) {
3385       kmax = rowlengths[i*bs+j];
3386       for (k=0; k<kmax; k++) {
3387         tmp = jj[nzcount++]/bs;
3388 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3389       }
3390     }
3391     /* sort the masked values */
3392     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3393 
3394     /* set "j" values into matrix */
3395     maskcount = 1;
3396     for (j=0; j<nmask; j++) {
3397       a->j[jcount++]  = masked[j];
3398       mask[masked[j]] = maskcount++;
3399     }
3400     /* set "a" values into matrix */
3401     ishift = bs2*a->i[i];
3402     for (j=0; j<bs; j++) {
3403       kmax = rowlengths[i*bs+j];
3404       for (k=0; k<kmax; k++) {
3405         tmp       = jj[nzcountb]/bs ;
3406         block     = mask[tmp] - 1;
3407         point     = jj[nzcountb] - bs*tmp;
3408         idx       = ishift + bs2*block + j + bs*point;
3409         a->a[idx] = (MatScalar)aa[nzcountb++];
3410       }
3411     }
3412     /* zero out the mask elements we set */
3413     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3414   }
3415   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3416 
3417   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3418   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3419   ierr = PetscFree(aa);CHKERRQ(ierr);
3420   ierr = PetscFree(jj);CHKERRQ(ierr);
3421   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3422 
3423   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3424   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3425   ierr = MatView_Private(B);CHKERRQ(ierr);
3426 
3427   *A = B;
3428   PetscFunctionReturn(0);
3429 }
3430 
3431 #undef __FUNCT__
3432 #define __FUNCT__ "MatCreateSeqBAIJ"
3433 /*@C
3434    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3435    compressed row) format.  For good matrix assembly performance the
3436    user should preallocate the matrix storage by setting the parameter nz
3437    (or the array nnz).  By setting these parameters accurately, performance
3438    during matrix assembly can be increased by more than a factor of 50.
3439 
3440    Collective on MPI_Comm
3441 
3442    Input Parameters:
3443 +  comm - MPI communicator, set to PETSC_COMM_SELF
3444 .  bs - size of block
3445 .  m - number of rows
3446 .  n - number of columns
3447 .  nz - number of nonzero blocks  per block row (same for all rows)
3448 -  nnz - array containing the number of nonzero blocks in the various block rows
3449          (possibly different for each block row) or PETSC_NULL
3450 
3451    Output Parameter:
3452 .  A - the matrix
3453 
3454    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3455    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3456    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3457 
3458    Options Database Keys:
3459 .   -mat_no_unroll - uses code that does not unroll the loops in the
3460                      block calculations (much slower)
3461 .    -mat_block_size - size of the blocks to use
3462 
3463    Level: intermediate
3464 
3465    Notes:
3466    The number of rows and columns must be divisible by blocksize.
3467 
3468    If the nnz parameter is given then the nz parameter is ignored
3469 
3470    A nonzero block is any block that as 1 or more nonzeros in it
3471 
3472    The block AIJ format is fully compatible with standard Fortran 77
3473    storage.  That is, the stored row and column indices can begin at
3474    either one (as in Fortran) or zero.  See the users' manual for details.
3475 
3476    Specify the preallocated storage with either nz or nnz (not both).
3477    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3478    allocation.  For additional details, see the users manual chapter on
3479    matrices.
3480 
3481 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3482 @*/
3483 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3484 {
3485   PetscErrorCode ierr;
3486 
3487   PetscFunctionBegin;
3488   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3489   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3490   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3491   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3497 /*@C
3498    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3499    per row in the matrix. For good matrix assembly performance the
3500    user should preallocate the matrix storage by setting the parameter nz
3501    (or the array nnz).  By setting these parameters accurately, performance
3502    during matrix assembly can be increased by more than a factor of 50.
3503 
3504    Collective on MPI_Comm
3505 
3506    Input Parameters:
3507 +  A - the matrix
3508 .  bs - size of block
3509 .  nz - number of block nonzeros per block row (same for all rows)
3510 -  nnz - array containing the number of block nonzeros in the various block rows
3511          (possibly different for each block row) or PETSC_NULL
3512 
3513    Options Database Keys:
3514 .   -mat_no_unroll - uses code that does not unroll the loops in the
3515                      block calculations (much slower)
3516 .    -mat_block_size - size of the blocks to use
3517 
3518    Level: intermediate
3519 
3520    Notes:
3521    If the nnz parameter is given then the nz parameter is ignored
3522 
3523    You can call MatGetInfo() to get information on how effective the preallocation was;
3524    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3525    You can also run with the option -info and look for messages with the string
3526    malloc in them to see if additional memory allocation was needed.
3527 
3528    The block AIJ format is fully compatible with standard Fortran 77
3529    storage.  That is, the stored row and column indices can begin at
3530    either one (as in Fortran) or zero.  See the users' manual for details.
3531 
3532    Specify the preallocated storage with either nz or nnz (not both).
3533    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3534    allocation.  For additional details, see the users manual chapter on
3535    matrices.
3536 
3537 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3538 @*/
3539 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3540 {
3541   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3542 
3543   PetscFunctionBegin;
3544   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3545   if (f) {
3546     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3547   }
3548   PetscFunctionReturn(0);
3549 }
3550 
3551 #undef __FUNCT__
3552 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3553 /*@C
3554    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3555    (the default sequential PETSc format).
3556 
3557    Collective on MPI_Comm
3558 
3559    Input Parameters:
3560 +  A - the matrix
3561 .  i - the indices into j for the start of each local row (starts with zero)
3562 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3563 -  v - optional values in the matrix
3564 
3565    Level: developer
3566 
3567 .keywords: matrix, aij, compressed row, sparse
3568 
3569 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3570 @*/
3571 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3572 {
3573   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3574 
3575   PetscFunctionBegin;
3576   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3577   if (f) {
3578     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3579   }
3580   PetscFunctionReturn(0);
3581 }
3582 
3583 
3584 #undef __FUNCT__
3585 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3586 /*@
3587      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3588               (upper triangular entries in CSR format) provided by the user.
3589 
3590      Collective on MPI_Comm
3591 
3592    Input Parameters:
3593 +  comm - must be an MPI communicator of size 1
3594 .  bs - size of block
3595 .  m - number of rows
3596 .  n - number of columns
3597 .  i - row indices
3598 .  j - column indices
3599 -  a - matrix values
3600 
3601    Output Parameter:
3602 .  mat - the matrix
3603 
3604    Level: intermediate
3605 
3606    Notes:
3607        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3608     once the matrix is destroyed
3609 
3610        You cannot set new nonzero locations into this matrix, that will generate an error.
3611 
3612        The i and j indices are 0 based
3613 
3614 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3615 
3616 @*/
3617 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3618 {
3619   PetscErrorCode ierr;
3620   PetscInt       ii;
3621   Mat_SeqBAIJ    *baij;
3622 
3623   PetscFunctionBegin;
3624   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3625   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3626 
3627   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3628   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3629   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3630   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3631   baij = (Mat_SeqBAIJ*)(*mat)->data;
3632   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3633   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3634 
3635   baij->i = i;
3636   baij->j = j;
3637   baij->a = a;
3638   baij->singlemalloc = PETSC_FALSE;
3639   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3640   baij->free_a       = PETSC_FALSE;
3641   baij->free_ij       = PETSC_FALSE;
3642 
3643   for (ii=0; ii<m; ii++) {
3644     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3645 #if defined(PETSC_USE_DEBUG)
3646     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3647 #endif
3648   }
3649 #if defined(PETSC_USE_DEBUG)
3650   for (ii=0; ii<baij->i[m]; ii++) {
3651     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3652     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3653   }
3654 #endif
3655 
3656   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3658   PetscFunctionReturn(0);
3659 }
3660