xref: /petsc/src/mat/impls/baij/seq/baij.c (revision e7e72b3d0edcd0d15e7f68c03be08666507fc872)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr,(*f)(Mat);
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31 
32   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33   if (f) {
34     ierr = (*f)(mat);CHKERRQ(ierr);
35   } else {
36     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 EXTERN_C_BEGIN
42 #undef __FUNCT__
43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45 {
46   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
47   PetscErrorCode ierr;
48   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5];
49   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25];
50   PetscReal      shift = 0.0;
51 
52   PetscFunctionBegin;
53   if (a->idiagvalid) PetscFunctionReturn(0);
54   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
55   diag_offset = a->diag;
56   if (!a->idiag) {
57     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
58     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     default:
129       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported for block size %D",bs);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
138 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139 {
140   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141   PetscScalar        *x,x1,s1;
142   const PetscScalar  *b;
143   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144   PetscErrorCode     ierr;
145   PetscInt           m = a->mbs,i,i2,nz,j;
146   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
147 
148   PetscFunctionBegin;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   its = its*lits;
151   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
156 
157   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
158 
159   diag  = a->diag;
160   idiag = a->idiag;
161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
163 
164   if (flag & SOR_ZERO_INITIAL_GUESS) {
165     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166       x[0] = b[0]*idiag[0];
167       i2     = 1;
168       idiag += 1;
169       for (i=1; i<m; i++) {
170         v     = aa + ai[i];
171         vi    = aj + ai[i];
172         nz    = diag[i] - ai[i];
173         s1    = b[i2];
174         for (j=0; j<nz; j++) {
175           s1 -= v[j]*x[vi[j]];
176         }
177         x[i2]   = idiag[0]*s1;
178         idiag   += 1;
179         i2      += 1;
180       }
181       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
182       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
183     }
184     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
185         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
186       i2    = 0;
187       mdiag = a->idiag+a->mbs;
188       for (i=0; i<m; i++) {
189         x1      = x[i2];
190         x[i2]   = mdiag[0]*x1;
191         mdiag  += 1;
192         i2     += 1;
193       }
194       ierr = PetscLogFlops(m);CHKERRQ(ierr);
195     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
196       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
197     }
198     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
199       idiag   = a->idiag+a->mbs - 1;
200       i2      = m - 1;
201       x1      = x[i2];
202       x[i2]   = idiag[0]*x1;
203       idiag -= 1;
204       i2    -= 1;
205       for (i=m-2; i>=0; i--) {
206         v     = aa + (diag[i]+1);
207         vi    = aj + diag[i] + 1;
208         nz    = ai[i+1] - diag[i] - 1;
209         s1    = x[i2];
210         for (j=0; j<nz; j++) {
211           s1 -= v[j]*x[vi[j]];
212         }
213         x[i2]   = idiag[0]*s1;
214         idiag   -= 1;
215         i2      -= 1;
216       }
217       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
218     }
219   } else {
220     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
221   }
222   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
223   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 #undef __FUNCT__
228 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
229 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
230 {
231   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
232   PetscScalar        *x,x1,x2,s1,s2;
233   const PetscScalar  *b;
234   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
235   PetscErrorCode     ierr;
236   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
237   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
238 
239   PetscFunctionBegin;
240   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
241   its = its*lits;
242   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
243   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
244   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
245   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
246   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
247 
248   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
249 
250   diag  = a->diag;
251   idiag = a->idiag;
252   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
253   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
254 
255   if (flag & SOR_ZERO_INITIAL_GUESS) {
256     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
257       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
258       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
259       i2     = 2;
260       idiag += 4;
261       for (i=1; i<m; i++) {
262 	v     = aa + 4*ai[i];
263 	vi    = aj + ai[i];
264 	nz    = diag[i] - ai[i];
265 	s1    = b[i2]; s2 = b[i2+1];
266         for (j=0; j<nz; j++) {
267 	  idx  = 2*vi[j];
268           it   = 4*j;
269 	  x1   = x[idx]; x2 = x[1+idx];
270 	  s1  -= v[it]*x1 + v[it+2]*x2;
271 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
272 	}
273 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
274 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
275         idiag   += 4;
276         i2      += 2;
277       }
278       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
279       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
280     }
281     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
282         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
283       i2    = 0;
284       mdiag = a->idiag+4*a->mbs;
285       for (i=0; i<m; i++) {
286         x1      = x[i2]; x2 = x[i2+1];
287         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
288         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
289         mdiag  += 4;
290         i2     += 2;
291       }
292       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
293     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
294       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
295     }
296     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
297       idiag   = a->idiag+4*a->mbs - 4;
298       i2      = 2*m - 2;
299       x1      = x[i2]; x2 = x[i2+1];
300       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
301       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
302       idiag -= 4;
303       i2    -= 2;
304       for (i=m-2; i>=0; i--) {
305 	v     = aa + 4*(diag[i]+1);
306 	vi    = aj + diag[i] + 1;
307 	nz    = ai[i+1] - diag[i] - 1;
308 	s1    = x[i2]; s2 = x[i2+1];
309         for (j=0; j<nz; j++) {
310  	  idx  = 2*vi[j];
311           it   = 4*j;
312 	  x1   = x[idx]; x2 = x[1+idx];
313 	  s1  -= v[it]*x1 + v[it+2]*x2;
314 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
315 	}
316 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
317 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
318         idiag   -= 4;
319         i2      -= 2;
320       }
321       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
322     }
323   } else {
324     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
325   }
326   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
327   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNCT__
332 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
333 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
334 {
335   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
336   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
337   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
338   const PetscScalar  *b;
339   PetscErrorCode     ierr;
340   PetscInt           m = a->mbs,i,i2,nz,idx;
341   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
342 
343   PetscFunctionBegin;
344   its = its*lits;
345   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
346   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
347   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
348   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
349   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
350   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
351 
352   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
353 
354   diag  = a->diag;
355   idiag = a->idiag;
356   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
357   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
358 
359   if (flag & SOR_ZERO_INITIAL_GUESS) {
360     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
361       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
362       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
363       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
364       i2     = 3;
365       idiag += 9;
366       for (i=1; i<m; i++) {
367         v     = aa + 9*ai[i];
368         vi    = aj + ai[i];
369         nz    = diag[i] - ai[i];
370         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
371         while (nz--) {
372           idx  = 3*(*vi++);
373           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
374           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
375           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
376           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
377           v   += 9;
378         }
379         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
380         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
381         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
382         idiag   += 9;
383         i2      += 3;
384       }
385       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
387     }
388     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390       i2    = 0;
391       mdiag = a->idiag+9*a->mbs;
392       for (i=0; i<m; i++) {
393         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
394         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
395         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
396         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
397         mdiag  += 9;
398         i2     += 3;
399       }
400       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
401     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
402       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
403     }
404     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
405       idiag   = a->idiag+9*a->mbs - 9;
406       i2      = 3*m - 3;
407       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
408       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
409       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
410       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
411       idiag -= 9;
412       i2    -= 3;
413       for (i=m-2; i>=0; i--) {
414         v     = aa + 9*(diag[i]+1);
415         vi    = aj + diag[i] + 1;
416         nz    = ai[i+1] - diag[i] - 1;
417         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
418         while (nz--) {
419           idx  = 3*(*vi++);
420           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
421           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
422           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
423           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
424           v   += 9;
425         }
426         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
427         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
428         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
429         idiag   -= 9;
430         i2      -= 3;
431       }
432       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
433     }
434   } else {
435     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
436   }
437   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
438   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
444 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
445 {
446   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
447   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
448   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
449   const PetscScalar  *b;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
461   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
476       i2     = 4;
477       idiag += 16;
478       for (i=1; i<m; i++) {
479 	v     = aa + 16*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
483 	while (nz--) {
484 	  idx  = 4*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
486 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
487 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
488 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
489 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
490 	  v   += 16;
491 	}
492 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
493 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
494 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
495 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
496         idiag   += 16;
497         i2      += 4;
498       }
499       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
500       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
501     }
502     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
503         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
504       i2    = 0;
505       mdiag = a->idiag+16*a->mbs;
506       for (i=0; i<m; i++) {
507         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
508         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
509         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
510         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
511         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
512         mdiag  += 16;
513         i2     += 4;
514       }
515       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
516     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
517       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
518     }
519     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
520       idiag   = a->idiag+16*a->mbs - 16;
521       i2      = 4*m - 4;
522       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
523       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
524       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
525       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
526       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
527       idiag -= 16;
528       i2    -= 4;
529       for (i=m-2; i>=0; i--) {
530 	v     = aa + 16*(diag[i]+1);
531 	vi    = aj + diag[i] + 1;
532 	nz    = ai[i+1] - diag[i] - 1;
533 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
534 	while (nz--) {
535 	  idx  = 4*(*vi++);
536 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
537 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
538 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
539 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
540 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
541 	  v   += 16;
542 	}
543 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
544 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
545 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
546 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
547         idiag   -= 16;
548         i2      -= 4;
549       }
550       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
551     }
552   } else {
553     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
554   }
555   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
556   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
562 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
563 {
564   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
565   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
566   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
567   const PetscScalar  *b;
568   PetscErrorCode     ierr;
569   PetscInt           m = a->mbs,i,i2,nz,idx;
570   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
571 
572   PetscFunctionBegin;
573   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
574   its = its*lits;
575   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
576   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
577   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
578   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
579   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
580 
581   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
582 
583   diag  = a->diag;
584   idiag = a->idiag;
585   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
586   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
587 
588   if (flag & SOR_ZERO_INITIAL_GUESS) {
589     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
590       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
591       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
592       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
593       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
594       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
595       i2     = 5;
596       idiag += 25;
597       for (i=1; i<m; i++) {
598 	v     = aa + 25*ai[i];
599 	vi    = aj + ai[i];
600 	nz    = diag[i] - ai[i];
601 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
602 	while (nz--) {
603 	  idx  = 5*(*vi++);
604 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
605 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
606 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
607 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
608 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
609 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
610 	  v   += 25;
611 	}
612 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
613 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
614 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
615 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
616 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
617         idiag   += 25;
618         i2      += 5;
619       }
620       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
621       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
622     }
623     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
624         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
625       i2    = 0;
626       mdiag = a->idiag+25*a->mbs;
627       for (i=0; i<m; i++) {
628         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
629         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
630         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
631         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
632         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
633         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
634         mdiag  += 25;
635         i2     += 5;
636       }
637       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
638     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
639       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
640     }
641     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
642       idiag   = a->idiag+25*a->mbs - 25;
643       i2      = 5*m - 5;
644       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
645       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
646       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
647       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
648       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
649       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
650       idiag -= 25;
651       i2    -= 5;
652       for (i=m-2; i>=0; i--) {
653 	v     = aa + 25*(diag[i]+1);
654 	vi    = aj + diag[i] + 1;
655 	nz    = ai[i+1] - diag[i] - 1;
656 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
657 	while (nz--) {
658 	  idx  = 5*(*vi++);
659 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
660 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
661 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
662 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
663 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
664 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
665 	  v   += 25;
666 	}
667 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
668 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
669 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
670 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
671 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
672         idiag   -= 25;
673         i2      -= 5;
674       }
675       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
676     }
677   } else {
678     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
679   }
680   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
681   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 #undef __FUNCT__
686 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
687 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
688 {
689   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
690   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
691   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
692   const PetscScalar  *b;
693   PetscErrorCode     ierr;
694   PetscInt           m = a->mbs,i,i2,nz,idx;
695   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
696 
697   PetscFunctionBegin;
698   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
699   its = its*lits;
700   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
701   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
702   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
703   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
704   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
705 
706   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
707 
708   diag  = a->diag;
709   idiag = a->idiag;
710   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
711   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
712 
713   if (flag & SOR_ZERO_INITIAL_GUESS) {
714     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
715       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
716       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
717       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
718       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
719       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
720       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
721       i2     = 6;
722       idiag += 36;
723       for (i=1; i<m; i++) {
724         v     = aa + 36*ai[i];
725         vi    = aj + ai[i];
726         nz    = diag[i] - ai[i];
727         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
728         while (nz--) {
729           idx  = 6*(*vi++);
730           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
731           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
732           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
733           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
734           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
735           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
736           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
737           v   += 36;
738         }
739         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
740         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
741         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
742         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
743         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
744         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
745         idiag   += 36;
746         i2      += 6;
747       }
748       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
749       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
750     }
751     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
752         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
753       i2    = 0;
754       mdiag = a->idiag+36*a->mbs;
755       for (i=0; i<m; i++) {
756         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
757         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
758         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
759         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
760         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
761         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
762         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
763         mdiag  += 36;
764         i2     += 6;
765       }
766       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
767     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
768       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
769     }
770     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
771       idiag   = a->idiag+36*a->mbs - 36;
772       i2      = 6*m - 6;
773       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
774       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
775       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
776       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
777       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
778       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
779       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
780       idiag -= 36;
781       i2    -= 6;
782       for (i=m-2; i>=0; i--) {
783         v     = aa + 36*(diag[i]+1);
784         vi    = aj + diag[i] + 1;
785         nz    = ai[i+1] - diag[i] - 1;
786         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
787         while (nz--) {
788           idx  = 6*(*vi++);
789           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
790           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
791           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
792           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
793           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
794           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
795           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
796           v   += 36;
797         }
798         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
799         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
800         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
801         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
802         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
803         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
804         idiag   -= 36;
805         i2      -= 6;
806       }
807       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
808     }
809   } else {
810     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
811   }
812   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
813   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
819 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
820 {
821   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
822   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
823   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
824   const PetscScalar  *b;
825   PetscErrorCode     ierr;
826   PetscInt           m = a->mbs,i,i2,nz,idx;
827   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
828 
829   PetscFunctionBegin;
830   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
831   its = its*lits;
832   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
833   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
834   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
835   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
836   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
837 
838   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
839 
840   diag  = a->diag;
841   idiag = a->idiag;
842   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
843   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
844 
845   if (flag & SOR_ZERO_INITIAL_GUESS) {
846     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
847       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
848       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
849       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
850       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
851       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
852       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
853       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
854       i2     = 7;
855       idiag += 49;
856       for (i=1; i<m; i++) {
857         v     = aa + 49*ai[i];
858         vi    = aj + ai[i];
859         nz    = diag[i] - ai[i];
860         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
861         while (nz--) {
862           idx  = 7*(*vi++);
863           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
864           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
865           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
866           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
867           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
868           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
869           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
870           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
871           v   += 49;
872         }
873         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
874         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
875         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
876         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
877         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
878         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
879         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
880         idiag   += 49;
881         i2      += 7;
882       }
883       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
884       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
885     }
886     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
887         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
888       i2    = 0;
889       mdiag = a->idiag+49*a->mbs;
890       for (i=0; i<m; i++) {
891         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
892         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
893         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
894         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
895         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
896         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
897         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
898         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
899         mdiag  += 36;
900         i2     += 6;
901       }
902       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
903     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
904       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
905     }
906     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
907       idiag   = a->idiag+49*a->mbs - 49;
908       i2      = 7*m - 7;
909       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
910       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
911       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
912       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
913       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
914       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
915       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
916       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
917       idiag -= 49;
918       i2    -= 7;
919       for (i=m-2; i>=0; i--) {
920         v     = aa + 49*(diag[i]+1);
921         vi    = aj + diag[i] + 1;
922         nz    = ai[i+1] - diag[i] - 1;
923         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
924         while (nz--) {
925           idx  = 7*(*vi++);
926           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
927           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
928           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
929           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
930           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
931           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
932           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
933           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
934           v   += 49;
935         }
936         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
937         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
938         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
939         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
940         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
941         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
942         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
943         idiag   -= 49;
944         i2      -= 7;
945       }
946       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
947     }
948   } else {
949     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
950   }
951   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
952   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957     Special version for direct calls from Fortran (Used in PETSc-fun3d)
958 */
959 #if defined(PETSC_HAVE_FORTRAN_CAPS)
960 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
961 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
962 #define matsetvaluesblocked4_ matsetvaluesblocked4
963 #endif
964 
965 EXTERN_C_BEGIN
966 #undef __FUNCT__
967 #define __FUNCT__ "matsetvaluesblocked4_"
968 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
969 {
970   Mat               A = *AA;
971   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
972   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
973   PetscInt          *ai=a->i,*ailen=a->ilen;
974   PetscInt          *aj=a->j,stepval,lastcol = -1;
975   const PetscScalar *value = v;
976   MatScalar         *ap,*aa = a->a,*bap;
977 
978   PetscFunctionBegin;
979   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980   stepval = (n-1)*4;
981   for (k=0; k<m; k++) { /* loop over added rows */
982     row  = im[k];
983     rp   = aj + ai[row];
984     ap   = aa + 16*ai[row];
985     nrow = ailen[row];
986     low  = 0;
987     high = nrow;
988     for (l=0; l<n; l++) { /* loop over added columns */
989       col = in[l];
990       if (col <= lastcol) low = 0; else high = nrow;
991       lastcol = col;
992       value = v + k*(stepval+4 + l)*4;
993       while (high-low > 7) {
994         t = (low+high)/2;
995         if (rp[t] > col) high = t;
996         else             low  = t;
997       }
998       for (i=low; i<high; i++) {
999         if (rp[i] > col) break;
1000         if (rp[i] == col) {
1001           bap  = ap +  16*i;
1002           for (ii=0; ii<4; ii++,value+=stepval) {
1003             for (jj=ii; jj<16; jj+=4) {
1004               bap[jj] += *value++;
1005             }
1006           }
1007           goto noinsert2;
1008         }
1009       }
1010       N = nrow++ - 1;
1011       high++; /* added new column index thus must search to one higher than before */
1012       /* shift up all the later entries in this row */
1013       for (ii=N; ii>=i; ii--) {
1014         rp[ii+1] = rp[ii];
1015         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1016       }
1017       if (N >= i) {
1018         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1019       }
1020       rp[i] = col;
1021       bap   = ap +  16*i;
1022       for (ii=0; ii<4; ii++,value+=stepval) {
1023         for (jj=ii; jj<16; jj+=4) {
1024           bap[jj] = *value++;
1025         }
1026       }
1027       noinsert2:;
1028       low = i;
1029     }
1030     ailen[row] = nrow;
1031   }
1032   PetscFunctionReturnVoid();
1033 }
1034 EXTERN_C_END
1035 
1036 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037 #define matsetvalues4_ MATSETVALUES4
1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039 #define matsetvalues4_ matsetvalues4
1040 #endif
1041 
1042 EXTERN_C_BEGIN
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatSetValues4_"
1045 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1046 {
1047   Mat         A = *AA;
1048   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1049   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1050   PetscInt    *ai=a->i,*ailen=a->ilen;
1051   PetscInt    *aj=a->j,brow,bcol;
1052   PetscInt    ridx,cidx,lastcol = -1;
1053   MatScalar   *ap,value,*aa=a->a,*bap;
1054 
1055   PetscFunctionBegin;
1056   for (k=0; k<m; k++) { /* loop over added rows */
1057     row  = im[k]; brow = row/4;
1058     rp   = aj + ai[brow];
1059     ap   = aa + 16*ai[brow];
1060     nrow = ailen[brow];
1061     low  = 0;
1062     high = nrow;
1063     for (l=0; l<n; l++) { /* loop over added columns */
1064       col = in[l]; bcol = col/4;
1065       ridx = row % 4; cidx = col % 4;
1066       value = v[l + k*n];
1067       if (col <= lastcol) low = 0; else high = nrow;
1068       lastcol = col;
1069       while (high-low > 7) {
1070         t = (low+high)/2;
1071         if (rp[t] > bcol) high = t;
1072         else              low  = t;
1073       }
1074       for (i=low; i<high; i++) {
1075         if (rp[i] > bcol) break;
1076         if (rp[i] == bcol) {
1077           bap  = ap +  16*i + 4*cidx + ridx;
1078           *bap += value;
1079           goto noinsert1;
1080         }
1081       }
1082       N = nrow++ - 1;
1083       high++; /* added new column thus must search to one higher than before */
1084       /* shift up all the later entries in this row */
1085       for (ii=N; ii>=i; ii--) {
1086         rp[ii+1] = rp[ii];
1087         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1088       }
1089       if (N>=i) {
1090         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1091       }
1092       rp[i]                    = bcol;
1093       ap[16*i + 4*cidx + ridx] = value;
1094       noinsert1:;
1095       low = i;
1096     }
1097     ailen[brow] = nrow;
1098   }
1099   PetscFunctionReturnVoid();
1100 }
1101 EXTERN_C_END
1102 
1103 /*
1104      Checks for missing diagonals
1105 */
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1108 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1109 {
1110   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1111   PetscErrorCode ierr;
1112   PetscInt       *diag,*jj = a->j,i;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1116   *missing = PETSC_FALSE;
1117   if (A->rmap->n > 0 && !jj) {
1118     *missing  = PETSC_TRUE;
1119     if (d) *d = 0;
1120     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1121   } else {
1122     diag     = a->diag;
1123     for (i=0; i<a->mbs; i++) {
1124       if (jj[diag[i]] != i) {
1125         *missing  = PETSC_TRUE;
1126         if (d) *d = i;
1127         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1128       }
1129     }
1130   }
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1136 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1137 {
1138   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1139   PetscErrorCode ierr;
1140   PetscInt       i,j,m = a->mbs;
1141 
1142   PetscFunctionBegin;
1143   if (!a->diag) {
1144     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1145     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1146     a->free_diag = PETSC_TRUE;
1147   }
1148   for (i=0; i<m; i++) {
1149     a->diag[i] = a->i[i+1];
1150     for (j=a->i[i]; j<a->i[i+1]; j++) {
1151       if (a->j[j] == i) {
1152         a->diag[i] = j;
1153         break;
1154       }
1155     }
1156   }
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 
1161 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1165 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1166 {
1167   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1168   PetscErrorCode ierr;
1169   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1170   PetscInt       *tia, *tja;
1171 
1172   PetscFunctionBegin;
1173   *nn = n;
1174   if (!ia) PetscFunctionReturn(0);
1175   if (symmetric) {
1176     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1177     nz   = tia[n];
1178   } else {
1179     tia = a->i; tja = a->j;
1180   }
1181 
1182   if (!blockcompressed && bs > 1) {
1183     (*nn) *= bs;
1184     nbs    = bs;
1185     /* malloc & create the natural set of indices */
1186     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1187     if (n) {
1188       (*ia)[0] = 0;
1189       for (j=1; j<bs; j++) {
1190         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1191       }
1192     }
1193 
1194     for (i=1; i<n; i++) {
1195       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1196       for (j=1; j<bs; j++) {
1197         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1198       }
1199     }
1200     if (n) {
1201       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1202     }
1203 
1204     if (ja) {
1205       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1206       cnt = 0;
1207       for (i=0; i<n; i++) {
1208         for (j=0; j<bs; j++) {
1209           for (k=tia[i]; k<tia[i+1]; k++) {
1210             for (l=0; l<bs; l++) {
1211               (*ja)[cnt++] = bs*tja[k] + l;
1212 	    }
1213           }
1214         }
1215       }
1216     }
1217 
1218     n     *= bs;
1219     nz *= bs*bs;
1220     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1221       ierr = PetscFree(tia);CHKERRQ(ierr);
1222       ierr = PetscFree(tja);CHKERRQ(ierr);
1223     }
1224   } else if (oshift == 1) {
1225     if (symmetric) {
1226       PetscInt nz = tia[A->rmap->n/bs];
1227       /*  add 1 to i and j indices */
1228       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1229       *ia = tia;
1230       if (ja) {
1231 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1232         *ja = tja;
1233       }
1234     } else {
1235       PetscInt nz = a->i[A->rmap->n/bs];
1236       /* malloc space and  add 1 to i and j indices */
1237       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1238       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1239       if (ja) {
1240 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1241 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1242       }
1243     }
1244   } else {
1245     *ia = tia;
1246     if (ja) *ja = tja;
1247   }
1248 
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1254 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1255 {
1256   PetscErrorCode ierr;
1257 
1258   PetscFunctionBegin;
1259   if (!ia) PetscFunctionReturn(0);
1260   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1261     ierr = PetscFree(*ia);CHKERRQ(ierr);
1262     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1263   }
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1269 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1270 {
1271   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275 #if defined(PETSC_USE_LOG)
1276   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1277 #endif
1278   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1279   if (a->row) {
1280     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1281   }
1282   if (a->col) {
1283     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1284   }
1285   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1286   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1287   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1288   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1289   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1290   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1291   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1292   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1293   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1294 
1295   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1296   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1297   ierr = PetscFree(a);CHKERRQ(ierr);
1298 
1299   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1313 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1314 {
1315   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   switch (op) {
1320   case MAT_ROW_ORIENTED:
1321     a->roworiented    = flg;
1322     break;
1323   case MAT_KEEP_NONZERO_PATTERN:
1324     a->keepnonzeropattern = flg;
1325     break;
1326   case MAT_NEW_NONZERO_LOCATIONS:
1327     a->nonew          = (flg ? 0 : 1);
1328     break;
1329   case MAT_NEW_NONZERO_LOCATION_ERR:
1330     a->nonew          = (flg ? -1 : 0);
1331     break;
1332   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1333     a->nonew          = (flg ? -2 : 0);
1334     break;
1335   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1336     a->nounused       = (flg ? -1 : 0);
1337     break;
1338   case MAT_NEW_DIAGONALS:
1339   case MAT_IGNORE_OFF_PROC_ENTRIES:
1340   case MAT_USE_HASH_TABLE:
1341     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1342     break;
1343   case MAT_SYMMETRIC:
1344   case MAT_STRUCTURALLY_SYMMETRIC:
1345   case MAT_HERMITIAN:
1346   case MAT_SYMMETRY_ETERNAL:
1347     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1348     break;
1349   default:
1350     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1357 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1358 {
1359   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1360   PetscErrorCode ierr;
1361   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1362   MatScalar      *aa,*aa_i;
1363   PetscScalar    *v_i;
1364 
1365   PetscFunctionBegin;
1366   bs  = A->rmap->bs;
1367   ai  = a->i;
1368   aj  = a->j;
1369   aa  = a->a;
1370   bs2 = a->bs2;
1371 
1372   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1373 
1374   bn  = row/bs;   /* Block number */
1375   bp  = row % bs; /* Block Position */
1376   M   = ai[bn+1] - ai[bn];
1377   *nz = bs*M;
1378 
1379   if (v) {
1380     *v = 0;
1381     if (*nz) {
1382       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1383       for (i=0; i<M; i++) { /* for each block in the block row */
1384         v_i  = *v + i*bs;
1385         aa_i = aa + bs2*(ai[bn] + i);
1386         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1387       }
1388     }
1389   }
1390 
1391   if (idx) {
1392     *idx = 0;
1393     if (*nz) {
1394       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1395       for (i=0; i<M; i++) { /* for each block in the block row */
1396         idx_i = *idx + i*bs;
1397         itmp  = bs*aj[ai[bn] + i];
1398         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1399       }
1400     }
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1407 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1408 {
1409   PetscErrorCode ierr;
1410 
1411   PetscFunctionBegin;
1412   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1413   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1421 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1422 {
1423   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1424   Mat            C;
1425   PetscErrorCode ierr;
1426   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1427   PetscInt       *rows,*cols,bs2=a->bs2;
1428   MatScalar      *array;
1429 
1430   PetscFunctionBegin;
1431   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1432   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1433     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1434     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1435 
1436     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1437     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1438     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1439     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1440     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1441     ierr = PetscFree(col);CHKERRQ(ierr);
1442   } else {
1443     C = *B;
1444   }
1445 
1446   array = a->a;
1447   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1448   for (i=0; i<mbs; i++) {
1449     cols[0] = i*bs;
1450     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1451     len = ai[i+1] - ai[i];
1452     for (j=0; j<len; j++) {
1453       rows[0] = (*aj++)*bs;
1454       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1455       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1456       array += bs2;
1457     }
1458   }
1459   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1460 
1461   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1463 
1464   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1465     *B = C;
1466   } else {
1467     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1468   }
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1474 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1475 {
1476   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1477   PetscErrorCode ierr;
1478   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1479   int            fd;
1480   PetscScalar    *aa;
1481   FILE           *file;
1482 
1483   PetscFunctionBegin;
1484   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1485   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1486   col_lens[0] = MAT_FILE_CLASSID;
1487 
1488   col_lens[1] = A->rmap->N;
1489   col_lens[2] = A->cmap->n;
1490   col_lens[3] = a->nz*bs2;
1491 
1492   /* store lengths of each row and write (including header) to file */
1493   count = 0;
1494   for (i=0; i<a->mbs; i++) {
1495     for (j=0; j<bs; j++) {
1496       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1497     }
1498   }
1499   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1500   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1501 
1502   /* store column indices (zero start index) */
1503   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1504   count = 0;
1505   for (i=0; i<a->mbs; i++) {
1506     for (j=0; j<bs; j++) {
1507       for (k=a->i[i]; k<a->i[i+1]; k++) {
1508         for (l=0; l<bs; l++) {
1509           jj[count++] = bs*a->j[k] + l;
1510         }
1511       }
1512     }
1513   }
1514   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1515   ierr = PetscFree(jj);CHKERRQ(ierr);
1516 
1517   /* store nonzero values */
1518   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1519   count = 0;
1520   for (i=0; i<a->mbs; i++) {
1521     for (j=0; j<bs; j++) {
1522       for (k=a->i[i]; k<a->i[i+1]; k++) {
1523         for (l=0; l<bs; l++) {
1524           aa[count++] = a->a[bs2*k + l*bs + j];
1525         }
1526       }
1527     }
1528   }
1529   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1530   ierr = PetscFree(aa);CHKERRQ(ierr);
1531 
1532   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1533   if (file) {
1534     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1535   }
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1541 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1542 {
1543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1544   PetscErrorCode    ierr;
1545   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1546   PetscViewerFormat format;
1547 
1548   PetscFunctionBegin;
1549   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1550   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1551     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1552   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1553     Mat aij;
1554     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1555     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1556     ierr = MatDestroy(aij);CHKERRQ(ierr);
1557   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1558      PetscFunctionReturn(0);
1559   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1560     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1561     for (i=0; i<a->mbs; i++) {
1562       for (j=0; j<bs; j++) {
1563         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1564         for (k=a->i[i]; k<a->i[i+1]; k++) {
1565           for (l=0; l<bs; l++) {
1566 #if defined(PETSC_USE_COMPLEX)
1567             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1568               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1569                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1570             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1571               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1572                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1573             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1574               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1575             }
1576 #else
1577             if (a->a[bs2*k + l*bs + j] != 0.0) {
1578               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1579             }
1580 #endif
1581           }
1582         }
1583         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1584       }
1585     }
1586     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1587   } else {
1588     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1589     for (i=0; i<a->mbs; i++) {
1590       for (j=0; j<bs; j++) {
1591         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1592         for (k=a->i[i]; k<a->i[i+1]; k++) {
1593           for (l=0; l<bs; l++) {
1594 #if defined(PETSC_USE_COMPLEX)
1595             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1596               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1597                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1598             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1599               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1600                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1601             } else {
1602               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1603             }
1604 #else
1605             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1606 #endif
1607           }
1608         }
1609         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1610       }
1611     }
1612     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1613   }
1614   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1615   PetscFunctionReturn(0);
1616 }
1617 
1618 #undef __FUNCT__
1619 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1620 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1621 {
1622   Mat            A = (Mat) Aa;
1623   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1624   PetscErrorCode ierr;
1625   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1626   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1627   MatScalar      *aa;
1628   PetscViewer    viewer;
1629 
1630   PetscFunctionBegin;
1631 
1632   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1633   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1634 
1635   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1636 
1637   /* loop over matrix elements drawing boxes */
1638   color = PETSC_DRAW_BLUE;
1639   for (i=0,row=0; i<mbs; i++,row+=bs) {
1640     for (j=a->i[i]; j<a->i[i+1]; j++) {
1641       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643       aa = a->a + j*bs2;
1644       for (k=0; k<bs; k++) {
1645         for (l=0; l<bs; l++) {
1646           if (PetscRealPart(*aa++) >=  0.) continue;
1647           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1648         }
1649       }
1650     }
1651   }
1652   color = PETSC_DRAW_CYAN;
1653   for (i=0,row=0; i<mbs; i++,row+=bs) {
1654     for (j=a->i[i]; j<a->i[i+1]; j++) {
1655       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1656       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1657       aa = a->a + j*bs2;
1658       for (k=0; k<bs; k++) {
1659         for (l=0; l<bs; l++) {
1660           if (PetscRealPart(*aa++) != 0.) continue;
1661           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1662         }
1663       }
1664     }
1665   }
1666 
1667   color = PETSC_DRAW_RED;
1668   for (i=0,row=0; i<mbs; i++,row+=bs) {
1669     for (j=a->i[i]; j<a->i[i+1]; j++) {
1670       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1671       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1672       aa = a->a + j*bs2;
1673       for (k=0; k<bs; k++) {
1674         for (l=0; l<bs; l++) {
1675           if (PetscRealPart(*aa++) <= 0.) continue;
1676           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1677         }
1678       }
1679     }
1680   }
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1686 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1687 {
1688   PetscErrorCode ierr;
1689   PetscReal      xl,yl,xr,yr,w,h;
1690   PetscDraw      draw;
1691   PetscTruth     isnull;
1692 
1693   PetscFunctionBegin;
1694 
1695   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1696   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1697 
1698   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1699   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1700   xr += w;    yr += h;  xl = -w;     yl = -h;
1701   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1702   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1703   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatView_SeqBAIJ"
1709 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1710 {
1711   PetscErrorCode ierr;
1712   PetscTruth     iascii,isbinary,isdraw;
1713 
1714   PetscFunctionBegin;
1715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1717   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1718   if (iascii){
1719     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1720   } else if (isbinary) {
1721     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1722   } else if (isdraw) {
1723     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1724   } else {
1725     Mat B;
1726     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1727     ierr = MatView(B,viewer);CHKERRQ(ierr);
1728     ierr = MatDestroy(B);CHKERRQ(ierr);
1729   }
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1736 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1737 {
1738   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1739   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1740   PetscInt    *ai = a->i,*ailen = a->ilen;
1741   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1742   MatScalar   *ap,*aa = a->a;
1743 
1744   PetscFunctionBegin;
1745   for (k=0; k<m; k++) { /* loop over rows */
1746     row  = im[k]; brow = row/bs;
1747     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1748     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1749     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1750     nrow = ailen[brow];
1751     for (l=0; l<n; l++) { /* loop over columns */
1752       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1753       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1754       col  = in[l] ;
1755       bcol = col/bs;
1756       cidx = col%bs;
1757       ridx = row%bs;
1758       high = nrow;
1759       low  = 0; /* assume unsorted */
1760       while (high-low > 5) {
1761         t = (low+high)/2;
1762         if (rp[t] > bcol) high = t;
1763         else             low  = t;
1764       }
1765       for (i=low; i<high; i++) {
1766         if (rp[i] > bcol) break;
1767         if (rp[i] == bcol) {
1768           *v++ = ap[bs2*i+bs*cidx+ridx];
1769           goto finished;
1770         }
1771       }
1772       *v++ = 0.0;
1773       finished:;
1774     }
1775   }
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 #define CHUNKSIZE 10
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1782 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1783 {
1784   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1785   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1786   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1787   PetscErrorCode    ierr;
1788   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1789   PetscTruth        roworiented=a->roworiented;
1790   const PetscScalar *value = v;
1791   MatScalar         *ap,*aa = a->a,*bap;
1792 
1793   PetscFunctionBegin;
1794   if (roworiented) {
1795     stepval = (n-1)*bs;
1796   } else {
1797     stepval = (m-1)*bs;
1798   }
1799   for (k=0; k<m; k++) { /* loop over added rows */
1800     row  = im[k];
1801     if (row < 0) continue;
1802 #if defined(PETSC_USE_DEBUG)
1803     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1804 #endif
1805     rp   = aj + ai[row];
1806     ap   = aa + bs2*ai[row];
1807     rmax = imax[row];
1808     nrow = ailen[row];
1809     low  = 0;
1810     high = nrow;
1811     for (l=0; l<n; l++) { /* loop over added columns */
1812       if (in[l] < 0) continue;
1813 #if defined(PETSC_USE_DEBUG)
1814       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1815 #endif
1816       col = in[l];
1817       if (roworiented) {
1818         value = v + k*(stepval+bs)*bs + l*bs;
1819       } else {
1820         value = v + l*(stepval+bs)*bs + k*bs;
1821       }
1822       if (col <= lastcol) low = 0; else high = nrow;
1823       lastcol = col;
1824       while (high-low > 7) {
1825         t = (low+high)/2;
1826         if (rp[t] > col) high = t;
1827         else             low  = t;
1828       }
1829       for (i=low; i<high; i++) {
1830         if (rp[i] > col) break;
1831         if (rp[i] == col) {
1832           bap  = ap +  bs2*i;
1833           if (roworiented) {
1834             if (is == ADD_VALUES) {
1835               for (ii=0; ii<bs; ii++,value+=stepval) {
1836                 for (jj=ii; jj<bs2; jj+=bs) {
1837                   bap[jj] += *value++;
1838                 }
1839               }
1840             } else {
1841               for (ii=0; ii<bs; ii++,value+=stepval) {
1842                 for (jj=ii; jj<bs2; jj+=bs) {
1843                   bap[jj] = *value++;
1844                 }
1845               }
1846             }
1847           } else {
1848             if (is == ADD_VALUES) {
1849               for (ii=0; ii<bs; ii++,value+=stepval) {
1850                 for (jj=0; jj<bs; jj++) {
1851                   *bap++ += *value++;
1852                 }
1853               }
1854             } else {
1855               for (ii=0; ii<bs; ii++,value+=stepval) {
1856                 for (jj=0; jj<bs; jj++) {
1857                   *bap++  = *value++;
1858                 }
1859               }
1860             }
1861           }
1862           goto noinsert2;
1863         }
1864       }
1865       if (nonew == 1) goto noinsert2;
1866       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1867       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868       N = nrow++ - 1; high++;
1869       /* shift up all the later entries in this row */
1870       for (ii=N; ii>=i; ii--) {
1871         rp[ii+1] = rp[ii];
1872         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1873       }
1874       if (N >= i) {
1875         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1876       }
1877       rp[i] = col;
1878       bap   = ap +  bs2*i;
1879       if (roworiented) {
1880         for (ii=0; ii<bs; ii++,value+=stepval) {
1881           for (jj=ii; jj<bs2; jj+=bs) {
1882             bap[jj] = *value++;
1883           }
1884         }
1885       } else {
1886         for (ii=0; ii<bs; ii++,value+=stepval) {
1887           for (jj=0; jj<bs; jj++) {
1888             *bap++  = *value++;
1889           }
1890         }
1891       }
1892       noinsert2:;
1893       low = i;
1894     }
1895     ailen[row] = nrow;
1896   }
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1902 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1903 {
1904   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1905   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1906   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1907   PetscErrorCode ierr;
1908   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1909   MatScalar      *aa = a->a,*ap;
1910   PetscReal      ratio=0.6;
1911 
1912   PetscFunctionBegin;
1913   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1914 
1915   if (m) rmax = ailen[0];
1916   for (i=1; i<mbs; i++) {
1917     /* move each row back by the amount of empty slots (fshift) before it*/
1918     fshift += imax[i-1] - ailen[i-1];
1919     rmax   = PetscMax(rmax,ailen[i]);
1920     if (fshift) {
1921       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1922       N = ailen[i];
1923       for (j=0; j<N; j++) {
1924         ip[j-fshift] = ip[j];
1925         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1926       }
1927     }
1928     ai[i] = ai[i-1] + ailen[i-1];
1929   }
1930   if (mbs) {
1931     fshift += imax[mbs-1] - ailen[mbs-1];
1932     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933   }
1934   /* reset ilen and imax for each row */
1935   for (i=0; i<mbs; i++) {
1936     ailen[i] = imax[i] = ai[i+1] - ai[i];
1937   }
1938   a->nz = ai[mbs];
1939 
1940   /* diagonals may have moved, so kill the diagonal pointers */
1941   a->idiagvalid = PETSC_FALSE;
1942   if (fshift && a->diag) {
1943     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1944     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1945     a->diag = 0;
1946   }
1947   if (fshift && a->nounused == -1) {
1948     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1949   }
1950   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1952   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1953   a->reallocs          = 0;
1954   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1955 
1956   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1957   if (a->compressedrow.use){
1958     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1959   }
1960 
1961   A->same_nonzero = PETSC_TRUE;
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 /*
1966    This function returns an array of flags which indicate the locations of contiguous
1967    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1968    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1969    Assume: sizes should be long enough to hold all the values.
1970 */
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1973 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1974 {
1975   PetscInt   i,j,k,row;
1976   PetscTruth flg;
1977 
1978   PetscFunctionBegin;
1979   for (i=0,j=0; i<n; j++) {
1980     row = idx[i];
1981     if (row%bs!=0) { /* Not the begining of a block */
1982       sizes[j] = 1;
1983       i++;
1984     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1985       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1986       i++;
1987     } else { /* Begining of the block, so check if the complete block exists */
1988       flg = PETSC_TRUE;
1989       for (k=1; k<bs; k++) {
1990         if (row+k != idx[i+k]) { /* break in the block */
1991           flg = PETSC_FALSE;
1992           break;
1993         }
1994       }
1995       if (flg) { /* No break in the bs */
1996         sizes[j] = bs;
1997         i+= bs;
1998       } else {
1999         sizes[j] = 1;
2000         i++;
2001       }
2002     }
2003   }
2004   *bs_max = j;
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 #undef __FUNCT__
2009 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2010 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2011 {
2012   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2013   PetscErrorCode ierr;
2014   PetscInt       i,j,k,count,*rows;
2015   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2016   PetscScalar    zero = 0.0;
2017   MatScalar      *aa;
2018 
2019   PetscFunctionBegin;
2020   /* Make a copy of the IS and  sort it */
2021   /* allocate memory for rows,sizes */
2022   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2023 
2024   /* copy IS values to rows, and sort them */
2025   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2026   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2027   if (baij->keepnonzeropattern) {
2028     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2029     bs_max = is_n;
2030     A->same_nonzero = PETSC_TRUE;
2031   } else {
2032     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2033     A->same_nonzero = PETSC_FALSE;
2034   }
2035 
2036   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2037     row   = rows[j];
2038     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2039     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2040     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2041     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2042       if (diag != 0.0) {
2043         if (baij->ilen[row/bs] > 0) {
2044           baij->ilen[row/bs]       = 1;
2045           baij->j[baij->i[row/bs]] = row/bs;
2046           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2047         }
2048         /* Now insert all the diagonal values for this bs */
2049         for (k=0; k<bs; k++) {
2050           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2051         }
2052       } else { /* (diag == 0.0) */
2053         baij->ilen[row/bs] = 0;
2054       } /* end (diag == 0.0) */
2055     } else { /* (sizes[i] != bs) */
2056 #if defined (PETSC_USE_DEBUG)
2057       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2058 #endif
2059       for (k=0; k<count; k++) {
2060         aa[0] =  zero;
2061         aa    += bs;
2062       }
2063       if (diag != 0.0) {
2064         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2065       }
2066     }
2067   }
2068 
2069   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2070   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2076 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2077 {
2078   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2079   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2080   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2081   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2082   PetscErrorCode ierr;
2083   PetscInt       ridx,cidx,bs2=a->bs2;
2084   PetscTruth     roworiented=a->roworiented;
2085   MatScalar      *ap,value,*aa=a->a,*bap;
2086 
2087   PetscFunctionBegin;
2088   if (v) PetscValidScalarPointer(v,6);
2089   for (k=0; k<m; k++) { /* loop over added rows */
2090     row  = im[k];
2091     brow = row/bs;
2092     if (row < 0) continue;
2093 #if defined(PETSC_USE_DEBUG)
2094     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2095 #endif
2096     rp   = aj + ai[brow];
2097     ap   = aa + bs2*ai[brow];
2098     rmax = imax[brow];
2099     nrow = ailen[brow];
2100     low  = 0;
2101     high = nrow;
2102     for (l=0; l<n; l++) { /* loop over added columns */
2103       if (in[l] < 0) continue;
2104 #if defined(PETSC_USE_DEBUG)
2105       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2106 #endif
2107       col = in[l]; bcol = col/bs;
2108       ridx = row % bs; cidx = col % bs;
2109       if (roworiented) {
2110         value = v[l + k*n];
2111       } else {
2112         value = v[k + l*m];
2113       }
2114       if (col <= lastcol) low = 0; else high = nrow;
2115       lastcol = col;
2116       while (high-low > 7) {
2117         t = (low+high)/2;
2118         if (rp[t] > bcol) high = t;
2119         else              low  = t;
2120       }
2121       for (i=low; i<high; i++) {
2122         if (rp[i] > bcol) break;
2123         if (rp[i] == bcol) {
2124           bap  = ap +  bs2*i + bs*cidx + ridx;
2125           if (is == ADD_VALUES) *bap += value;
2126           else                  *bap  = value;
2127           goto noinsert1;
2128         }
2129       }
2130       if (nonew == 1) goto noinsert1;
2131       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2133       N = nrow++ - 1; high++;
2134       /* shift up all the later entries in this row */
2135       for (ii=N; ii>=i; ii--) {
2136         rp[ii+1] = rp[ii];
2137         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2138       }
2139       if (N>=i) {
2140         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2141       }
2142       rp[i]                      = bcol;
2143       ap[bs2*i + bs*cidx + ridx] = value;
2144       a->nz++;
2145       noinsert1:;
2146       low = i;
2147     }
2148     ailen[brow] = nrow;
2149   }
2150   A->same_nonzero = PETSC_FALSE;
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2156 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2157 {
2158   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2159   Mat            outA;
2160   PetscErrorCode ierr;
2161   PetscTruth     row_identity,col_identity;
2162 
2163   PetscFunctionBegin;
2164   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2165   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2166   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2167   if (!row_identity || !col_identity) {
2168     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2169   }
2170 
2171   outA            = inA;
2172   inA->factortype = MAT_FACTOR_LU;
2173 
2174   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2175 
2176   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2177   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2178   a->row = row;
2179   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2180   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2181   a->col = col;
2182 
2183   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2184   if (a->icol) {
2185     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2186   }
2187   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2188   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2189 
2190   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2191   if (!a->solve_work) {
2192     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2193     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2194   }
2195   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2196 
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 EXTERN_C_BEGIN
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2203 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2204 {
2205   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2206   PetscInt    i,nz,mbs;
2207 
2208   PetscFunctionBegin;
2209   nz  = baij->maxnz/baij->bs2;
2210   mbs = baij->mbs;
2211   for (i=0; i<nz; i++) {
2212     baij->j[i] = indices[i];
2213   }
2214   baij->nz = nz;
2215   for (i=0; i<mbs; i++) {
2216     baij->ilen[i] = baij->imax[i];
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 EXTERN_C_END
2221 
2222 #undef __FUNCT__
2223 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2224 /*@
2225     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2226        in the matrix.
2227 
2228   Input Parameters:
2229 +  mat - the SeqBAIJ matrix
2230 -  indices - the column indices
2231 
2232   Level: advanced
2233 
2234   Notes:
2235     This can be called if you have precomputed the nonzero structure of the
2236   matrix and want to provide it to the matrix object to improve the performance
2237   of the MatSetValues() operation.
2238 
2239     You MUST have set the correct numbers of nonzeros per row in the call to
2240   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2241 
2242     MUST be called before any calls to MatSetValues();
2243 
2244 @*/
2245 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2246 {
2247   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2248 
2249   PetscFunctionBegin;
2250   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2251   PetscValidPointer(indices,2);
2252   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2253   if (f) {
2254     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2255   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2261 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2262 {
2263   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2264   PetscErrorCode ierr;
2265   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2266   PetscReal      atmp;
2267   PetscScalar    *x,zero = 0.0;
2268   MatScalar      *aa;
2269   PetscInt       ncols,brow,krow,kcol;
2270 
2271   PetscFunctionBegin;
2272   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2273   bs   = A->rmap->bs;
2274   aa   = a->a;
2275   ai   = a->i;
2276   aj   = a->j;
2277   mbs  = a->mbs;
2278 
2279   ierr = VecSet(v,zero);CHKERRQ(ierr);
2280   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2281   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2282   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2283   for (i=0; i<mbs; i++) {
2284     ncols = ai[1] - ai[0]; ai++;
2285     brow  = bs*i;
2286     for (j=0; j<ncols; j++){
2287       for (kcol=0; kcol<bs; kcol++){
2288         for (krow=0; krow<bs; krow++){
2289           atmp = PetscAbsScalar(*aa);aa++;
2290           row = brow + krow;    /* row index */
2291           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2292           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2293         }
2294       }
2295       aj++;
2296     }
2297   }
2298   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatCopy_SeqBAIJ"
2304 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2305 {
2306   PetscErrorCode ierr;
2307 
2308   PetscFunctionBegin;
2309   /* If the two matrices have the same copy implementation, use fast copy. */
2310   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2311     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2312     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2313 
2314     if (a->i[A->rmap->N] != b->i[B->rmap->N]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2315     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2316   } else {
2317     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2318   }
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2324 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2325 {
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 #undef __FUNCT__
2334 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2335 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2336 {
2337   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2338   PetscFunctionBegin;
2339   *array = a->a;
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2345 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2346 {
2347   PetscFunctionBegin;
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2353 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2354 {
2355   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2356   PetscErrorCode ierr;
2357   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2358   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2359 
2360   PetscFunctionBegin;
2361   if (str == SAME_NONZERO_PATTERN) {
2362     PetscScalar alpha = a;
2363     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2364   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2365     if (y->xtoy && y->XtoY != X) {
2366       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2367       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2368     }
2369     if (!y->xtoy) { /* get xtoy */
2370       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2371       y->XtoY = X;
2372       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2373     }
2374     bs2 = bs*bs;
2375     for (i=0; i<x->nz; i++) {
2376       j = 0;
2377       while (j < bs2){
2378         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2379         j++;
2380       }
2381     }
2382     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2383   } else {
2384     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2385   }
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2391 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2392 {
2393   PetscInt rbs,cbs;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2398   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2399   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2400   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2406 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2407 {
2408   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2409   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2410   MatScalar      *aa = a->a;
2411 
2412   PetscFunctionBegin;
2413   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2419 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2420 {
2421   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2422   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2423   MatScalar      *aa = a->a;
2424 
2425   PetscFunctionBegin;
2426   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2434 /*
2435     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2436 */
2437 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2438 {
2439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2440   PetscErrorCode ierr;
2441   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2442   PetscInt       nz = a->i[m],row,*jj,mr,col;
2443 
2444   PetscFunctionBegin;
2445   *nn = n;
2446   if (!ia) PetscFunctionReturn(0);
2447   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2448   else {
2449     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2450     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2451     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2452     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2453     jj = a->j;
2454     for (i=0; i<nz; i++) {
2455       collengths[jj[i]]++;
2456     }
2457     cia[0] = oshift;
2458     for (i=0; i<n; i++) {
2459       cia[i+1] = cia[i] + collengths[i];
2460     }
2461     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2462     jj   = a->j;
2463     for (row=0; row<m; row++) {
2464       mr = a->i[row+1] - a->i[row];
2465       for (i=0; i<mr; i++) {
2466         col = *jj++;
2467         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2468       }
2469     }
2470     ierr = PetscFree(collengths);CHKERRQ(ierr);
2471     *ia = cia; *ja = cja;
2472   }
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 #undef __FUNCT__
2477 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2478 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2479 {
2480   PetscErrorCode ierr;
2481 
2482   PetscFunctionBegin;
2483   if (!ia) PetscFunctionReturn(0);
2484   ierr = PetscFree(*ia);CHKERRQ(ierr);
2485   ierr = PetscFree(*ja);CHKERRQ(ierr);
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #undef __FUNCT__
2490 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2491 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2492 {
2493   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2494   PetscErrorCode ierr;
2495   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2496   PetscScalar    dx,*y,*xx,*w3_array;
2497   PetscScalar    *vscale_array;
2498   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2499   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2500   void           *fctx = coloring->fctx;
2501   PetscTruth     flg = PETSC_FALSE;
2502   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2503   Vec            x1_tmp;
2504 
2505   PetscFunctionBegin;
2506   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2507   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2508   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2509   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2510 
2511   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2512   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2513   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2514   if (flg) {
2515     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2516   } else {
2517     PetscTruth assembled;
2518     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2519     if (assembled) {
2520       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2521     }
2522   }
2523 
2524   x1_tmp = x1;
2525   if (!coloring->vscale){
2526     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2527   }
2528 
2529   /*
2530     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2531     coloring->F for the coarser grids from the finest
2532   */
2533   if (coloring->F) {
2534     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2535     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2536     if (m1 != m2) {
2537       coloring->F = 0;
2538       }
2539     }
2540 
2541   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2542     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2543   }
2544   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2545 
2546   /* Set w1 = F(x1) */
2547   if (coloring->F) {
2548     w1          = coloring->F; /* use already computed value of function */
2549     coloring->F = 0;
2550   } else {
2551     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2552     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2553     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2554   }
2555 
2556   if (!coloring->w3) {
2557     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2558     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2559   }
2560   w3 = coloring->w3;
2561 
2562     CHKMEMQ;
2563     /* Compute all the local scale factors, including ghost points */
2564   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2565   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2566   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2567   if (ctype == IS_COLORING_GHOSTED){
2568     col_start = 0; col_end = N;
2569   } else if (ctype == IS_COLORING_GLOBAL){
2570     xx = xx - start;
2571     vscale_array = vscale_array - start;
2572     col_start = start; col_end = N + start;
2573   }    CHKMEMQ;
2574   for (col=col_start; col<col_end; col++){
2575     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2576     if (coloring->htype[0] == 'w') {
2577       dx = 1.0 + unorm;
2578     } else {
2579       dx  = xx[col];
2580     }
2581     if (dx == 0.0) dx = 1.0;
2582 #if !defined(PETSC_USE_COMPLEX)
2583     if (dx < umin && dx >= 0.0)      dx = umin;
2584     else if (dx < 0.0 && dx > -umin) dx = -umin;
2585 #else
2586     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2587     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2588 #endif
2589     dx               *= epsilon;
2590     vscale_array[col] = 1.0/dx;
2591   }     CHKMEMQ;
2592   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2593   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2594   if (ctype == IS_COLORING_GLOBAL){
2595     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2596     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2597   }
2598   CHKMEMQ;
2599   if (coloring->vscaleforrow) {
2600     vscaleforrow = coloring->vscaleforrow;
2601   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2602 
2603   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2604   /*
2605     Loop over each color
2606   */
2607   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2608   for (k=0; k<coloring->ncolors; k++) {
2609     coloring->currentcolor = k;
2610     for (i=0; i<bs; i++) {
2611       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2612       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2613       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2614       /*
2615 	Loop over each column associated with color
2616 	adding the perturbation to the vector w3.
2617       */
2618       for (l=0; l<coloring->ncolumns[k]; l++) {
2619 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2620 	if (coloring->htype[0] == 'w') {
2621 	  dx = 1.0 + unorm;
2622 	} else {
2623 	  dx  = xx[col];
2624 	}
2625 	if (dx == 0.0) dx = 1.0;
2626 #if !defined(PETSC_USE_COMPLEX)
2627 	if (dx < umin && dx >= 0.0)      dx = umin;
2628 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2629 #else
2630 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2631 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2632 #endif
2633 	dx            *= epsilon;
2634 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2635 	w3_array[col] += dx;
2636       }
2637       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2638       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2639 
2640       /*
2641 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2642 	w2 = F(x1 + dx) - F(x1)
2643       */
2644       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2645       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2646       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2647       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2648 
2649       /*
2650 	Loop over rows of vector, putting results into Jacobian matrix
2651       */
2652       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2653       for (l=0; l<coloring->nrows[k]; l++) {
2654 	row    = bs*coloring->rows[k][l];             /* local row index */
2655 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2656         for (j=0; j<bs; j++) {
2657   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2658           srows[j]  = row + start + j;
2659         }
2660 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2661       }
2662       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2663     }
2664   } /* endof for each color */
2665   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2666   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2667   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2668   ierr = PetscFree(srows);CHKERRQ(ierr);
2669 
2670   coloring->currentcolor = -1;
2671   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2672   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2673   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 /* -------------------------------------------------------------------*/
2678 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2679        MatGetRow_SeqBAIJ,
2680        MatRestoreRow_SeqBAIJ,
2681        MatMult_SeqBAIJ_N,
2682 /* 4*/ MatMultAdd_SeqBAIJ_N,
2683        MatMultTranspose_SeqBAIJ,
2684        MatMultTransposeAdd_SeqBAIJ,
2685        0,
2686        0,
2687        0,
2688 /*10*/ 0,
2689        MatLUFactor_SeqBAIJ,
2690        0,
2691        0,
2692        MatTranspose_SeqBAIJ,
2693 /*15*/ MatGetInfo_SeqBAIJ,
2694        MatEqual_SeqBAIJ,
2695        MatGetDiagonal_SeqBAIJ,
2696        MatDiagonalScale_SeqBAIJ,
2697        MatNorm_SeqBAIJ,
2698 /*20*/ 0,
2699        MatAssemblyEnd_SeqBAIJ,
2700        MatSetOption_SeqBAIJ,
2701        MatZeroEntries_SeqBAIJ,
2702 /*24*/ MatZeroRows_SeqBAIJ,
2703        0,
2704        0,
2705        0,
2706        0,
2707 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2708        0,
2709        0,
2710        MatGetArray_SeqBAIJ,
2711        MatRestoreArray_SeqBAIJ,
2712 /*34*/ MatDuplicate_SeqBAIJ,
2713        0,
2714        0,
2715        MatILUFactor_SeqBAIJ,
2716        0,
2717 /*39*/ MatAXPY_SeqBAIJ,
2718        MatGetSubMatrices_SeqBAIJ,
2719        MatIncreaseOverlap_SeqBAIJ,
2720        MatGetValues_SeqBAIJ,
2721        MatCopy_SeqBAIJ,
2722 /*44*/ 0,
2723        MatScale_SeqBAIJ,
2724        0,
2725        0,
2726        0,
2727 /*49*/ MatSetBlockSize_SeqBAIJ,
2728        MatGetRowIJ_SeqBAIJ,
2729        MatRestoreRowIJ_SeqBAIJ,
2730        MatGetColumnIJ_SeqBAIJ,
2731        MatRestoreColumnIJ_SeqBAIJ,
2732 /*54*/ MatFDColoringCreate_SeqAIJ,
2733        0,
2734        0,
2735        0,
2736        MatSetValuesBlocked_SeqBAIJ,
2737 /*59*/ MatGetSubMatrix_SeqBAIJ,
2738        MatDestroy_SeqBAIJ,
2739        MatView_SeqBAIJ,
2740        0,
2741        0,
2742 /*64*/ 0,
2743        0,
2744        0,
2745        0,
2746        0,
2747 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2748        0,
2749        MatConvert_Basic,
2750        0,
2751        0,
2752 /*74*/ 0,
2753        MatFDColoringApply_BAIJ,
2754        0,
2755        0,
2756        0,
2757 /*79*/ 0,
2758        0,
2759        0,
2760        0,
2761        MatLoad_SeqBAIJ,
2762 /*84*/ 0,
2763        0,
2764        0,
2765        0,
2766        0,
2767 /*89*/ 0,
2768        0,
2769        0,
2770        0,
2771        0,
2772 /*94*/ 0,
2773        0,
2774        0,
2775        0,
2776        0,
2777 /*99*/0,
2778        0,
2779        0,
2780        0,
2781        0,
2782 /*104*/0,
2783        MatRealPart_SeqBAIJ,
2784        MatImaginaryPart_SeqBAIJ,
2785        0,
2786        0,
2787 /*109*/0,
2788        0,
2789        0,
2790        0,
2791        MatMissingDiagonal_SeqBAIJ,
2792 /*114*/0,
2793        0,
2794        0,
2795        0,
2796        0,
2797 /*119*/0,
2798        0,
2799        MatMultHermitianTranspose_SeqBAIJ,
2800        MatMultHermitianTransposeAdd_SeqBAIJ
2801 };
2802 
2803 EXTERN_C_BEGIN
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2806 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2807 {
2808   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2809   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2814 
2815   /* allocate space for values if not already there */
2816   if (!aij->saved_values) {
2817     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2818     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2819   }
2820 
2821   /* copy values over */
2822   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2823   PetscFunctionReturn(0);
2824 }
2825 EXTERN_C_END
2826 
2827 EXTERN_C_BEGIN
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2830 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2831 {
2832   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2833   PetscErrorCode ierr;
2834   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2835 
2836   PetscFunctionBegin;
2837   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2838   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2839 
2840   /* copy values over */
2841   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2842   PetscFunctionReturn(0);
2843 }
2844 EXTERN_C_END
2845 
2846 EXTERN_C_BEGIN
2847 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2848 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2849 EXTERN_C_END
2850 
2851 EXTERN_C_BEGIN
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2854 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2855 {
2856   Mat_SeqBAIJ    *b;
2857   PetscErrorCode ierr;
2858   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2859   PetscTruth     flg,skipallocation = PETSC_FALSE;
2860 
2861   PetscFunctionBegin;
2862 
2863   if (nz == MAT_SKIP_ALLOCATION) {
2864     skipallocation = PETSC_TRUE;
2865     nz             = 0;
2866   }
2867 
2868   if (bs < 0) {
2869     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2870       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2871     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2872     bs   = PetscAbs(bs);
2873   }
2874   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2875   bs   = newbs;
2876 
2877   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2878   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2879   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2880   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2881 
2882   B->preallocated = PETSC_TRUE;
2883 
2884   mbs  = B->rmap->n/bs;
2885   nbs  = B->cmap->n/bs;
2886   bs2  = bs*bs;
2887 
2888   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2889     SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2890   }
2891 
2892   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2893   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2894   if (nnz) {
2895     for (i=0; i<mbs; i++) {
2896       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2897       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2898     }
2899   }
2900 
2901   b       = (Mat_SeqBAIJ*)B->data;
2902   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2903     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2904   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2905 
2906   if (!flg) {
2907     switch (bs) {
2908     case 1:
2909       B->ops->mult            = MatMult_SeqBAIJ_1;
2910       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2911       B->ops->sor             = MatSOR_SeqBAIJ_1;
2912       break;
2913     case 2:
2914       B->ops->mult            = MatMult_SeqBAIJ_2;
2915       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2916       B->ops->sor             = MatSOR_SeqBAIJ_2;
2917       break;
2918     case 3:
2919       B->ops->mult            = MatMult_SeqBAIJ_3;
2920       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2921       B->ops->sor             = MatSOR_SeqBAIJ_3;
2922       break;
2923     case 4:
2924       B->ops->mult            = MatMult_SeqBAIJ_4;
2925       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2926       B->ops->sor             = MatSOR_SeqBAIJ_4;
2927       break;
2928     case 5:
2929       B->ops->mult            = MatMult_SeqBAIJ_5;
2930       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2931       B->ops->sor             = MatSOR_SeqBAIJ_5;
2932       break;
2933     case 6:
2934       B->ops->mult            = MatMult_SeqBAIJ_6;
2935       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2936       B->ops->sor             = MatSOR_SeqBAIJ_6;
2937       break;
2938     case 7:
2939       B->ops->mult            = MatMult_SeqBAIJ_7;
2940       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2941       B->ops->sor             = MatSOR_SeqBAIJ_7;
2942       break;
2943     case 15:
2944       B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2945       break;
2946     default:
2947       B->ops->mult            = MatMult_SeqBAIJ_N;
2948       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2949       break;
2950     }
2951   }
2952   B->rmap->bs      = bs;
2953   b->mbs     = mbs;
2954   b->nbs     = nbs;
2955   if (!skipallocation) {
2956     if (!b->imax) {
2957       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2958       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2959       b->free_imax_ilen = PETSC_TRUE;
2960     }
2961     /* b->ilen will count nonzeros in each block row so far. */
2962     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2963     if (!nnz) {
2964       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2965       else if (nz <= 0)        nz = 1;
2966       for (i=0; i<mbs; i++) b->imax[i] = nz;
2967       nz = nz*mbs;
2968     } else {
2969       nz = 0;
2970       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2971     }
2972 
2973     /* allocate the matrix space */
2974     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2975     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2976     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2977     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2978     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2979     b->singlemalloc = PETSC_TRUE;
2980     b->i[0] = 0;
2981     for (i=1; i<mbs+1; i++) {
2982       b->i[i] = b->i[i-1] + b->imax[i-1];
2983     }
2984     b->free_a     = PETSC_TRUE;
2985     b->free_ij    = PETSC_TRUE;
2986   } else {
2987     b->free_a     = PETSC_FALSE;
2988     b->free_ij    = PETSC_FALSE;
2989   }
2990 
2991   B->rmap->bs          = bs;
2992   b->bs2              = bs2;
2993   b->mbs              = mbs;
2994   b->nz               = 0;
2995   b->maxnz            = nz*bs2;
2996   B->info.nz_unneeded = (PetscReal)b->maxnz;
2997   PetscFunctionReturn(0);
2998 }
2999 EXTERN_C_END
3000 
3001 EXTERN_C_BEGIN
3002 #undef __FUNCT__
3003 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3004 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3005 {
3006   PetscInt       i,m,nz,nz_max=0,*nnz;
3007   PetscScalar    *values=0;
3008   PetscErrorCode ierr;
3009 
3010   PetscFunctionBegin;
3011 
3012   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3013 
3014   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3015   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3016   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3017   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3018   m = B->rmap->n/bs;
3019 
3020   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3021   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3022   for(i=0; i<m; i++) {
3023     nz = ii[i+1]- ii[i];
3024     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3025     nz_max = PetscMax(nz_max, nz);
3026     nnz[i] = nz;
3027   }
3028   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3029   ierr = PetscFree(nnz);CHKERRQ(ierr);
3030 
3031   values = (PetscScalar*)V;
3032   if (!values) {
3033     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3034     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3035   }
3036   for (i=0; i<m; i++) {
3037     PetscInt          ncols  = ii[i+1] - ii[i];
3038     const PetscInt    *icols = jj + ii[i];
3039     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3040     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3041   }
3042   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3043   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3044   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3045 
3046   PetscFunctionReturn(0);
3047 }
3048 EXTERN_C_END
3049 
3050 
3051 EXTERN_C_BEGIN
3052 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3053 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3054 EXTERN_C_END
3055 
3056 /*MC
3057    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3058    block sparse compressed row format.
3059 
3060    Options Database Keys:
3061 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3062 
3063   Level: beginner
3064 
3065 .seealso: MatCreateSeqBAIJ()
3066 M*/
3067 
3068 
3069 EXTERN_C_BEGIN
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatCreate_SeqBAIJ"
3072 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3073 {
3074   PetscErrorCode ierr;
3075   PetscMPIInt    size;
3076   Mat_SeqBAIJ    *b;
3077 
3078   PetscFunctionBegin;
3079   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3080   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3081 
3082   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3083   B->data = (void*)b;
3084   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3085   B->mapping               = 0;
3086   b->row                   = 0;
3087   b->col                   = 0;
3088   b->icol                  = 0;
3089   b->reallocs              = 0;
3090   b->saved_values          = 0;
3091 
3092   b->roworiented           = PETSC_TRUE;
3093   b->nonew                 = 0;
3094   b->diag                  = 0;
3095   b->solve_work            = 0;
3096   b->mult_work             = 0;
3097   B->spptr                 = 0;
3098   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3099   b->keepnonzeropattern    = PETSC_FALSE;
3100   b->xtoy                  = 0;
3101   b->XtoY                  = 0;
3102   b->compressedrow.use     = PETSC_FALSE;
3103   b->compressedrow.nrows   = 0;
3104   b->compressedrow.i       = PETSC_NULL;
3105   b->compressedrow.rindex  = PETSC_NULL;
3106   b->compressedrow.checked = PETSC_FALSE;
3107   B->same_nonzero          = PETSC_FALSE;
3108 
3109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3110                                      "MatGetFactorAvailable_seqbaij_petsc",
3111                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3113                                      "MatGetFactor_seqbaij_petsc",
3114                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3116                                      "MatInvertBlockDiagonal_SeqBAIJ",
3117                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3119                                      "MatStoreValues_SeqBAIJ",
3120                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3121   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3122                                      "MatRetrieveValues_SeqBAIJ",
3123                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3124   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3125                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3126                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3127   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3128                                      "MatConvert_SeqBAIJ_SeqAIJ",
3129                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3130   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3131                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3132                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3133   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3134                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3135                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3136   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3137                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3138                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3139   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3140   PetscFunctionReturn(0);
3141 }
3142 EXTERN_C_END
3143 
3144 #undef __FUNCT__
3145 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3146 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3147 {
3148   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3149   PetscErrorCode ierr;
3150   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3151 
3152   PetscFunctionBegin;
3153   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3154 
3155   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3156     c->imax = a->imax;
3157     c->ilen = a->ilen;
3158     c->free_imax_ilen = PETSC_FALSE;
3159   } else {
3160     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3161     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3162     for (i=0; i<mbs; i++) {
3163       c->imax[i] = a->imax[i];
3164       c->ilen[i] = a->ilen[i];
3165     }
3166     c->free_imax_ilen = PETSC_TRUE;
3167   }
3168 
3169   /* allocate the matrix space */
3170   if (mallocmatspace){
3171     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3172       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3173       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3174       c->singlemalloc = PETSC_FALSE;
3175       c->free_ij      = PETSC_FALSE;
3176       c->i            = a->i;
3177       c->j            = a->j;
3178       c->parent       = A;
3179       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3180       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3181       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3182     } else {
3183       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3184       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3185       c->singlemalloc = PETSC_TRUE;
3186       c->free_ij      = PETSC_TRUE;
3187       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3188       if (mbs > 0) {
3189 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3190 	if (cpvalues == MAT_COPY_VALUES) {
3191 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3192 	} else {
3193 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3194 	}
3195       }
3196     }
3197   }
3198 
3199   c->roworiented = a->roworiented;
3200   c->nonew       = a->nonew;
3201   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3202   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3203   c->bs2         = a->bs2;
3204   c->mbs         = a->mbs;
3205   c->nbs         = a->nbs;
3206 
3207   if (a->diag) {
3208     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3209       c->diag      = a->diag;
3210       c->free_diag = PETSC_FALSE;
3211     } else {
3212       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3213       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3214       for (i=0; i<mbs; i++) {
3215         c->diag[i] = a->diag[i];
3216       }
3217       c->free_diag = PETSC_TRUE;
3218     }
3219   } else c->diag        = 0;
3220   c->nz                 = a->nz;
3221   c->maxnz              = bs2*a->nz; /* Since we allocate exactly the right amount */
3222   c->solve_work         = 0;
3223   c->mult_work          = 0;
3224   c->free_a             = PETSC_TRUE;
3225   c->free_ij            = PETSC_TRUE;
3226   C->preallocated       = PETSC_TRUE;
3227   C->assembled          = PETSC_TRUE;
3228 
3229   c->compressedrow.use     = a->compressedrow.use;
3230   c->compressedrow.nrows   = a->compressedrow.nrows;
3231   c->compressedrow.checked = a->compressedrow.checked;
3232   if (a->compressedrow.checked && a->compressedrow.use){
3233     i = a->compressedrow.nrows;
3234     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3235     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3236     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3237     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3238   } else {
3239     c->compressedrow.use    = PETSC_FALSE;
3240     c->compressedrow.i      = PETSC_NULL;
3241     c->compressedrow.rindex = PETSC_NULL;
3242   }
3243   C->same_nonzero = A->same_nonzero;
3244   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3245   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3251 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3252 {
3253     PetscErrorCode ierr;
3254 
3255   PetscFunctionBegin;
3256   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3257   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3258   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3259   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatLoad_SeqBAIJ"
3265 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3266 {
3267   Mat_SeqBAIJ    *a;
3268   Mat            B;
3269   PetscErrorCode ierr;
3270   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3271   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3272   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3273   PetscInt       *masked,nmask,tmp,bs2,ishift;
3274   PetscMPIInt    size;
3275   int            fd;
3276   PetscScalar    *aa;
3277   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3278 
3279   PetscFunctionBegin;
3280   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3281     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3282   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3283   bs2  = bs*bs;
3284 
3285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3286   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3287   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3288   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3289   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3290   M = header[1]; N = header[2]; nz = header[3];
3291 
3292   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3293   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3294 
3295   /*
3296      This code adds extra rows to make sure the number of rows is
3297     divisible by the blocksize
3298   */
3299   mbs        = M/bs;
3300   extra_rows = bs - M + bs*(mbs);
3301   if (extra_rows == bs) extra_rows = 0;
3302   else                  mbs++;
3303   if (extra_rows) {
3304     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3305   }
3306 
3307   /* read in row lengths */
3308   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3309   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3310   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3311 
3312   /* read in column indices */
3313   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3314   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3315   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3316 
3317   /* loop over row lengths determining block row lengths */
3318   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3319   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3320   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3321   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3322   rowcount = 0;
3323   nzcount = 0;
3324   for (i=0; i<mbs; i++) {
3325     nmask = 0;
3326     for (j=0; j<bs; j++) {
3327       kmax = rowlengths[rowcount];
3328       for (k=0; k<kmax; k++) {
3329         tmp = jj[nzcount++]/bs;
3330         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3331       }
3332       rowcount++;
3333     }
3334     browlengths[i] += nmask;
3335     /* zero out the mask elements we set */
3336     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3337   }
3338 
3339   /* create our matrix */
3340   ierr = MatCreate(comm,&B);
3341   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3342   ierr = MatSetType(B,type);CHKERRQ(ierr);
3343   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3344   a = (Mat_SeqBAIJ*)B->data;
3345 
3346   /* set matrix "i" values */
3347   a->i[0] = 0;
3348   for (i=1; i<= mbs; i++) {
3349     a->i[i]      = a->i[i-1] + browlengths[i-1];
3350     a->ilen[i-1] = browlengths[i-1];
3351   }
3352   a->nz         = 0;
3353   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3354 
3355   /* read in nonzero values */
3356   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3357   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3358   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3359 
3360   /* set "a" and "j" values into matrix */
3361   nzcount = 0; jcount = 0;
3362   for (i=0; i<mbs; i++) {
3363     nzcountb = nzcount;
3364     nmask    = 0;
3365     for (j=0; j<bs; j++) {
3366       kmax = rowlengths[i*bs+j];
3367       for (k=0; k<kmax; k++) {
3368         tmp = jj[nzcount++]/bs;
3369 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3370       }
3371     }
3372     /* sort the masked values */
3373     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3374 
3375     /* set "j" values into matrix */
3376     maskcount = 1;
3377     for (j=0; j<nmask; j++) {
3378       a->j[jcount++]  = masked[j];
3379       mask[masked[j]] = maskcount++;
3380     }
3381     /* set "a" values into matrix */
3382     ishift = bs2*a->i[i];
3383     for (j=0; j<bs; j++) {
3384       kmax = rowlengths[i*bs+j];
3385       for (k=0; k<kmax; k++) {
3386         tmp       = jj[nzcountb]/bs ;
3387         block     = mask[tmp] - 1;
3388         point     = jj[nzcountb] - bs*tmp;
3389         idx       = ishift + bs2*block + j + bs*point;
3390         a->a[idx] = (MatScalar)aa[nzcountb++];
3391       }
3392     }
3393     /* zero out the mask elements we set */
3394     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3395   }
3396   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3397 
3398   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3399   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3400   ierr = PetscFree(aa);CHKERRQ(ierr);
3401   ierr = PetscFree(jj);CHKERRQ(ierr);
3402   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3403 
3404   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3405   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3406   ierr = MatView_Private(B);CHKERRQ(ierr);
3407 
3408   *A = B;
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 #undef __FUNCT__
3413 #define __FUNCT__ "MatCreateSeqBAIJ"
3414 /*@C
3415    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3416    compressed row) format.  For good matrix assembly performance the
3417    user should preallocate the matrix storage by setting the parameter nz
3418    (or the array nnz).  By setting these parameters accurately, performance
3419    during matrix assembly can be increased by more than a factor of 50.
3420 
3421    Collective on MPI_Comm
3422 
3423    Input Parameters:
3424 +  comm - MPI communicator, set to PETSC_COMM_SELF
3425 .  bs - size of block
3426 .  m - number of rows
3427 .  n - number of columns
3428 .  nz - number of nonzero blocks  per block row (same for all rows)
3429 -  nnz - array containing the number of nonzero blocks in the various block rows
3430          (possibly different for each block row) or PETSC_NULL
3431 
3432    Output Parameter:
3433 .  A - the matrix
3434 
3435    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3436    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3437    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3438 
3439    Options Database Keys:
3440 .   -mat_no_unroll - uses code that does not unroll the loops in the
3441                      block calculations (much slower)
3442 .    -mat_block_size - size of the blocks to use
3443 
3444    Level: intermediate
3445 
3446    Notes:
3447    The number of rows and columns must be divisible by blocksize.
3448 
3449    If the nnz parameter is given then the nz parameter is ignored
3450 
3451    A nonzero block is any block that as 1 or more nonzeros in it
3452 
3453    The block AIJ format is fully compatible with standard Fortran 77
3454    storage.  That is, the stored row and column indices can begin at
3455    either one (as in Fortran) or zero.  See the users' manual for details.
3456 
3457    Specify the preallocated storage with either nz or nnz (not both).
3458    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3459    allocation.  For additional details, see the users manual chapter on
3460    matrices.
3461 
3462 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3463 @*/
3464 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3465 {
3466   PetscErrorCode ierr;
3467 
3468   PetscFunctionBegin;
3469   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3470   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3471   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3472   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3473   PetscFunctionReturn(0);
3474 }
3475 
3476 #undef __FUNCT__
3477 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3478 /*@C
3479    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3480    per row in the matrix. For good matrix assembly performance the
3481    user should preallocate the matrix storage by setting the parameter nz
3482    (or the array nnz).  By setting these parameters accurately, performance
3483    during matrix assembly can be increased by more than a factor of 50.
3484 
3485    Collective on MPI_Comm
3486 
3487    Input Parameters:
3488 +  A - the matrix
3489 .  bs - size of block
3490 .  nz - number of block nonzeros per block row (same for all rows)
3491 -  nnz - array containing the number of block nonzeros in the various block rows
3492          (possibly different for each block row) or PETSC_NULL
3493 
3494    Options Database Keys:
3495 .   -mat_no_unroll - uses code that does not unroll the loops in the
3496                      block calculations (much slower)
3497 .    -mat_block_size - size of the blocks to use
3498 
3499    Level: intermediate
3500 
3501    Notes:
3502    If the nnz parameter is given then the nz parameter is ignored
3503 
3504    You can call MatGetInfo() to get information on how effective the preallocation was;
3505    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3506    You can also run with the option -info and look for messages with the string
3507    malloc in them to see if additional memory allocation was needed.
3508 
3509    The block AIJ format is fully compatible with standard Fortran 77
3510    storage.  That is, the stored row and column indices can begin at
3511    either one (as in Fortran) or zero.  See the users' manual for details.
3512 
3513    Specify the preallocated storage with either nz or nnz (not both).
3514    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3515    allocation.  For additional details, see the users manual chapter on
3516    matrices.
3517 
3518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3519 @*/
3520 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3521 {
3522   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3523 
3524   PetscFunctionBegin;
3525   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3526   if (f) {
3527     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3528   }
3529   PetscFunctionReturn(0);
3530 }
3531 
3532 #undef __FUNCT__
3533 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3534 /*@C
3535    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3536    (the default sequential PETSc format).
3537 
3538    Collective on MPI_Comm
3539 
3540    Input Parameters:
3541 +  A - the matrix
3542 .  i - the indices into j for the start of each local row (starts with zero)
3543 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3544 -  v - optional values in the matrix
3545 
3546    Level: developer
3547 
3548 .keywords: matrix, aij, compressed row, sparse
3549 
3550 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3551 @*/
3552 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3553 {
3554   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3555 
3556   PetscFunctionBegin;
3557   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3558   if (f) {
3559     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3560   }
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3567 /*@
3568      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3569               (upper triangular entries in CSR format) provided by the user.
3570 
3571      Collective on MPI_Comm
3572 
3573    Input Parameters:
3574 +  comm - must be an MPI communicator of size 1
3575 .  bs - size of block
3576 .  m - number of rows
3577 .  n - number of columns
3578 .  i - row indices
3579 .  j - column indices
3580 -  a - matrix values
3581 
3582    Output Parameter:
3583 .  mat - the matrix
3584 
3585    Level: intermediate
3586 
3587    Notes:
3588        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3589     once the matrix is destroyed
3590 
3591        You cannot set new nonzero locations into this matrix, that will generate an error.
3592 
3593        The i and j indices are 0 based
3594 
3595 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3596 
3597 @*/
3598 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3599 {
3600   PetscErrorCode ierr;
3601   PetscInt       ii;
3602   Mat_SeqBAIJ    *baij;
3603 
3604   PetscFunctionBegin;
3605   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3606   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3607 
3608   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3609   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3610   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3611   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3612   baij = (Mat_SeqBAIJ*)(*mat)->data;
3613   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3614   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3615 
3616   baij->i = i;
3617   baij->j = j;
3618   baij->a = a;
3619   baij->singlemalloc = PETSC_FALSE;
3620   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3621   baij->free_a       = PETSC_FALSE;
3622   baij->free_ij       = PETSC_FALSE;
3623 
3624   for (ii=0; ii<m; ii++) {
3625     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3626 #if defined(PETSC_USE_DEBUG)
3627     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3628 #endif
3629   }
3630 #if defined(PETSC_USE_DEBUG)
3631   for (ii=0; ii<baij->i[m]; ii++) {
3632     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3633     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3634   }
3635 #endif
3636 
3637   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3639   PetscFunctionReturn(0);
3640 }
3641