xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0e97c5f4ea7aec56dfb77b4089b31f0ca8c13ef5)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "../src/mat/blockinvert.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr,(*f)(Mat);
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
28   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30 
31   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
32   if (f) {
33     ierr = (*f)(mat);CHKERRQ(ierr);
34   } else {
35     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
36   }
37   PetscFunctionReturn(0);
38 }
39 
40 EXTERN_C_BEGIN
41 #undef __FUNCT__
42 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
43 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
44 {
45   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
46   PetscErrorCode ierr;
47   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5];
48   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25];
49   PetscReal      shift = 0.0;
50 
51   PetscFunctionBegin;
52   if (a->idiagvalid) PetscFunctionReturn(0);
53   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
54   diag_offset = a->diag;
55   if (!a->idiag) {
56     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
57     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
58   }
59   diag  = a->idiag;
60   mdiag = a->idiag+bs*bs*mbs;
61   /* factor and invert each block */
62   switch (bs){
63     case 1:
64       for (i=0; i<mbs; i++) {
65         odiag = v + 1*diag_offset[i];
66         diag[0]  = odiag[0];
67         mdiag[0] = odiag[0];
68         diag[0]  = 1.0 / (diag[0] + shift);
69         diag    += 1;
70         mdiag   += 1;
71       }
72       break;
73     case 2:
74       for (i=0; i<mbs; i++) {
75         odiag   = v + 4*diag_offset[i];
76         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
78 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
79 	diag    += 4;
80 	mdiag   += 4;
81       }
82       break;
83     case 3:
84       for (i=0; i<mbs; i++) {
85         odiag    = v + 9*diag_offset[i];
86         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
87         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
88         diag[8]  = odiag[8];
89         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
90         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
91         mdiag[8] = odiag[8];
92 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
93 	diag    += 9;
94 	mdiag   += 9;
95       }
96       break;
97     case 4:
98       for (i=0; i<mbs; i++) {
99         odiag  = v + 16*diag_offset[i];
100         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
101         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
103 	diag  += 16;
104 	mdiag += 16;
105       }
106       break;
107     case 5:
108       for (i=0; i<mbs; i++) {
109         odiag = v + 25*diag_offset[i];
110         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
111         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
113 	diag  += 25;
114 	mdiag += 25;
115       }
116       break;
117     case 6:
118       for (i=0; i<mbs; i++) {
119         odiag = v + 36*diag_offset[i];
120         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
121         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
123 	diag  += 36;
124 	mdiag += 36;
125       }
126       break;
127     default:
128       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
129   }
130   a->idiagvalid = PETSC_TRUE;
131   PetscFunctionReturn(0);
132 }
133 EXTERN_C_END
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
137 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
138 {
139   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
140   PetscScalar        *x,x1,s1;
141   const PetscScalar  *b;
142   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
143   PetscErrorCode     ierr;
144   PetscInt           m = a->mbs,i,i2,nz,j;
145   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
149   its = its*lits;
150   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
155 
156   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
157 
158   diag  = a->diag;
159   idiag = a->idiag;
160   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
161   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
162 
163   if (flag & SOR_ZERO_INITIAL_GUESS) {
164     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
165       x[0] = b[0]*idiag[0];
166       i2     = 1;
167       idiag += 1;
168       for (i=1; i<m; i++) {
169         v     = aa + ai[i];
170         vi    = aj + ai[i];
171         nz    = diag[i] - ai[i];
172         s1    = b[i2];
173         for (j=0; j<nz; j++) {
174           s1 -= v[j]*x[vi[j]];
175         }
176         x[i2]   = idiag[0]*s1;
177         idiag   += 1;
178         i2      += 1;
179       }
180       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
181       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
182     }
183     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
184         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
185       i2    = 0;
186       mdiag = a->idiag+a->mbs;
187       for (i=0; i<m; i++) {
188         x1      = x[i2];
189         x[i2]   = mdiag[0]*x1;
190         mdiag  += 1;
191         i2     += 1;
192       }
193       ierr = PetscLogFlops(m);CHKERRQ(ierr);
194     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
195       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
196     }
197     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
198       idiag   = a->idiag+a->mbs - 1;
199       i2      = m - 1;
200       x1      = x[i2];
201       x[i2]   = idiag[0]*x1;
202       idiag -= 1;
203       i2    -= 1;
204       for (i=m-2; i>=0; i--) {
205         v     = aa + (diag[i]+1);
206         vi    = aj + diag[i] + 1;
207         nz    = ai[i+1] - diag[i] - 1;
208         s1    = x[i2];
209         for (j=0; j<nz; j++) {
210           s1 -= v[j]*x[vi[j]];
211         }
212         x[i2]   = idiag[0]*s1;
213         idiag   -= 1;
214         i2      -= 1;
215       }
216       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
217     }
218   } else {
219     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
220   }
221   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
222   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
228 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
229 {
230   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
231   PetscScalar        *x,x1,x2,s1,s2;
232   const PetscScalar  *b;
233   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
234   PetscErrorCode     ierr;
235   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
236   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
237 
238   PetscFunctionBegin;
239   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
240   its = its*lits;
241   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
242   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
243   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
244   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
245   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
246 
247   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
248 
249   diag  = a->diag;
250   idiag = a->idiag;
251   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
252   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
253 
254   if (flag & SOR_ZERO_INITIAL_GUESS) {
255     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
256       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
257       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
258       i2     = 2;
259       idiag += 4;
260       for (i=1; i<m; i++) {
261 	v     = aa + 4*ai[i];
262 	vi    = aj + ai[i];
263 	nz    = diag[i] - ai[i];
264 	s1    = b[i2]; s2 = b[i2+1];
265         for (j=0; j<nz; j++) {
266 	  idx  = 2*vi[j];
267           it   = 4*j;
268 	  x1   = x[idx]; x2 = x[1+idx];
269 	  s1  -= v[it]*x1 + v[it+2]*x2;
270 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
271 	}
272 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
273 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
274         idiag   += 4;
275         i2      += 2;
276       }
277       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
278       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
279     }
280     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
281         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
282       i2    = 0;
283       mdiag = a->idiag+4*a->mbs;
284       for (i=0; i<m; i++) {
285         x1      = x[i2]; x2 = x[i2+1];
286         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
287         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
288         mdiag  += 4;
289         i2     += 2;
290       }
291       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
292     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
293       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
294     }
295     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
296       idiag   = a->idiag+4*a->mbs - 4;
297       i2      = 2*m - 2;
298       x1      = x[i2]; x2 = x[i2+1];
299       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
300       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
301       idiag -= 4;
302       i2    -= 2;
303       for (i=m-2; i>=0; i--) {
304 	v     = aa + 4*(diag[i]+1);
305 	vi    = aj + diag[i] + 1;
306 	nz    = ai[i+1] - diag[i] - 1;
307 	s1    = x[i2]; s2 = x[i2+1];
308         for (j=0; j<nz; j++) {
309  	  idx  = 2*vi[j];
310           it   = 4*j;
311 	  x1   = x[idx]; x2 = x[1+idx];
312 	  s1  -= v[it]*x1 + v[it+2]*x2;
313 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
314 	}
315 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
316 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
317         idiag   -= 4;
318         i2      -= 2;
319       }
320       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
321     }
322   } else {
323     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
324   }
325   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
326   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
332 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
333 {
334   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
335   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
336   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
337   const PetscScalar  *b;
338   PetscErrorCode     ierr;
339   PetscInt           m = a->mbs,i,i2,nz,idx;
340   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
341 
342   PetscFunctionBegin;
343   its = its*lits;
344   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
345   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
346   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
347   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
348   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
349   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
350 
351   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
352 
353   diag  = a->diag;
354   idiag = a->idiag;
355   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
356   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
357 
358   if (flag & SOR_ZERO_INITIAL_GUESS) {
359     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
360       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
361       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
362       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
363       i2     = 3;
364       idiag += 9;
365       for (i=1; i<m; i++) {
366         v     = aa + 9*ai[i];
367         vi    = aj + ai[i];
368         nz    = diag[i] - ai[i];
369         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
370         while (nz--) {
371           idx  = 3*(*vi++);
372           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
373           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
374           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
375           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
376           v   += 9;
377         }
378         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
379         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
380         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
381         idiag   += 9;
382         i2      += 3;
383       }
384       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
386     }
387     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
388         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
389       i2    = 0;
390       mdiag = a->idiag+9*a->mbs;
391       for (i=0; i<m; i++) {
392         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
393         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
394         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
395         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
396         mdiag  += 9;
397         i2     += 3;
398       }
399       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+9*a->mbs - 9;
405       i2      = 3*m - 3;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
407       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
408       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
409       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
410       idiag -= 9;
411       i2    -= 3;
412       for (i=m-2; i>=0; i--) {
413         v     = aa + 9*(diag[i]+1);
414         vi    = aj + diag[i] + 1;
415         nz    = ai[i+1] - diag[i] - 1;
416         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
417         while (nz--) {
418           idx  = 3*(*vi++);
419           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
420           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
421           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
422           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
423           v   += 9;
424         }
425         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
426         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
427         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
428         idiag   -= 9;
429         i2      -= 3;
430       }
431       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
432     }
433   } else {
434     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
435   }
436   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
437   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
443 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
444 {
445   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
446   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
447   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
448   const PetscScalar  *b;
449   PetscErrorCode     ierr;
450   PetscInt           m = a->mbs,i,i2,nz,idx;
451   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
452 
453   PetscFunctionBegin;
454   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
475       i2     = 4;
476       idiag += 16;
477       for (i=1; i<m; i++) {
478 	v     = aa + 16*ai[i];
479 	vi    = aj + ai[i];
480 	nz    = diag[i] - ai[i];
481 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
482 	while (nz--) {
483 	  idx  = 4*(*vi++);
484 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
485 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
486 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
487 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
488 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
489 	  v   += 16;
490 	}
491 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
492 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
493 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
494 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
495         idiag   += 16;
496         i2      += 4;
497       }
498       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
499       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
500     }
501     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
502         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
503       i2    = 0;
504       mdiag = a->idiag+16*a->mbs;
505       for (i=0; i<m; i++) {
506         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
507         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
508         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
509         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
510         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
511         mdiag  += 16;
512         i2     += 4;
513       }
514       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
515     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
516       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
517     }
518     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
519       idiag   = a->idiag+16*a->mbs - 16;
520       i2      = 4*m - 4;
521       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
522       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
523       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
524       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
525       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
526       idiag -= 16;
527       i2    -= 4;
528       for (i=m-2; i>=0; i--) {
529 	v     = aa + 16*(diag[i]+1);
530 	vi    = aj + diag[i] + 1;
531 	nz    = ai[i+1] - diag[i] - 1;
532 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
533 	while (nz--) {
534 	  idx  = 4*(*vi++);
535 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
536 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
537 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
538 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
539 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
540 	  v   += 16;
541 	}
542 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
543 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
544 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
545 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
546         idiag   -= 16;
547         i2      -= 4;
548       }
549       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
550     }
551   } else {
552     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
553   }
554   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
555   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
561 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
562 {
563   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
564   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
565   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
566   const PetscScalar  *b;
567   PetscErrorCode     ierr;
568   PetscInt           m = a->mbs,i,i2,nz,idx;
569   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
570 
571   PetscFunctionBegin;
572   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
573   its = its*lits;
574   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
575   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
576   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
577   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
578   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
579 
580   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
581 
582   diag  = a->diag;
583   idiag = a->idiag;
584   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
585   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
586 
587   if (flag & SOR_ZERO_INITIAL_GUESS) {
588     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
589       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
590       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
591       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
592       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
593       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
594       i2     = 5;
595       idiag += 25;
596       for (i=1; i<m; i++) {
597 	v     = aa + 25*ai[i];
598 	vi    = aj + ai[i];
599 	nz    = diag[i] - ai[i];
600 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
601 	while (nz--) {
602 	  idx  = 5*(*vi++);
603 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
604 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
605 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
606 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
607 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
608 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
609 	  v   += 25;
610 	}
611 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
612 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
613 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
614 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
615 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
616         idiag   += 25;
617         i2      += 5;
618       }
619       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
620       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
621     }
622     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
623         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
624       i2    = 0;
625       mdiag = a->idiag+25*a->mbs;
626       for (i=0; i<m; i++) {
627         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
628         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
629         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
630         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
631         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
632         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
633         mdiag  += 25;
634         i2     += 5;
635       }
636       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
637     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
638       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
639     }
640     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
641       idiag   = a->idiag+25*a->mbs - 25;
642       i2      = 5*m - 5;
643       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
644       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
645       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
646       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
647       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
648       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
649       idiag -= 25;
650       i2    -= 5;
651       for (i=m-2; i>=0; i--) {
652 	v     = aa + 25*(diag[i]+1);
653 	vi    = aj + diag[i] + 1;
654 	nz    = ai[i+1] - diag[i] - 1;
655 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
656 	while (nz--) {
657 	  idx  = 5*(*vi++);
658 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
659 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
660 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
661 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
662 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
663 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
664 	  v   += 25;
665 	}
666 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
667 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
668 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
669 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
670 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
671         idiag   -= 25;
672         i2      -= 5;
673       }
674       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
675     }
676   } else {
677     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
678   }
679   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
680   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
686 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
687 {
688   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
689   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
690   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
691   const PetscScalar  *b;
692   PetscErrorCode     ierr;
693   PetscInt           m = a->mbs,i,i2,nz,idx;
694   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
695 
696   PetscFunctionBegin;
697   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
698   its = its*lits;
699   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
700   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
701   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
702   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
703   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
704 
705   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
706 
707   diag  = a->diag;
708   idiag = a->idiag;
709   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
710   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
711 
712   if (flag & SOR_ZERO_INITIAL_GUESS) {
713     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
714       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
715       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
716       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
717       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
718       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
719       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
720       i2     = 6;
721       idiag += 36;
722       for (i=1; i<m; i++) {
723         v     = aa + 36*ai[i];
724         vi    = aj + ai[i];
725         nz    = diag[i] - ai[i];
726         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
727         while (nz--) {
728           idx  = 6*(*vi++);
729           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
730           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
731           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
732           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
733           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
734           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
735           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
736           v   += 36;
737         }
738         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
739         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
740         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
741         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
742         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
743         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
744         idiag   += 36;
745         i2      += 6;
746       }
747       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
748       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
749     }
750     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
751         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
752       i2    = 0;
753       mdiag = a->idiag+36*a->mbs;
754       for (i=0; i<m; i++) {
755         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
756         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
757         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
758         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
759         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
760         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
761         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
762         mdiag  += 36;
763         i2     += 6;
764       }
765       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
766     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
767       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
768     }
769     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
770       idiag   = a->idiag+36*a->mbs - 36;
771       i2      = 6*m - 6;
772       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
773       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
774       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
775       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
776       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
777       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
778       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
779       idiag -= 36;
780       i2    -= 6;
781       for (i=m-2; i>=0; i--) {
782         v     = aa + 36*(diag[i]+1);
783         vi    = aj + diag[i] + 1;
784         nz    = ai[i+1] - diag[i] - 1;
785         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
786         while (nz--) {
787           idx  = 6*(*vi++);
788           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
789           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
790           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
791           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
792           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
793           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
794           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
795           v   += 36;
796         }
797         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
798         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
799         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
800         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
801         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
802         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
803         idiag   -= 36;
804         i2      -= 6;
805       }
806       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
807     }
808   } else {
809     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
810   }
811   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
812   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
818 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
819 {
820   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
821   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
822   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
823   const PetscScalar  *b;
824   PetscErrorCode     ierr;
825   PetscInt           m = a->mbs,i,i2,nz,idx;
826   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
827 
828   PetscFunctionBegin;
829   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
830   its = its*lits;
831   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
832   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
833   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
834   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
835   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
836 
837   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
838 
839   diag  = a->diag;
840   idiag = a->idiag;
841   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
842   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
843 
844   if (flag & SOR_ZERO_INITIAL_GUESS) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
846       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
847       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
848       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
849       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
850       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
851       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
852       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
853       i2     = 7;
854       idiag += 49;
855       for (i=1; i<m; i++) {
856         v     = aa + 49*ai[i];
857         vi    = aj + ai[i];
858         nz    = diag[i] - ai[i];
859         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
860         while (nz--) {
861           idx  = 7*(*vi++);
862           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
863           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
864           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
865           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
866           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
867           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
868           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
869           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
870           v   += 49;
871         }
872         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
873         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
874         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
875         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
876         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
877         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
878         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
879         idiag   += 49;
880         i2      += 7;
881       }
882       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
883       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
884     }
885     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
886         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
887       i2    = 0;
888       mdiag = a->idiag+49*a->mbs;
889       for (i=0; i<m; i++) {
890         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
891         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
892         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
893         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
894         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
895         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
896         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
897         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
898         mdiag  += 36;
899         i2     += 6;
900       }
901       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
902     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
903       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
904     }
905     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
906       idiag   = a->idiag+49*a->mbs - 49;
907       i2      = 7*m - 7;
908       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
909       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
910       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
911       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
912       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
913       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
914       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
915       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
916       idiag -= 49;
917       i2    -= 7;
918       for (i=m-2; i>=0; i--) {
919         v     = aa + 49*(diag[i]+1);
920         vi    = aj + diag[i] + 1;
921         nz    = ai[i+1] - diag[i] - 1;
922         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
923         while (nz--) {
924           idx  = 7*(*vi++);
925           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
926           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
927           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
928           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
929           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
930           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
931           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
932           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
933           v   += 49;
934         }
935         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
936         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
937         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
938         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
939         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
940         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
941         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
942         idiag   -= 49;
943         i2      -= 7;
944       }
945       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
946     }
947   } else {
948     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
949   }
950   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
951   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956     Special version for direct calls from Fortran (Used in PETSc-fun3d)
957 */
958 #if defined(PETSC_HAVE_FORTRAN_CAPS)
959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
961 #define matsetvaluesblocked4_ matsetvaluesblocked4
962 #endif
963 
964 EXTERN_C_BEGIN
965 #undef __FUNCT__
966 #define __FUNCT__ "matsetvaluesblocked4_"
967 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968 {
969   Mat               A = *AA;
970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972   PetscInt          *ai=a->i,*ailen=a->ilen;
973   PetscInt          *aj=a->j,stepval,lastcol = -1;
974   const PetscScalar *value = v;
975   MatScalar         *ap,*aa = a->a,*bap;
976 
977   PetscFunctionBegin;
978   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
979   stepval = (n-1)*4;
980   for (k=0; k<m; k++) { /* loop over added rows */
981     row  = im[k];
982     rp   = aj + ai[row];
983     ap   = aa + 16*ai[row];
984     nrow = ailen[row];
985     low  = 0;
986     high = nrow;
987     for (l=0; l<n; l++) { /* loop over added columns */
988       col = in[l];
989       if (col <= lastcol) low = 0; else high = nrow;
990       lastcol = col;
991       value = v + k*(stepval+4 + l)*4;
992       while (high-low > 7) {
993         t = (low+high)/2;
994         if (rp[t] > col) high = t;
995         else             low  = t;
996       }
997       for (i=low; i<high; i++) {
998         if (rp[i] > col) break;
999         if (rp[i] == col) {
1000           bap  = ap +  16*i;
1001           for (ii=0; ii<4; ii++,value+=stepval) {
1002             for (jj=ii; jj<16; jj+=4) {
1003               bap[jj] += *value++;
1004             }
1005           }
1006           goto noinsert2;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column index thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N >= i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i] = col;
1020       bap   = ap +  16*i;
1021       for (ii=0; ii<4; ii++,value+=stepval) {
1022         for (jj=ii; jj<16; jj+=4) {
1023           bap[jj] = *value++;
1024         }
1025       }
1026       noinsert2:;
1027       low = i;
1028     }
1029     ailen[row] = nrow;
1030   }
1031   PetscFunctionReturnVoid();
1032 }
1033 EXTERN_C_END
1034 
1035 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1036 #define matsetvalues4_ MATSETVALUES4
1037 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1038 #define matsetvalues4_ matsetvalues4
1039 #endif
1040 
1041 EXTERN_C_BEGIN
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatSetValues4_"
1044 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1045 {
1046   Mat         A = *AA;
1047   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1048   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1049   PetscInt    *ai=a->i,*ailen=a->ilen;
1050   PetscInt    *aj=a->j,brow,bcol;
1051   PetscInt    ridx,cidx,lastcol = -1;
1052   MatScalar   *ap,value,*aa=a->a,*bap;
1053 
1054   PetscFunctionBegin;
1055   for (k=0; k<m; k++) { /* loop over added rows */
1056     row  = im[k]; brow = row/4;
1057     rp   = aj + ai[brow];
1058     ap   = aa + 16*ai[brow];
1059     nrow = ailen[brow];
1060     low  = 0;
1061     high = nrow;
1062     for (l=0; l<n; l++) { /* loop over added columns */
1063       col = in[l]; bcol = col/4;
1064       ridx = row % 4; cidx = col % 4;
1065       value = v[l + k*n];
1066       if (col <= lastcol) low = 0; else high = nrow;
1067       lastcol = col;
1068       while (high-low > 7) {
1069         t = (low+high)/2;
1070         if (rp[t] > bcol) high = t;
1071         else              low  = t;
1072       }
1073       for (i=low; i<high; i++) {
1074         if (rp[i] > bcol) break;
1075         if (rp[i] == bcol) {
1076           bap  = ap +  16*i + 4*cidx + ridx;
1077           *bap += value;
1078           goto noinsert1;
1079         }
1080       }
1081       N = nrow++ - 1;
1082       high++; /* added new column thus must search to one higher than before */
1083       /* shift up all the later entries in this row */
1084       for (ii=N; ii>=i; ii--) {
1085         rp[ii+1] = rp[ii];
1086         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1087       }
1088       if (N>=i) {
1089         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1090       }
1091       rp[i]                    = bcol;
1092       ap[16*i + 4*cidx + ridx] = value;
1093       noinsert1:;
1094       low = i;
1095     }
1096     ailen[brow] = nrow;
1097   }
1098   PetscFunctionReturnVoid();
1099 }
1100 EXTERN_C_END
1101 
1102 /*
1103      Checks for missing diagonals
1104 */
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1107 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1108 {
1109   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       *diag,*jj = a->j,i;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1115   *missing = PETSC_FALSE;
1116   if (A->rmap->n > 0 && !jj) {
1117     *missing  = PETSC_TRUE;
1118     if (d) *d = 0;
1119     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1120   } else {
1121     diag     = a->diag;
1122     for (i=0; i<a->mbs; i++) {
1123       if (jj[diag[i]] != i) {
1124         *missing  = PETSC_TRUE;
1125         if (d) *d = i;
1126         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1127       }
1128     }
1129   }
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1135 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1136 {
1137   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1138   PetscErrorCode ierr;
1139   PetscInt       i,j,m = a->mbs;
1140 
1141   PetscFunctionBegin;
1142   if (!a->diag) {
1143     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1144     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1145     a->free_diag = PETSC_TRUE;
1146   }
1147   for (i=0; i<m; i++) {
1148     a->diag[i] = a->i[i+1];
1149     for (j=a->i[i]; j<a->i[i+1]; j++) {
1150       if (a->j[j] == i) {
1151         a->diag[i] = j;
1152         break;
1153       }
1154     }
1155   }
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 
1160 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1164 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1165 {
1166   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1167   PetscErrorCode ierr;
1168   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1169   PetscInt       *tia, *tja;
1170 
1171   PetscFunctionBegin;
1172   *nn = n;
1173   if (!ia) PetscFunctionReturn(0);
1174   if (symmetric) {
1175     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1176     nz   = tia[n];
1177   } else {
1178     tia = a->i; tja = a->j;
1179   }
1180 
1181   if (!blockcompressed && bs > 1) {
1182     (*nn) *= bs;
1183     nbs    = bs;
1184     /* malloc & create the natural set of indices */
1185     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1186     if (n) {
1187       (*ia)[0] = 0;
1188       for (j=1; j<bs; j++) {
1189         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1190       }
1191     }
1192 
1193     for (i=1; i<n; i++) {
1194       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1195       for (j=1; j<bs; j++) {
1196         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1197       }
1198     }
1199     if (n) {
1200       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1201     }
1202 
1203     if (ja) {
1204       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1205       cnt = 0;
1206       for (i=0; i<n; i++) {
1207         for (j=0; j<bs; j++) {
1208           for (k=tia[i]; k<tia[i+1]; k++) {
1209             for (l=0; l<bs; l++) {
1210               (*ja)[cnt++] = bs*tja[k] + l;
1211 	    }
1212           }
1213         }
1214       }
1215     }
1216 
1217     n     *= bs;
1218     nz *= bs*bs;
1219     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1220       ierr = PetscFree(tia);CHKERRQ(ierr);
1221       ierr = PetscFree(tja);CHKERRQ(ierr);
1222     }
1223   } else if (oshift == 1) {
1224     if (symmetric) {
1225       PetscInt nz = tia[A->rmap->n/bs];
1226       /*  add 1 to i and j indices */
1227       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1228       *ia = tia;
1229       if (ja) {
1230 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1231         *ja = tja;
1232       }
1233     } else {
1234       PetscInt nz = a->i[A->rmap->n/bs];
1235       /* malloc space and  add 1 to i and j indices */
1236       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1237       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1238       if (ja) {
1239 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1240 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1241       }
1242     }
1243   } else {
1244     *ia = tia;
1245     if (ja) *ja = tja;
1246   }
1247 
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1253 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1254 {
1255   PetscErrorCode ierr;
1256 
1257   PetscFunctionBegin;
1258   if (!ia) PetscFunctionReturn(0);
1259   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1260     ierr = PetscFree(*ia);CHKERRQ(ierr);
1261     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1268 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1269 {
1270   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1276 #endif
1277   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1278   if (a->row) {
1279     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1280   }
1281   if (a->col) {
1282     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1283   }
1284   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1285   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1286   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1287   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1288   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1289   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1290   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1291   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1292   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1293 
1294   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1295   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1296   ierr = PetscFree(a);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1312 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1313 {
1314   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   switch (op) {
1319   case MAT_ROW_ORIENTED:
1320     a->roworiented    = flg;
1321     break;
1322   case MAT_KEEP_NONZERO_PATTERN:
1323     a->keepnonzeropattern = flg;
1324     break;
1325   case MAT_NEW_NONZERO_LOCATIONS:
1326     a->nonew          = (flg ? 0 : 1);
1327     break;
1328   case MAT_NEW_NONZERO_LOCATION_ERR:
1329     a->nonew          = (flg ? -1 : 0);
1330     break;
1331   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1332     a->nonew          = (flg ? -2 : 0);
1333     break;
1334   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1335     a->nounused       = (flg ? -1 : 0);
1336     break;
1337   case MAT_NEW_DIAGONALS:
1338   case MAT_IGNORE_OFF_PROC_ENTRIES:
1339   case MAT_USE_HASH_TABLE:
1340     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1341     break;
1342   case MAT_SYMMETRIC:
1343   case MAT_STRUCTURALLY_SYMMETRIC:
1344   case MAT_HERMITIAN:
1345   case MAT_SYMMETRY_ETERNAL:
1346     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1347     break;
1348   default:
1349     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1357 {
1358   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1361   MatScalar      *aa,*aa_i;
1362   PetscScalar    *v_i;
1363 
1364   PetscFunctionBegin;
1365   bs  = A->rmap->bs;
1366   ai  = a->i;
1367   aj  = a->j;
1368   aa  = a->a;
1369   bs2 = a->bs2;
1370 
1371   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1372 
1373   bn  = row/bs;   /* Block number */
1374   bp  = row % bs; /* Block Position */
1375   M   = ai[bn+1] - ai[bn];
1376   *nz = bs*M;
1377 
1378   if (v) {
1379     *v = 0;
1380     if (*nz) {
1381       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1382       for (i=0; i<M; i++) { /* for each block in the block row */
1383         v_i  = *v + i*bs;
1384         aa_i = aa + bs2*(ai[bn] + i);
1385         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1386       }
1387     }
1388   }
1389 
1390   if (idx) {
1391     *idx = 0;
1392     if (*nz) {
1393       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1394       for (i=0; i<M; i++) { /* for each block in the block row */
1395         idx_i = *idx + i*bs;
1396         itmp  = bs*aj[ai[bn] + i];
1397         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1398       }
1399     }
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1406 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1407 {
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1412   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1420 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1421 {
1422   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1423   Mat            C;
1424   PetscErrorCode ierr;
1425   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1426   PetscInt       *rows,*cols,bs2=a->bs2;
1427   MatScalar      *array;
1428 
1429   PetscFunctionBegin;
1430   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1431   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1432     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1433     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1434 
1435     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1436     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1437     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1438     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1439     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1440     ierr = PetscFree(col);CHKERRQ(ierr);
1441   } else {
1442     C = *B;
1443   }
1444 
1445   array = a->a;
1446   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1447   for (i=0; i<mbs; i++) {
1448     cols[0] = i*bs;
1449     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1450     len = ai[i+1] - ai[i];
1451     for (j=0; j<len; j++) {
1452       rows[0] = (*aj++)*bs;
1453       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1454       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1455       array += bs2;
1456     }
1457   }
1458   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1459 
1460   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462 
1463   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1464     *B = C;
1465   } else {
1466     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1478   int            fd;
1479   PetscScalar    *aa;
1480   FILE           *file;
1481 
1482   PetscFunctionBegin;
1483   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1484   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1485   col_lens[0] = MAT_FILE_COOKIE;
1486 
1487   col_lens[1] = A->rmap->N;
1488   col_lens[2] = A->cmap->n;
1489   col_lens[3] = a->nz*bs2;
1490 
1491   /* store lengths of each row and write (including header) to file */
1492   count = 0;
1493   for (i=0; i<a->mbs; i++) {
1494     for (j=0; j<bs; j++) {
1495       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1496     }
1497   }
1498   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1499   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1500 
1501   /* store column indices (zero start index) */
1502   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1503   count = 0;
1504   for (i=0; i<a->mbs; i++) {
1505     for (j=0; j<bs; j++) {
1506       for (k=a->i[i]; k<a->i[i+1]; k++) {
1507         for (l=0; l<bs; l++) {
1508           jj[count++] = bs*a->j[k] + l;
1509         }
1510       }
1511     }
1512   }
1513   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1514   ierr = PetscFree(jj);CHKERRQ(ierr);
1515 
1516   /* store nonzero values */
1517   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1518   count = 0;
1519   for (i=0; i<a->mbs; i++) {
1520     for (j=0; j<bs; j++) {
1521       for (k=a->i[i]; k<a->i[i+1]; k++) {
1522         for (l=0; l<bs; l++) {
1523           aa[count++] = a->a[bs2*k + l*bs + j];
1524         }
1525       }
1526     }
1527   }
1528   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1529   ierr = PetscFree(aa);CHKERRQ(ierr);
1530 
1531   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1532   if (file) {
1533     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1541 {
1542   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1543   PetscErrorCode    ierr;
1544   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1545   PetscViewerFormat format;
1546 
1547   PetscFunctionBegin;
1548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1550     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1551   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1552     Mat aij;
1553     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1554     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1555     ierr = MatDestroy(aij);CHKERRQ(ierr);
1556   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1557      PetscFunctionReturn(0);
1558   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1568                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1571                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             if (a->a[bs2*k + l*bs + j] != 0.0) {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1578             }
1579 #endif
1580           }
1581         }
1582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1583       }
1584     }
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1586   } else {
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1588     for (i=0; i<a->mbs; i++) {
1589       for (j=0; j<bs; j++) {
1590         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1591         for (k=a->i[i]; k<a->i[i+1]; k++) {
1592           for (l=0; l<bs; l++) {
1593 #if defined(PETSC_USE_COMPLEX)
1594             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1595               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1596                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1597             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1598               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1599                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             } else {
1601               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             }
1603 #else
1604             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1612   }
1613   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1620 {
1621   Mat            A = (Mat) Aa;
1622   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1623   PetscErrorCode ierr;
1624   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1625   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1626   MatScalar      *aa;
1627   PetscViewer    viewer;
1628 
1629   PetscFunctionBegin;
1630 
1631   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1632   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1633 
1634   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1635 
1636   /* loop over matrix elements drawing boxes */
1637   color = PETSC_DRAW_BLUE;
1638   for (i=0,row=0; i<mbs; i++,row+=bs) {
1639     for (j=a->i[i]; j<a->i[i+1]; j++) {
1640       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642       aa = a->a + j*bs2;
1643       for (k=0; k<bs; k++) {
1644         for (l=0; l<bs; l++) {
1645           if (PetscRealPart(*aa++) >=  0.) continue;
1646           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647         }
1648       }
1649     }
1650   }
1651   color = PETSC_DRAW_CYAN;
1652   for (i=0,row=0; i<mbs; i++,row+=bs) {
1653     for (j=a->i[i]; j<a->i[i+1]; j++) {
1654       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1655       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1656       aa = a->a + j*bs2;
1657       for (k=0; k<bs; k++) {
1658         for (l=0; l<bs; l++) {
1659           if (PetscRealPart(*aa++) != 0.) continue;
1660           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1661         }
1662       }
1663     }
1664   }
1665 
1666   color = PETSC_DRAW_RED;
1667   for (i=0,row=0; i<mbs; i++,row+=bs) {
1668     for (j=a->i[i]; j<a->i[i+1]; j++) {
1669       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671       aa = a->a + j*bs2;
1672       for (k=0; k<bs; k++) {
1673         for (l=0; l<bs; l++) {
1674           if (PetscRealPart(*aa++) <= 0.) continue;
1675           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1676         }
1677       }
1678     }
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1686 {
1687   PetscErrorCode ierr;
1688   PetscReal      xl,yl,xr,yr,w,h;
1689   PetscDraw      draw;
1690   PetscTruth     isnull;
1691 
1692   PetscFunctionBegin;
1693 
1694   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1695   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1696 
1697   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1698   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699   xr += w;    yr += h;  xl = -w;     yl = -h;
1700   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1701   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatView_SeqBAIJ"
1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709 {
1710   PetscErrorCode ierr;
1711   PetscTruth     iascii,isbinary,isdraw;
1712 
1713   PetscFunctionBegin;
1714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1717   if (iascii){
1718     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1719   } else if (isbinary) {
1720     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1721   } else if (isdraw) {
1722     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1723   } else {
1724     Mat B;
1725     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1726     ierr = MatView(B,viewer);CHKERRQ(ierr);
1727     ierr = MatDestroy(B);CHKERRQ(ierr);
1728   }
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1736 {
1737   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1738   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1739   PetscInt    *ai = a->i,*ailen = a->ilen;
1740   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1741   MatScalar   *ap,*aa = a->a;
1742 
1743   PetscFunctionBegin;
1744   for (k=0; k<m; k++) { /* loop over rows */
1745     row  = im[k]; brow = row/bs;
1746     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1747     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1748     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1749     nrow = ailen[brow];
1750     for (l=0; l<n; l++) { /* loop over columns */
1751       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1752       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1753       col  = in[l] ;
1754       bcol = col/bs;
1755       cidx = col%bs;
1756       ridx = row%bs;
1757       high = nrow;
1758       low  = 0; /* assume unsorted */
1759       while (high-low > 5) {
1760         t = (low+high)/2;
1761         if (rp[t] > bcol) high = t;
1762         else             low  = t;
1763       }
1764       for (i=low; i<high; i++) {
1765         if (rp[i] > bcol) break;
1766         if (rp[i] == bcol) {
1767           *v++ = ap[bs2*i+bs*cidx+ridx];
1768           goto finished;
1769         }
1770       }
1771       *v++ = 0.0;
1772       finished:;
1773     }
1774   }
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #define CHUNKSIZE 10
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1782 {
1783   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1784   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1785   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1786   PetscErrorCode    ierr;
1787   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1788   PetscTruth        roworiented=a->roworiented;
1789   const PetscScalar *value = v;
1790   MatScalar         *ap,*aa = a->a,*bap;
1791 
1792   PetscFunctionBegin;
1793   if (roworiented) {
1794     stepval = (n-1)*bs;
1795   } else {
1796     stepval = (m-1)*bs;
1797   }
1798   for (k=0; k<m; k++) { /* loop over added rows */
1799     row  = im[k];
1800     if (row < 0) continue;
1801 #if defined(PETSC_USE_DEBUG)
1802     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1803 #endif
1804     rp   = aj + ai[row];
1805     ap   = aa + bs2*ai[row];
1806     rmax = imax[row];
1807     nrow = ailen[row];
1808     low  = 0;
1809     high = nrow;
1810     for (l=0; l<n; l++) { /* loop over added columns */
1811       if (in[l] < 0) continue;
1812 #if defined(PETSC_USE_DEBUG)
1813       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1814 #endif
1815       col = in[l];
1816       if (roworiented) {
1817         value = v + k*(stepval+bs)*bs + l*bs;
1818       } else {
1819         value = v + l*(stepval+bs)*bs + k*bs;
1820       }
1821       if (col <= lastcol) low = 0; else high = nrow;
1822       lastcol = col;
1823       while (high-low > 7) {
1824         t = (low+high)/2;
1825         if (rp[t] > col) high = t;
1826         else             low  = t;
1827       }
1828       for (i=low; i<high; i++) {
1829         if (rp[i] > col) break;
1830         if (rp[i] == col) {
1831           bap  = ap +  bs2*i;
1832           if (roworiented) {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] += *value++;
1837                 }
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] = *value++;
1843                 }
1844               }
1845             }
1846           } else {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   *bap++ += *value++;
1851                 }
1852               }
1853             } else {
1854               for (ii=0; ii<bs; ii++,value+=stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   *bap++  = *value++;
1857                 }
1858               }
1859             }
1860           }
1861           goto noinsert2;
1862         }
1863       }
1864       if (nonew == 1) goto noinsert2;
1865       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1866       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1867       N = nrow++ - 1; high++;
1868       /* shift up all the later entries in this row */
1869       for (ii=N; ii>=i; ii--) {
1870         rp[ii+1] = rp[ii];
1871         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1872       }
1873       if (N >= i) {
1874         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875       }
1876       rp[i] = col;
1877       bap   = ap +  bs2*i;
1878       if (roworiented) {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=ii; jj<bs2; jj+=bs) {
1881             bap[jj] = *value++;
1882           }
1883         }
1884       } else {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=0; jj<bs; jj++) {
1887             *bap++  = *value++;
1888           }
1889         }
1890       }
1891       noinsert2:;
1892       low = i;
1893     }
1894     ailen[row] = nrow;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902 {
1903   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1904   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1905   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1906   PetscErrorCode ierr;
1907   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1908   MatScalar      *aa = a->a,*ap;
1909   PetscReal      ratio=0.6;
1910 
1911   PetscFunctionBegin;
1912   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1913 
1914   if (m) rmax = ailen[0];
1915   for (i=1; i<mbs; i++) {
1916     /* move each row back by the amount of empty slots (fshift) before it*/
1917     fshift += imax[i-1] - ailen[i-1];
1918     rmax   = PetscMax(rmax,ailen[i]);
1919     if (fshift) {
1920       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1921       N = ailen[i];
1922       for (j=0; j<N; j++) {
1923         ip[j-fshift] = ip[j];
1924         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1925       }
1926     }
1927     ai[i] = ai[i-1] + ailen[i-1];
1928   }
1929   if (mbs) {
1930     fshift += imax[mbs-1] - ailen[mbs-1];
1931     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1932   }
1933   /* reset ilen and imax for each row */
1934   for (i=0; i<mbs; i++) {
1935     ailen[i] = imax[i] = ai[i+1] - ai[i];
1936   }
1937   a->nz = ai[mbs];
1938 
1939   /* diagonals may have moved, so kill the diagonal pointers */
1940   a->idiagvalid = PETSC_FALSE;
1941   if (fshift && a->diag) {
1942     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1944     a->diag = 0;
1945   }
1946   if (fshift && a->nounused == -1) {
1947     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   }
1949   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1952   a->reallocs          = 0;
1953   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1954 
1955   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1956   if (a->compressedrow.use){
1957     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1958   }
1959 
1960   A->same_nonzero = PETSC_TRUE;
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 /*
1965    This function returns an array of flags which indicate the locations of contiguous
1966    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1967    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1968    Assume: sizes should be long enough to hold all the values.
1969 */
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1973 {
1974   PetscInt   i,j,k,row;
1975   PetscTruth flg;
1976 
1977   PetscFunctionBegin;
1978   for (i=0,j=0; i<n; j++) {
1979     row = idx[i];
1980     if (row%bs!=0) { /* Not the begining of a block */
1981       sizes[j] = 1;
1982       i++;
1983     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1984       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1985       i++;
1986     } else { /* Begining of the block, so check if the complete block exists */
1987       flg = PETSC_TRUE;
1988       for (k=1; k<bs; k++) {
1989         if (row+k != idx[i+k]) { /* break in the block */
1990           flg = PETSC_FALSE;
1991           break;
1992         }
1993       }
1994       if (flg) { /* No break in the bs */
1995         sizes[j] = bs;
1996         i+= bs;
1997       } else {
1998         sizes[j] = 1;
1999         i++;
2000       }
2001     }
2002   }
2003   *bs_max = j;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2010 {
2011   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013   PetscInt       i,j,k,count,*rows;
2014   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2015   PetscScalar    zero = 0.0;
2016   MatScalar      *aa;
2017 
2018   PetscFunctionBegin;
2019   /* Make a copy of the IS and  sort it */
2020   /* allocate memory for rows,sizes */
2021   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2022 
2023   /* copy IS values to rows, and sort them */
2024   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2025   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2026   if (baij->keepnonzeropattern) {
2027     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2028     bs_max = is_n;
2029     A->same_nonzero = PETSC_TRUE;
2030   } else {
2031     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2032     A->same_nonzero = PETSC_FALSE;
2033   }
2034 
2035   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2036     row   = rows[j];
2037     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2038     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2039     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2040     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2041       if (diag != 0.0) {
2042         if (baij->ilen[row/bs] > 0) {
2043           baij->ilen[row/bs]       = 1;
2044           baij->j[baij->i[row/bs]] = row/bs;
2045           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2046         }
2047         /* Now insert all the diagonal values for this bs */
2048         for (k=0; k<bs; k++) {
2049           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2050         }
2051       } else { /* (diag == 0.0) */
2052         baij->ilen[row/bs] = 0;
2053       } /* end (diag == 0.0) */
2054     } else { /* (sizes[i] != bs) */
2055 #if defined (PETSC_USE_DEBUG)
2056       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2057 #endif
2058       for (k=0; k<count; k++) {
2059         aa[0] =  zero;
2060         aa    += bs;
2061       }
2062       if (diag != 0.0) {
2063         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2064       }
2065     }
2066   }
2067 
2068   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2075 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2076 {
2077   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2078   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2079   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2080   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2081   PetscErrorCode ierr;
2082   PetscInt       ridx,cidx,bs2=a->bs2;
2083   PetscTruth     roworiented=a->roworiented;
2084   MatScalar      *ap,value,*aa=a->a,*bap;
2085 
2086   PetscFunctionBegin;
2087   if (v) PetscValidScalarPointer(v,6);
2088   for (k=0; k<m; k++) { /* loop over added rows */
2089     row  = im[k];
2090     brow = row/bs;
2091     if (row < 0) continue;
2092 #if defined(PETSC_USE_DEBUG)
2093     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2094 #endif
2095     rp   = aj + ai[brow];
2096     ap   = aa + bs2*ai[brow];
2097     rmax = imax[brow];
2098     nrow = ailen[brow];
2099     low  = 0;
2100     high = nrow;
2101     for (l=0; l<n; l++) { /* loop over added columns */
2102       if (in[l] < 0) continue;
2103 #if defined(PETSC_USE_DEBUG)
2104       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2105 #endif
2106       col = in[l]; bcol = col/bs;
2107       ridx = row % bs; cidx = col % bs;
2108       if (roworiented) {
2109         value = v[l + k*n];
2110       } else {
2111         value = v[k + l*m];
2112       }
2113       if (col <= lastcol) low = 0; else high = nrow;
2114       lastcol = col;
2115       while (high-low > 7) {
2116         t = (low+high)/2;
2117         if (rp[t] > bcol) high = t;
2118         else              low  = t;
2119       }
2120       for (i=low; i<high; i++) {
2121         if (rp[i] > bcol) break;
2122         if (rp[i] == bcol) {
2123           bap  = ap +  bs2*i + bs*cidx + ridx;
2124           if (is == ADD_VALUES) *bap += value;
2125           else                  *bap  = value;
2126           goto noinsert1;
2127         }
2128       }
2129       if (nonew == 1) goto noinsert1;
2130       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2131       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2132       N = nrow++ - 1; high++;
2133       /* shift up all the later entries in this row */
2134       for (ii=N; ii>=i; ii--) {
2135         rp[ii+1] = rp[ii];
2136         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2137       }
2138       if (N>=i) {
2139         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2140       }
2141       rp[i]                      = bcol;
2142       ap[bs2*i + bs*cidx + ridx] = value;
2143       a->nz++;
2144       noinsert1:;
2145       low = i;
2146     }
2147     ailen[brow] = nrow;
2148   }
2149   A->same_nonzero = PETSC_FALSE;
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2155 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2156 {
2157   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2158   Mat            outA;
2159   PetscErrorCode ierr;
2160   PetscTruth     row_identity,col_identity;
2161 
2162   PetscFunctionBegin;
2163   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2164   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2165   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2166   if (!row_identity || !col_identity) {
2167     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2168   }
2169 
2170   outA          = inA;
2171   inA->factor   = MAT_FACTOR_LU;
2172 
2173   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2174 
2175   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2176   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2177   a->row = row;
2178   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2179   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2180   a->col = col;
2181 
2182   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2183   if (a->icol) {
2184     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2185   }
2186   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2187   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2188 
2189   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2190   if (!a->solve_work) {
2191     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2192     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2193   }
2194   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2195 
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2203 {
2204   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2205   PetscInt    i,nz,mbs;
2206 
2207   PetscFunctionBegin;
2208   nz  = baij->maxnz/baij->bs2;
2209   mbs = baij->mbs;
2210   for (i=0; i<nz; i++) {
2211     baij->j[i] = indices[i];
2212   }
2213   baij->nz = nz;
2214   for (i=0; i<mbs; i++) {
2215     baij->ilen[i] = baij->imax[i];
2216   }
2217   PetscFunctionReturn(0);
2218 }
2219 EXTERN_C_END
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2223 /*@
2224     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2225        in the matrix.
2226 
2227   Input Parameters:
2228 +  mat - the SeqBAIJ matrix
2229 -  indices - the column indices
2230 
2231   Level: advanced
2232 
2233   Notes:
2234     This can be called if you have precomputed the nonzero structure of the
2235   matrix and want to provide it to the matrix object to improve the performance
2236   of the MatSetValues() operation.
2237 
2238     You MUST have set the correct numbers of nonzeros per row in the call to
2239   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2240 
2241     MUST be called before any calls to MatSetValues();
2242 
2243 @*/
2244 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2245 {
2246   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2247 
2248   PetscFunctionBegin;
2249   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2250   PetscValidPointer(indices,2);
2251   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2252   if (f) {
2253     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2254   } else {
2255     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2262 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2263 {
2264   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2265   PetscErrorCode ierr;
2266   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2267   PetscReal      atmp;
2268   PetscScalar    *x,zero = 0.0;
2269   MatScalar      *aa;
2270   PetscInt       ncols,brow,krow,kcol;
2271 
2272   PetscFunctionBegin;
2273   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2274   bs   = A->rmap->bs;
2275   aa   = a->a;
2276   ai   = a->i;
2277   aj   = a->j;
2278   mbs  = a->mbs;
2279 
2280   ierr = VecSet(v,zero);CHKERRQ(ierr);
2281   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2282   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2283   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2284   for (i=0; i<mbs; i++) {
2285     ncols = ai[1] - ai[0]; ai++;
2286     brow  = bs*i;
2287     for (j=0; j<ncols; j++){
2288       for (kcol=0; kcol<bs; kcol++){
2289         for (krow=0; krow<bs; krow++){
2290           atmp = PetscAbsScalar(*aa);aa++;
2291           row = brow + krow;    /* row index */
2292           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2293           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2294         }
2295       }
2296       aj++;
2297     }
2298   }
2299   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatCopy_SeqBAIJ"
2305 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2306 {
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   /* If the two matrices have the same copy implementation, use fast copy. */
2311   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2312     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2313     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2314 
2315     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2316       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2317     }
2318     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2319   } else {
2320     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2321   }
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2327 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2328 {
2329   PetscErrorCode ierr;
2330 
2331   PetscFunctionBegin;
2332   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2338 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2339 {
2340   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2341   PetscFunctionBegin;
2342   *array = a->a;
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2348 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2349 {
2350   PetscFunctionBegin;
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #include "petscblaslapack.h"
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2357 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2358 {
2359   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2360   PetscErrorCode ierr;
2361   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2362   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2363 
2364   PetscFunctionBegin;
2365   if (str == SAME_NONZERO_PATTERN) {
2366     PetscScalar alpha = a;
2367     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2368   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2369     if (y->xtoy && y->XtoY != X) {
2370       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2371       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2372     }
2373     if (!y->xtoy) { /* get xtoy */
2374       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2375       y->XtoY = X;
2376       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2377     }
2378     bs2 = bs*bs;
2379     for (i=0; i<x->nz; i++) {
2380       j = 0;
2381       while (j < bs2){
2382         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2383         j++;
2384       }
2385     }
2386     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2387   } else {
2388     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2395 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2396 {
2397   PetscInt rbs,cbs;
2398   PetscErrorCode ierr;
2399 
2400   PetscFunctionBegin;
2401   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2402   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2403   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2404   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2410 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2411 {
2412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2413   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2414   MatScalar      *aa = a->a;
2415 
2416   PetscFunctionBegin;
2417   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2423 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2424 {
2425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2426   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2427   MatScalar      *aa = a->a;
2428 
2429   PetscFunctionBegin;
2430   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2438 /*
2439     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2440 */
2441 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2442 {
2443   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2444   PetscErrorCode ierr;
2445   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2446   PetscInt       nz = a->i[m],row,*jj,mr,col;
2447 
2448   PetscFunctionBegin;
2449   *nn = n;
2450   if (!ia) PetscFunctionReturn(0);
2451   if (symmetric) {
2452     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2453   } else {
2454     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2455     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2456     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2457     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2458     jj = a->j;
2459     for (i=0; i<nz; i++) {
2460       collengths[jj[i]]++;
2461     }
2462     cia[0] = oshift;
2463     for (i=0; i<n; i++) {
2464       cia[i+1] = cia[i] + collengths[i];
2465     }
2466     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2467     jj   = a->j;
2468     for (row=0; row<m; row++) {
2469       mr = a->i[row+1] - a->i[row];
2470       for (i=0; i<mr; i++) {
2471         col = *jj++;
2472         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2473       }
2474     }
2475     ierr = PetscFree(collengths);CHKERRQ(ierr);
2476     *ia = cia; *ja = cja;
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2483 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2484 {
2485   PetscErrorCode ierr;
2486 
2487   PetscFunctionBegin;
2488   if (!ia) PetscFunctionReturn(0);
2489   ierr = PetscFree(*ia);CHKERRQ(ierr);
2490   ierr = PetscFree(*ja);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 #undef __FUNCT__
2495 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2496 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2497 {
2498   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2499   PetscErrorCode ierr;
2500   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2501   PetscScalar    dx,*y,*xx,*w3_array;
2502   PetscScalar    *vscale_array;
2503   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2504   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2505   void           *fctx = coloring->fctx;
2506   PetscTruth     flg = PETSC_FALSE;
2507   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2508   Vec            x1_tmp;
2509 
2510   PetscFunctionBegin;
2511   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2512   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2513   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2514   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2515 
2516   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2517   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2518   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2519   if (flg) {
2520     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2521   } else {
2522     PetscTruth assembled;
2523     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2524     if (assembled) {
2525       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2526     }
2527   }
2528 
2529   x1_tmp = x1;
2530   if (!coloring->vscale){
2531     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2532   }
2533 
2534   /*
2535     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2536     coloring->F for the coarser grids from the finest
2537   */
2538   if (coloring->F) {
2539     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2540     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2541     if (m1 != m2) {
2542       coloring->F = 0;
2543       }
2544     }
2545 
2546   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2547     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2548   }
2549   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2550 
2551   /* Set w1 = F(x1) */
2552   if (coloring->F) {
2553     w1          = coloring->F; /* use already computed value of function */
2554     coloring->F = 0;
2555   } else {
2556     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2557     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2558     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2559   }
2560 
2561   if (!coloring->w3) {
2562     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2563     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2564   }
2565   w3 = coloring->w3;
2566 
2567     CHKMEMQ;
2568     /* Compute all the local scale factors, including ghost points */
2569   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2570   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2571   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2572   if (ctype == IS_COLORING_GHOSTED){
2573     col_start = 0; col_end = N;
2574   } else if (ctype == IS_COLORING_GLOBAL){
2575     xx = xx - start;
2576     vscale_array = vscale_array - start;
2577     col_start = start; col_end = N + start;
2578   }    CHKMEMQ;
2579   for (col=col_start; col<col_end; col++){
2580     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2581     if (coloring->htype[0] == 'w') {
2582       dx = 1.0 + unorm;
2583     } else {
2584       dx  = xx[col];
2585     }
2586     if (dx == 0.0) dx = 1.0;
2587 #if !defined(PETSC_USE_COMPLEX)
2588     if (dx < umin && dx >= 0.0)      dx = umin;
2589     else if (dx < 0.0 && dx > -umin) dx = -umin;
2590 #else
2591     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2592     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2593 #endif
2594     dx               *= epsilon;
2595     vscale_array[col] = 1.0/dx;
2596   }     CHKMEMQ;
2597   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2598   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2599   if (ctype == IS_COLORING_GLOBAL){
2600     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2601     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2602   }
2603         CHKMEMQ;
2604   if (coloring->vscaleforrow) {
2605     vscaleforrow = coloring->vscaleforrow;
2606   } else {
2607     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2608   }
2609 
2610 
2611   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2612   /*
2613     Loop over each color
2614   */
2615   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2616   for (k=0; k<coloring->ncolors; k++) {
2617     coloring->currentcolor = k;
2618     for (i=0; i<bs; i++) {
2619       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2620       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2621       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2622       /*
2623 	Loop over each column associated with color
2624 	adding the perturbation to the vector w3.
2625       */
2626       for (l=0; l<coloring->ncolumns[k]; l++) {
2627 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2628 	if (coloring->htype[0] == 'w') {
2629 	  dx = 1.0 + unorm;
2630 	} else {
2631 	  dx  = xx[col];
2632 	}
2633 	if (dx == 0.0) dx = 1.0;
2634 #if !defined(PETSC_USE_COMPLEX)
2635 	if (dx < umin && dx >= 0.0)      dx = umin;
2636 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2637 #else
2638 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2639 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2640 #endif
2641 	dx            *= epsilon;
2642 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2643 	w3_array[col] += dx;
2644       }
2645       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2646       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2647 
2648       /*
2649 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2650 	w2 = F(x1 + dx) - F(x1)
2651       */
2652       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2653       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2654       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2655       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2656 
2657       /*
2658 	Loop over rows of vector, putting results into Jacobian matrix
2659       */
2660       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2661       for (l=0; l<coloring->nrows[k]; l++) {
2662 	row    = bs*coloring->rows[k][l];             /* local row index */
2663 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2664         for (j=0; j<bs; j++) {
2665   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2666           srows[j]  = row + start + j;
2667         }
2668 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2669       }
2670       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2671     }
2672   } /* endof for each color */
2673   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2674   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2675   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2676   ierr = PetscFree(srows);CHKERRQ(ierr);
2677 
2678   coloring->currentcolor = -1;
2679   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2681   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2682   PetscFunctionReturn(0);
2683 }
2684 
2685 /* -------------------------------------------------------------------*/
2686 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2687        MatGetRow_SeqBAIJ,
2688        MatRestoreRow_SeqBAIJ,
2689        MatMult_SeqBAIJ_N,
2690 /* 4*/ MatMultAdd_SeqBAIJ_N,
2691        MatMultTranspose_SeqBAIJ,
2692        MatMultTransposeAdd_SeqBAIJ,
2693        0,
2694        0,
2695        0,
2696 /*10*/ 0,
2697        MatLUFactor_SeqBAIJ,
2698        0,
2699        0,
2700        MatTranspose_SeqBAIJ,
2701 /*15*/ MatGetInfo_SeqBAIJ,
2702        MatEqual_SeqBAIJ,
2703        MatGetDiagonal_SeqBAIJ,
2704        MatDiagonalScale_SeqBAIJ,
2705        MatNorm_SeqBAIJ,
2706 /*20*/ 0,
2707        MatAssemblyEnd_SeqBAIJ,
2708        MatSetOption_SeqBAIJ,
2709        MatZeroEntries_SeqBAIJ,
2710 /*24*/ MatZeroRows_SeqBAIJ,
2711        0,
2712        0,
2713        0,
2714        0,
2715 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2716        0,
2717        0,
2718        MatGetArray_SeqBAIJ,
2719        MatRestoreArray_SeqBAIJ,
2720 /*34*/ MatDuplicate_SeqBAIJ,
2721        0,
2722        0,
2723        MatILUFactor_SeqBAIJ,
2724        0,
2725 /*39*/ MatAXPY_SeqBAIJ,
2726        MatGetSubMatrices_SeqBAIJ,
2727        MatIncreaseOverlap_SeqBAIJ,
2728        MatGetValues_SeqBAIJ,
2729        MatCopy_SeqBAIJ,
2730 /*44*/ 0,
2731        MatScale_SeqBAIJ,
2732        0,
2733        0,
2734        0,
2735 /*49*/ MatSetBlockSize_SeqBAIJ,
2736        MatGetRowIJ_SeqBAIJ,
2737        MatRestoreRowIJ_SeqBAIJ,
2738        MatGetColumnIJ_SeqBAIJ,
2739        MatRestoreColumnIJ_SeqBAIJ,
2740 /*54*/ MatFDColoringCreate_SeqAIJ,
2741        0,
2742        0,
2743        0,
2744        MatSetValuesBlocked_SeqBAIJ,
2745 /*59*/ MatGetSubMatrix_SeqBAIJ,
2746        MatDestroy_SeqBAIJ,
2747        MatView_SeqBAIJ,
2748        0,
2749        0,
2750 /*64*/ 0,
2751        0,
2752        0,
2753        0,
2754        0,
2755 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2756        0,
2757        MatConvert_Basic,
2758        0,
2759        0,
2760 /*74*/ 0,
2761        MatFDColoringApply_BAIJ,
2762        0,
2763        0,
2764        0,
2765 /*79*/ 0,
2766        0,
2767        0,
2768        0,
2769        MatLoad_SeqBAIJ,
2770 /*84*/ 0,
2771        0,
2772        0,
2773        0,
2774        0,
2775 /*89*/ 0,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*94*/ 0,
2781        0,
2782        0,
2783        0,
2784        0,
2785 /*99*/0,
2786        0,
2787        0,
2788        0,
2789        0,
2790 /*104*/0,
2791        MatRealPart_SeqBAIJ,
2792        MatImaginaryPart_SeqBAIJ,
2793        0,
2794        0,
2795 /*109*/0,
2796        0,
2797        0,
2798        0,
2799        MatMissingDiagonal_SeqBAIJ,
2800 /*114*/0,
2801        0,
2802        0,
2803        0,
2804        0,
2805 /*119*/0,
2806        0,
2807        MatMultHermitianTranspose_SeqBAIJ,
2808        MatMultHermitianTransposeAdd_SeqBAIJ
2809 };
2810 
2811 EXTERN_C_BEGIN
2812 #undef __FUNCT__
2813 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2814 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2815 {
2816   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2817   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2818   PetscErrorCode ierr;
2819 
2820   PetscFunctionBegin;
2821   if (aij->nonew != 1) {
2822     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2823   }
2824 
2825   /* allocate space for values if not already there */
2826   if (!aij->saved_values) {
2827     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2828     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2829   }
2830 
2831   /* copy values over */
2832   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 EXTERN_C_END
2836 
2837 EXTERN_C_BEGIN
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2840 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2841 {
2842   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2843   PetscErrorCode ierr;
2844   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2845 
2846   PetscFunctionBegin;
2847   if (aij->nonew != 1) {
2848     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2849   }
2850   if (!aij->saved_values) {
2851     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2852   }
2853 
2854   /* copy values over */
2855   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 EXTERN_C_END
2859 
2860 EXTERN_C_BEGIN
2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2862 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2863 EXTERN_C_END
2864 
2865 EXTERN_C_BEGIN
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2868 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2869 {
2870   Mat_SeqBAIJ    *b;
2871   PetscErrorCode ierr;
2872   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2873   PetscTruth     flg,skipallocation = PETSC_FALSE;
2874 
2875   PetscFunctionBegin;
2876 
2877   if (nz == MAT_SKIP_ALLOCATION) {
2878     skipallocation = PETSC_TRUE;
2879     nz             = 0;
2880   }
2881 
2882   if (bs < 0) {
2883     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2884       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2885     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2886     bs   = PetscAbs(bs);
2887   }
2888   if (nnz && newbs != bs) {
2889     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2890   }
2891   bs   = newbs;
2892 
2893   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2894   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2895   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2896   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2897 
2898   B->preallocated = PETSC_TRUE;
2899 
2900   mbs  = B->rmap->n/bs;
2901   nbs  = B->cmap->n/bs;
2902   bs2  = bs*bs;
2903 
2904   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2905     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2906   }
2907 
2908   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2909   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2910   if (nnz) {
2911     for (i=0; i<mbs; i++) {
2912       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2913       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2914     }
2915   }
2916 
2917   b       = (Mat_SeqBAIJ*)B->data;
2918   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2919     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2920   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2921 
2922   if (!flg) {
2923     switch (bs) {
2924     case 1:
2925       B->ops->mult            = MatMult_SeqBAIJ_1;
2926       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2927       B->ops->sor             = MatSOR_SeqBAIJ_1;
2928       break;
2929     case 2:
2930       B->ops->mult            = MatMult_SeqBAIJ_2;
2931       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2932       B->ops->sor             = MatSOR_SeqBAIJ_2;
2933       break;
2934     case 3:
2935       B->ops->mult            = MatMult_SeqBAIJ_3;
2936       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2937       B->ops->sor             = MatSOR_SeqBAIJ_3;
2938       break;
2939     case 4:
2940       B->ops->mult            = MatMult_SeqBAIJ_4;
2941       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2942       B->ops->sor             = MatSOR_SeqBAIJ_4;
2943       break;
2944     case 5:
2945       B->ops->mult            = MatMult_SeqBAIJ_5;
2946       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2947       B->ops->sor             = MatSOR_SeqBAIJ_5;
2948       break;
2949     case 6:
2950       B->ops->mult            = MatMult_SeqBAIJ_6;
2951       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2952       B->ops->sor             = MatSOR_SeqBAIJ_6;
2953       break;
2954     case 7:
2955       B->ops->mult            = MatMult_SeqBAIJ_7;
2956       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2957       B->ops->sor             = MatSOR_SeqBAIJ_7;
2958       break;
2959     case 15:
2960       B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2961       break;
2962     default:
2963       B->ops->mult            = MatMult_SeqBAIJ_N;
2964       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2965       break;
2966     }
2967   }
2968   B->rmap->bs      = bs;
2969   b->mbs     = mbs;
2970   b->nbs     = nbs;
2971   if (!skipallocation) {
2972     if (!b->imax) {
2973       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2974       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2975       b->free_imax_ilen = PETSC_TRUE;
2976     }
2977     /* b->ilen will count nonzeros in each block row so far. */
2978     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2979     if (!nnz) {
2980       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2981       else if (nz <= 0)        nz = 1;
2982       for (i=0; i<mbs; i++) b->imax[i] = nz;
2983       nz = nz*mbs;
2984     } else {
2985       nz = 0;
2986       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2987     }
2988 
2989     /* allocate the matrix space */
2990     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2991     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2992     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2993     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2994     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2995     b->singlemalloc = PETSC_TRUE;
2996     b->i[0] = 0;
2997     for (i=1; i<mbs+1; i++) {
2998       b->i[i] = b->i[i-1] + b->imax[i-1];
2999     }
3000     b->free_a     = PETSC_TRUE;
3001     b->free_ij    = PETSC_TRUE;
3002   } else {
3003     b->free_a     = PETSC_FALSE;
3004     b->free_ij    = PETSC_FALSE;
3005   }
3006 
3007   B->rmap->bs          = bs;
3008   b->bs2              = bs2;
3009   b->mbs              = mbs;
3010   b->nz               = 0;
3011   b->maxnz            = nz*bs2;
3012   B->info.nz_unneeded = (PetscReal)b->maxnz;
3013   PetscFunctionReturn(0);
3014 }
3015 EXTERN_C_END
3016 
3017 EXTERN_C_BEGIN
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3020 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3021 {
3022   PetscInt       i,m,nz,nz_max=0,*nnz;
3023   PetscScalar    *values=0;
3024   PetscErrorCode ierr;
3025 
3026   PetscFunctionBegin;
3027 
3028   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3029 
3030   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3031   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3032   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3033   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3034   m = B->rmap->n/bs;
3035 
3036   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3037   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3038   for(i=0; i<m; i++) {
3039     nz = ii[i+1]- ii[i];
3040     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3041     nz_max = PetscMax(nz_max, nz);
3042     nnz[i] = nz;
3043   }
3044   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3045   ierr = PetscFree(nnz);CHKERRQ(ierr);
3046 
3047   values = (PetscScalar*)V;
3048   if (!values) {
3049     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3050     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3051   }
3052   for (i=0; i<m; i++) {
3053     PetscInt          ncols  = ii[i+1] - ii[i];
3054     const PetscInt    *icols = jj + ii[i];
3055     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3056     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3057   }
3058   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3059   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061 
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 
3067 EXTERN_C_BEGIN
3068 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3069 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3070 EXTERN_C_END
3071 
3072 /*MC
3073    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3074    block sparse compressed row format.
3075 
3076    Options Database Keys:
3077 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3078 
3079   Level: beginner
3080 
3081 .seealso: MatCreateSeqBAIJ()
3082 M*/
3083 
3084 
3085 EXTERN_C_BEGIN
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatCreate_SeqBAIJ"
3088 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3089 {
3090   PetscErrorCode ierr;
3091   PetscMPIInt    size;
3092   Mat_SeqBAIJ    *b;
3093 
3094   PetscFunctionBegin;
3095   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3096   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3097 
3098   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3099   B->data = (void*)b;
3100   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3101   B->mapping               = 0;
3102   b->row                   = 0;
3103   b->col                   = 0;
3104   b->icol                  = 0;
3105   b->reallocs              = 0;
3106   b->saved_values          = 0;
3107 
3108   b->roworiented           = PETSC_TRUE;
3109   b->nonew                 = 0;
3110   b->diag                  = 0;
3111   b->solve_work            = 0;
3112   b->mult_work             = 0;
3113   B->spptr                 = 0;
3114   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3115   b->keepnonzeropattern    = PETSC_FALSE;
3116   b->xtoy                  = 0;
3117   b->XtoY                  = 0;
3118   b->compressedrow.use     = PETSC_FALSE;
3119   b->compressedrow.nrows   = 0;
3120   b->compressedrow.i       = PETSC_NULL;
3121   b->compressedrow.rindex  = PETSC_NULL;
3122   b->compressedrow.checked = PETSC_FALSE;
3123   B->same_nonzero          = PETSC_FALSE;
3124 
3125   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3126                                      "MatGetFactorAvailable_seqbaij_petsc",
3127                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3129                                      "MatGetFactor_seqbaij_petsc",
3130                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3132                                      "MatInvertBlockDiagonal_SeqBAIJ",
3133                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3135                                      "MatStoreValues_SeqBAIJ",
3136                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3137   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3138                                      "MatRetrieveValues_SeqBAIJ",
3139                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3140   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3141                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3142                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3143   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3144                                      "MatConvert_SeqBAIJ_SeqAIJ",
3145                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3146   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3147                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3148                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3149   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3150                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3151                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3152   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3153                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3154                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3155   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3156   PetscFunctionReturn(0);
3157 }
3158 EXTERN_C_END
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3162 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3163 {
3164   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3165   PetscErrorCode ierr;
3166   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3167 
3168   PetscFunctionBegin;
3169   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3170 
3171   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3172     c->imax = a->imax;
3173     c->ilen = a->ilen;
3174     c->free_imax_ilen = PETSC_FALSE;
3175   } else {
3176     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3177     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3178     for (i=0; i<mbs; i++) {
3179       c->imax[i] = a->imax[i];
3180       c->ilen[i] = a->ilen[i];
3181     }
3182     c->free_imax_ilen = PETSC_TRUE;
3183   }
3184 
3185   /* allocate the matrix space */
3186   if (mallocmatspace){
3187     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3188       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3189       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3190       c->singlemalloc = PETSC_FALSE;
3191       c->free_ij      = PETSC_FALSE;
3192       c->i            = a->i;
3193       c->j            = a->j;
3194       c->parent       = A;
3195       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3196       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3197       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3198     } else {
3199       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3200       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3201       c->singlemalloc = PETSC_TRUE;
3202       c->free_ij      = PETSC_TRUE;
3203       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3204       if (mbs > 0) {
3205 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3206 	if (cpvalues == MAT_COPY_VALUES) {
3207 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3208 	} else {
3209 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3210 	}
3211       }
3212     }
3213   }
3214 
3215   c->roworiented = a->roworiented;
3216   c->nonew       = a->nonew;
3217   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3218   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3219   c->bs2         = a->bs2;
3220   c->mbs         = a->mbs;
3221   c->nbs         = a->nbs;
3222 
3223   if (a->diag) {
3224     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3225       c->diag      = a->diag;
3226       c->free_diag = PETSC_FALSE;
3227     } else {
3228       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3229       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3230       for (i=0; i<mbs; i++) {
3231         c->diag[i] = a->diag[i];
3232       }
3233       c->free_diag = PETSC_TRUE;
3234     }
3235   } else c->diag        = 0;
3236   c->nz                 = a->nz;
3237   c->maxnz              = a->maxnz;
3238   c->solve_work         = 0;
3239   c->mult_work          = 0;
3240   c->free_a             = PETSC_TRUE;
3241   c->free_ij            = PETSC_TRUE;
3242   C->preallocated       = PETSC_TRUE;
3243   C->assembled          = PETSC_TRUE;
3244 
3245   c->compressedrow.use     = a->compressedrow.use;
3246   c->compressedrow.nrows   = a->compressedrow.nrows;
3247   c->compressedrow.checked = a->compressedrow.checked;
3248   if (a->compressedrow.checked && a->compressedrow.use){
3249     i = a->compressedrow.nrows;
3250     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3251     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3252     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3253     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3254   } else {
3255     c->compressedrow.use    = PETSC_FALSE;
3256     c->compressedrow.i      = PETSC_NULL;
3257     c->compressedrow.rindex = PETSC_NULL;
3258   }
3259   C->same_nonzero = A->same_nonzero;
3260   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3261   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3267 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3268 {
3269     PetscErrorCode ierr;
3270 
3271   PetscFunctionBegin;
3272   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3273   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3274   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3275   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3276   PetscFunctionReturn(0);
3277 }
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatLoad_SeqBAIJ"
3281 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3282 {
3283   Mat_SeqBAIJ    *a;
3284   Mat            B;
3285   PetscErrorCode ierr;
3286   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3287   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3288   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3289   PetscInt       *masked,nmask,tmp,bs2,ishift;
3290   PetscMPIInt    size;
3291   int            fd;
3292   PetscScalar    *aa;
3293   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3294 
3295   PetscFunctionBegin;
3296   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3297     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3298   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3299   bs2  = bs*bs;
3300 
3301   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3302   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3303   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3304   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3305   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3306   M = header[1]; N = header[2]; nz = header[3];
3307 
3308   if (header[3] < 0) {
3309     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3310   }
3311 
3312   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3313 
3314   /*
3315      This code adds extra rows to make sure the number of rows is
3316     divisible by the blocksize
3317   */
3318   mbs        = M/bs;
3319   extra_rows = bs - M + bs*(mbs);
3320   if (extra_rows == bs) extra_rows = 0;
3321   else                  mbs++;
3322   if (extra_rows) {
3323     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3324   }
3325 
3326   /* read in row lengths */
3327   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3328   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3329   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3330 
3331   /* read in column indices */
3332   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3333   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3334   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3335 
3336   /* loop over row lengths determining block row lengths */
3337   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3338   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3339   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3340   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3341   rowcount = 0;
3342   nzcount = 0;
3343   for (i=0; i<mbs; i++) {
3344     nmask = 0;
3345     for (j=0; j<bs; j++) {
3346       kmax = rowlengths[rowcount];
3347       for (k=0; k<kmax; k++) {
3348         tmp = jj[nzcount++]/bs;
3349         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3350       }
3351       rowcount++;
3352     }
3353     browlengths[i] += nmask;
3354     /* zero out the mask elements we set */
3355     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3356   }
3357 
3358   /* create our matrix */
3359   ierr = MatCreate(comm,&B);
3360   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3361   ierr = MatSetType(B,type);CHKERRQ(ierr);
3362   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3363   a = (Mat_SeqBAIJ*)B->data;
3364 
3365   /* set matrix "i" values */
3366   a->i[0] = 0;
3367   for (i=1; i<= mbs; i++) {
3368     a->i[i]      = a->i[i-1] + browlengths[i-1];
3369     a->ilen[i-1] = browlengths[i-1];
3370   }
3371   a->nz         = 0;
3372   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3373 
3374   /* read in nonzero values */
3375   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3376   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3377   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3378 
3379   /* set "a" and "j" values into matrix */
3380   nzcount = 0; jcount = 0;
3381   for (i=0; i<mbs; i++) {
3382     nzcountb = nzcount;
3383     nmask    = 0;
3384     for (j=0; j<bs; j++) {
3385       kmax = rowlengths[i*bs+j];
3386       for (k=0; k<kmax; k++) {
3387         tmp = jj[nzcount++]/bs;
3388 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3389       }
3390     }
3391     /* sort the masked values */
3392     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3393 
3394     /* set "j" values into matrix */
3395     maskcount = 1;
3396     for (j=0; j<nmask; j++) {
3397       a->j[jcount++]  = masked[j];
3398       mask[masked[j]] = maskcount++;
3399     }
3400     /* set "a" values into matrix */
3401     ishift = bs2*a->i[i];
3402     for (j=0; j<bs; j++) {
3403       kmax = rowlengths[i*bs+j];
3404       for (k=0; k<kmax; k++) {
3405         tmp       = jj[nzcountb]/bs ;
3406         block     = mask[tmp] - 1;
3407         point     = jj[nzcountb] - bs*tmp;
3408         idx       = ishift + bs2*block + j + bs*point;
3409         a->a[idx] = (MatScalar)aa[nzcountb++];
3410       }
3411     }
3412     /* zero out the mask elements we set */
3413     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3414   }
3415   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3416 
3417   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3418   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3419   ierr = PetscFree(aa);CHKERRQ(ierr);
3420   ierr = PetscFree(jj);CHKERRQ(ierr);
3421   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3422 
3423   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3424   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3425   ierr = MatView_Private(B);CHKERRQ(ierr);
3426 
3427   *A = B;
3428   PetscFunctionReturn(0);
3429 }
3430 
3431 #undef __FUNCT__
3432 #define __FUNCT__ "MatCreateSeqBAIJ"
3433 /*@C
3434    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3435    compressed row) format.  For good matrix assembly performance the
3436    user should preallocate the matrix storage by setting the parameter nz
3437    (or the array nnz).  By setting these parameters accurately, performance
3438    during matrix assembly can be increased by more than a factor of 50.
3439 
3440    Collective on MPI_Comm
3441 
3442    Input Parameters:
3443 +  comm - MPI communicator, set to PETSC_COMM_SELF
3444 .  bs - size of block
3445 .  m - number of rows
3446 .  n - number of columns
3447 .  nz - number of nonzero blocks  per block row (same for all rows)
3448 -  nnz - array containing the number of nonzero blocks in the various block rows
3449          (possibly different for each block row) or PETSC_NULL
3450 
3451    Output Parameter:
3452 .  A - the matrix
3453 
3454    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3455    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3456    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3457 
3458    Options Database Keys:
3459 .   -mat_no_unroll - uses code that does not unroll the loops in the
3460                      block calculations (much slower)
3461 .    -mat_block_size - size of the blocks to use
3462 
3463    Level: intermediate
3464 
3465    Notes:
3466    The number of rows and columns must be divisible by blocksize.
3467 
3468    If the nnz parameter is given then the nz parameter is ignored
3469 
3470    A nonzero block is any block that as 1 or more nonzeros in it
3471 
3472    The block AIJ format is fully compatible with standard Fortran 77
3473    storage.  That is, the stored row and column indices can begin at
3474    either one (as in Fortran) or zero.  See the users' manual for details.
3475 
3476    Specify the preallocated storage with either nz or nnz (not both).
3477    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3478    allocation.  For additional details, see the users manual chapter on
3479    matrices.
3480 
3481 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3482 @*/
3483 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3484 {
3485   PetscErrorCode ierr;
3486 
3487   PetscFunctionBegin;
3488   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3489   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3490   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3491   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3497 /*@C
3498    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3499    per row in the matrix. For good matrix assembly performance the
3500    user should preallocate the matrix storage by setting the parameter nz
3501    (or the array nnz).  By setting these parameters accurately, performance
3502    during matrix assembly can be increased by more than a factor of 50.
3503 
3504    Collective on MPI_Comm
3505 
3506    Input Parameters:
3507 +  A - the matrix
3508 .  bs - size of block
3509 .  nz - number of block nonzeros per block row (same for all rows)
3510 -  nnz - array containing the number of block nonzeros in the various block rows
3511          (possibly different for each block row) or PETSC_NULL
3512 
3513    Options Database Keys:
3514 .   -mat_no_unroll - uses code that does not unroll the loops in the
3515                      block calculations (much slower)
3516 .    -mat_block_size - size of the blocks to use
3517 
3518    Level: intermediate
3519 
3520    Notes:
3521    If the nnz parameter is given then the nz parameter is ignored
3522 
3523    You can call MatGetInfo() to get information on how effective the preallocation was;
3524    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3525    You can also run with the option -info and look for messages with the string
3526    malloc in them to see if additional memory allocation was needed.
3527 
3528    The block AIJ format is fully compatible with standard Fortran 77
3529    storage.  That is, the stored row and column indices can begin at
3530    either one (as in Fortran) or zero.  See the users' manual for details.
3531 
3532    Specify the preallocated storage with either nz or nnz (not both).
3533    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3534    allocation.  For additional details, see the users manual chapter on
3535    matrices.
3536 
3537 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3538 @*/
3539 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3540 {
3541   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3542 
3543   PetscFunctionBegin;
3544   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3545   if (f) {
3546     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3547   }
3548   PetscFunctionReturn(0);
3549 }
3550 
3551 #undef __FUNCT__
3552 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3553 /*@C
3554    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3555    (the default sequential PETSc format).
3556 
3557    Collective on MPI_Comm
3558 
3559    Input Parameters:
3560 +  A - the matrix
3561 .  i - the indices into j for the start of each local row (starts with zero)
3562 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3563 -  v - optional values in the matrix
3564 
3565    Level: developer
3566 
3567 .keywords: matrix, aij, compressed row, sparse
3568 
3569 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3570 @*/
3571 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3572 {
3573   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3574 
3575   PetscFunctionBegin;
3576   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3577   if (f) {
3578     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3579   }
3580   PetscFunctionReturn(0);
3581 }
3582 
3583 
3584 #undef __FUNCT__
3585 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3586 /*@
3587      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3588               (upper triangular entries in CSR format) provided by the user.
3589 
3590      Collective on MPI_Comm
3591 
3592    Input Parameters:
3593 +  comm - must be an MPI communicator of size 1
3594 .  bs - size of block
3595 .  m - number of rows
3596 .  n - number of columns
3597 .  i - row indices
3598 .  j - column indices
3599 -  a - matrix values
3600 
3601    Output Parameter:
3602 .  mat - the matrix
3603 
3604    Level: intermediate
3605 
3606    Notes:
3607        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3608     once the matrix is destroyed
3609 
3610        You cannot set new nonzero locations into this matrix, that will generate an error.
3611 
3612        The i and j indices are 0 based
3613 
3614 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3615 
3616 @*/
3617 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3618 {
3619   PetscErrorCode ierr;
3620   PetscInt       ii;
3621   Mat_SeqBAIJ    *baij;
3622 
3623   PetscFunctionBegin;
3624   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3625   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3626 
3627   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3628   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3629   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3630   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3631   baij = (Mat_SeqBAIJ*)(*mat)->data;
3632   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3633   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3634 
3635   baij->i = i;
3636   baij->j = j;
3637   baij->a = a;
3638   baij->singlemalloc = PETSC_FALSE;
3639   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3640   baij->free_a       = PETSC_FALSE;
3641   baij->free_ij       = PETSC_FALSE;
3642 
3643   for (ii=0; ii<m; ii++) {
3644     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3645 #if defined(PETSC_USE_DEBUG)
3646     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3647 #endif
3648   }
3649 #if defined(PETSC_USE_DEBUG)
3650   for (ii=0; ii<baij->i[m]; ii++) {
3651     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3652     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3653   }
3654 #endif
3655 
3656   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3658   PetscFunctionReturn(0);
3659 }
3660