xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 6aa9148f4397cd347261e3043c0fcf0ae62e5ac4)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     default:
129       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 EXTERN_C_END
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
138 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139 {
140   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141   PetscScalar        *x,x1,s1;
142   const PetscScalar  *b;
143   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144   PetscErrorCode     ierr;
145   PetscInt           m = a->mbs,i,i2,nz,idx;
146   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
147 
148   PetscFunctionBegin;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
150   its = its*lits;
151   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
156 
157   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
158 
159   diag  = a->diag;
160   idiag = a->idiag;
161   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
163 
164   if (flag & SOR_ZERO_INITIAL_GUESS) {
165     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166       x[0] = b[0]*idiag[0];
167       i2     = 1;
168       idiag += 1;
169       for (i=1; i<m; i++) {
170         v     = aa + ai[i];
171         vi    = aj + ai[i];
172         nz    = diag[i] - ai[i];
173         s1    = b[i2];
174         while (nz--) {
175           idx  = (*vi++);
176           x1   = x[idx];
177           s1  -= v[0]*x1;
178           v   += 1;
179         }
180         x[i2]   = idiag[0]*s1;
181         idiag   += 1;
182         i2      += 1;
183       }
184       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
185       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
186     }
187     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
188         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
189       i2    = 0;
190       mdiag = a->idiag+a->mbs;
191       for (i=0; i<m; i++) {
192         x1      = x[i2];
193         x[i2]   = mdiag[0]*x1;
194         mdiag  += 1;
195         i2     += 1;
196       }
197       ierr = PetscLogFlops(m);CHKERRQ(ierr);
198     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
199       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
200     }
201     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
202       idiag   = a->idiag+a->mbs - 1;
203       i2      = m - 1;
204       x1      = x[i2];
205       x[i2]   = idiag[0]*x1;
206       idiag -= 1;
207       i2    -= 1;
208       for (i=m-2; i>=0; i--) {
209         v     = aa + (diag[i]+1);
210         vi    = aj + diag[i] + 1;
211         nz    = ai[i+1] - diag[i] - 1;
212         s1    = x[i2];
213         while (nz--) {
214           idx  = (*vi++);
215           x1   = x[idx];
216           s1  -= v[0]*x1;
217           v   += 1;
218         }
219         x[i2]   = idiag[0]*s1;
220         idiag   -= 1;
221         i2      -= 1;
222       }
223       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
224     }
225   } else {
226     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
227   }
228   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
229   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
235 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
236 {
237   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
238   PetscScalar        *x,x1,x2,s1,s2;
239   const PetscScalar  *b;
240   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
241   PetscErrorCode     ierr;
242   PetscInt           m = a->mbs,i,i2,nz,idx;
243   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
244 
245   PetscFunctionBegin;
246   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
247   its = its*lits;
248   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
249   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
250   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
251   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
252   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
253 
254   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
255 
256   diag  = a->diag;
257   idiag = a->idiag;
258   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
259   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
260 
261   if (flag & SOR_ZERO_INITIAL_GUESS) {
262     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
263       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
264       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
265       i2     = 2;
266       idiag += 4;
267       for (i=1; i<m; i++) {
268 	v     = aa + 4*ai[i];
269 	vi    = aj + ai[i];
270 	nz    = diag[i] - ai[i];
271 	s1    = b[i2]; s2 = b[i2+1];
272 	while (nz--) {
273 	  idx  = 2*(*vi++);
274 	  x1   = x[idx]; x2 = x[1+idx];
275 	  s1  -= v[0]*x1 + v[2]*x2;
276 	  s2  -= v[1]*x1 + v[3]*x2;
277 	  v   += 4;
278 	}
279 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
280 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
281         idiag   += 4;
282         i2      += 2;
283       }
284       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
285       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
286     }
287     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
288         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
289       i2    = 0;
290       mdiag = a->idiag+4*a->mbs;
291       for (i=0; i<m; i++) {
292         x1      = x[i2]; x2 = x[i2+1];
293         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
294         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
295         mdiag  += 4;
296         i2     += 2;
297       }
298       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
299     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
300       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
301     }
302     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
303       idiag   = a->idiag+4*a->mbs - 4;
304       i2      = 2*m - 2;
305       x1      = x[i2]; x2 = x[i2+1];
306       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
307       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
308       idiag -= 4;
309       i2    -= 2;
310       for (i=m-2; i>=0; i--) {
311 	v     = aa + 4*(diag[i]+1);
312 	vi    = aj + diag[i] + 1;
313 	nz    = ai[i+1] - diag[i] - 1;
314 	s1    = x[i2]; s2 = x[i2+1];
315 	while (nz--) {
316 	  idx  = 2*(*vi++);
317 	  x1   = x[idx]; x2 = x[1+idx];
318 	  s1  -= v[0]*x1 + v[2]*x2;
319 	  s2  -= v[1]*x1 + v[3]*x2;
320 	  v   += 4;
321 	}
322 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
323 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
324         idiag   -= 4;
325         i2      -= 2;
326       }
327       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
328     }
329   } else {
330     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
331   }
332   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
333   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
339 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
340 {
341   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
342   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
343   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
344   const PetscScalar  *b;
345   PetscErrorCode     ierr;
346   PetscInt           m = a->mbs,i,i2,nz,idx;
347   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
348 
349   PetscFunctionBegin;
350   its = its*lits;
351   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
352   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
353   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
354   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
355   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
356   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
357 
358   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
359 
360   diag  = a->diag;
361   idiag = a->idiag;
362   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
363   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
364 
365   if (flag & SOR_ZERO_INITIAL_GUESS) {
366     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
367       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
368       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
369       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
370       i2     = 3;
371       idiag += 9;
372       for (i=1; i<m; i++) {
373         v     = aa + 9*ai[i];
374         vi    = aj + ai[i];
375         nz    = diag[i] - ai[i];
376         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
377         while (nz--) {
378           idx  = 3*(*vi++);
379           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
380           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
381           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
382           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
383           v   += 9;
384         }
385         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
386         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
387         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
388         idiag   += 9;
389         i2      += 3;
390       }
391       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
392       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
393     }
394     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
395         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
396       i2    = 0;
397       mdiag = a->idiag+9*a->mbs;
398       for (i=0; i<m; i++) {
399         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
400         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
401         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
402         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
403         mdiag  += 9;
404         i2     += 3;
405       }
406       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
407     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
408       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
409     }
410     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
411       idiag   = a->idiag+9*a->mbs - 9;
412       i2      = 3*m - 3;
413       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
414       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
415       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
416       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
417       idiag -= 9;
418       i2    -= 3;
419       for (i=m-2; i>=0; i--) {
420         v     = aa + 9*(diag[i]+1);
421         vi    = aj + diag[i] + 1;
422         nz    = ai[i+1] - diag[i] - 1;
423         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
424         while (nz--) {
425           idx  = 3*(*vi++);
426           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
427           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
428           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
429           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
430           v   += 9;
431         }
432         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
433         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
434         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
435         idiag   -= 9;
436         i2      -= 3;
437       }
438       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
439     }
440   } else {
441     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
442   }
443   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
450 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
451 {
452   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
453   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
454   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
455   const PetscScalar  *b;
456   PetscErrorCode     ierr;
457   PetscInt           m = a->mbs,i,i2,nz,idx;
458   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
459 
460   PetscFunctionBegin;
461   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
462   its = its*lits;
463   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
464   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
465   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
466   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
467   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
468 
469   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
470 
471   diag  = a->diag;
472   idiag = a->idiag;
473   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
474   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
475 
476   if (flag & SOR_ZERO_INITIAL_GUESS) {
477     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
478       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
479       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
480       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
481       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
482       i2     = 4;
483       idiag += 16;
484       for (i=1; i<m; i++) {
485 	v     = aa + 16*ai[i];
486 	vi    = aj + ai[i];
487 	nz    = diag[i] - ai[i];
488 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
489 	while (nz--) {
490 	  idx  = 4*(*vi++);
491 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
492 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
493 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
494 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
495 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
496 	  v   += 16;
497 	}
498 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
499 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
500 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
501 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
502         idiag   += 16;
503         i2      += 4;
504       }
505       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
507     }
508     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510       i2    = 0;
511       mdiag = a->idiag+16*a->mbs;
512       for (i=0; i<m; i++) {
513         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
515         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
516         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
517         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
518         mdiag  += 16;
519         i2     += 4;
520       }
521       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
522     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
523       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
524     }
525     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
526       idiag   = a->idiag+16*a->mbs - 16;
527       i2      = 4*m - 4;
528       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
529       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
530       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
531       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
532       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
533       idiag -= 16;
534       i2    -= 4;
535       for (i=m-2; i>=0; i--) {
536 	v     = aa + 16*(diag[i]+1);
537 	vi    = aj + diag[i] + 1;
538 	nz    = ai[i+1] - diag[i] - 1;
539 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
540 	while (nz--) {
541 	  idx  = 4*(*vi++);
542 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
543 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
544 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
545 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
546 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
547 	  v   += 16;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
550 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
551 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
552 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
553         idiag   -= 16;
554         i2      -= 4;
555       }
556       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
568 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
569 {
570   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
571   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
572   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
573   const PetscScalar  *b;
574   PetscErrorCode     ierr;
575   PetscInt           m = a->mbs,i,i2,nz,idx;
576   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
577 
578   PetscFunctionBegin;
579   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
580   its = its*lits;
581   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
582   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
583   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
584   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
585   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
586 
587   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
588 
589   diag  = a->diag;
590   idiag = a->idiag;
591   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
592   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
593 
594   if (flag & SOR_ZERO_INITIAL_GUESS) {
595     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
596       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
597       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
598       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
599       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
600       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
601       i2     = 5;
602       idiag += 25;
603       for (i=1; i<m; i++) {
604 	v     = aa + 25*ai[i];
605 	vi    = aj + ai[i];
606 	nz    = diag[i] - ai[i];
607 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
608 	while (nz--) {
609 	  idx  = 5*(*vi++);
610 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
611 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
612 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
613 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
614 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
615 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
616 	  v   += 25;
617 	}
618 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
619 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
620 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
621 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
622 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
623         idiag   += 25;
624         i2      += 5;
625       }
626       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
627       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
628     }
629     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
630         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
631       i2    = 0;
632       mdiag = a->idiag+25*a->mbs;
633       for (i=0; i<m; i++) {
634         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
635         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
636         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
637         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
638         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
639         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
640         mdiag  += 25;
641         i2     += 5;
642       }
643       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
644     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
645       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
646     }
647     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
648       idiag   = a->idiag+25*a->mbs - 25;
649       i2      = 5*m - 5;
650       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
651       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
652       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
653       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
654       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
655       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
656       idiag -= 25;
657       i2    -= 5;
658       for (i=m-2; i>=0; i--) {
659 	v     = aa + 25*(diag[i]+1);
660 	vi    = aj + diag[i] + 1;
661 	nz    = ai[i+1] - diag[i] - 1;
662 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
663 	while (nz--) {
664 	  idx  = 5*(*vi++);
665 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
666 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
667 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
668 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
669 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
670 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
671 	  v   += 25;
672 	}
673 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
674 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
675 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
676 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
677 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
678         idiag   -= 25;
679         i2      -= 5;
680       }
681       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
682     }
683   } else {
684     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
685   }
686   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
687   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
693 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
694 {
695   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
696   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
697   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
698   const PetscScalar  *b;
699   PetscErrorCode     ierr;
700   PetscInt           m = a->mbs,i,i2,nz,idx;
701   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
702 
703   PetscFunctionBegin;
704   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
705   its = its*lits;
706   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
707   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
708   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
709   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
710   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
711 
712   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
713 
714   diag  = a->diag;
715   idiag = a->idiag;
716   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
717   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
718 
719   if (flag & SOR_ZERO_INITIAL_GUESS) {
720     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
721       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
722       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
723       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
724       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
725       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
726       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
727       i2     = 6;
728       idiag += 36;
729       for (i=1; i<m; i++) {
730         v     = aa + 36*ai[i];
731         vi    = aj + ai[i];
732         nz    = diag[i] - ai[i];
733         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
734         while (nz--) {
735           idx  = 6*(*vi++);
736           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
737           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
738           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
739           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
740           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
741           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
742           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
743           v   += 36;
744         }
745         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
746         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
747         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
748         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
749         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
750         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
751         idiag   += 36;
752         i2      += 6;
753       }
754       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
755       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
756     }
757     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
758         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
759       i2    = 0;
760       mdiag = a->idiag+36*a->mbs;
761       for (i=0; i<m; i++) {
762         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
763         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
764         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
765         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
766         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
767         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
768         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
769         mdiag  += 36;
770         i2     += 6;
771       }
772       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
773     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
774       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
775     }
776     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
777       idiag   = a->idiag+36*a->mbs - 36;
778       i2      = 6*m - 6;
779       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
780       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
781       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
782       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
783       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
784       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
785       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
786       idiag -= 36;
787       i2    -= 6;
788       for (i=m-2; i>=0; i--) {
789         v     = aa + 36*(diag[i]+1);
790         vi    = aj + diag[i] + 1;
791         nz    = ai[i+1] - diag[i] - 1;
792         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
793         while (nz--) {
794           idx  = 6*(*vi++);
795           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
796           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
797           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
798           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
799           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
800           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
801           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
802           v   += 36;
803         }
804         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
805         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
806         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
807         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
808         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
809         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
810         idiag   -= 36;
811         i2      -= 6;
812       }
813       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
814     }
815   } else {
816     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
817   }
818   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
819   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
825 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
826 {
827   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
828   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
829   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
830   const PetscScalar  *b;
831   PetscErrorCode     ierr;
832   PetscInt           m = a->mbs,i,i2,nz,idx;
833   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
834 
835   PetscFunctionBegin;
836   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
837   its = its*lits;
838   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
839   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
840   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
841   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
842   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
843 
844   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
845 
846   diag  = a->diag;
847   idiag = a->idiag;
848   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
849   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
850 
851   if (flag & SOR_ZERO_INITIAL_GUESS) {
852     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
853       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
854       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
855       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
856       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
857       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
858       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
859       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
860       i2     = 7;
861       idiag += 49;
862       for (i=1; i<m; i++) {
863         v     = aa + 49*ai[i];
864         vi    = aj + ai[i];
865         nz    = diag[i] - ai[i];
866         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
867         while (nz--) {
868           idx  = 7*(*vi++);
869           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
870           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
871           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
872           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
873           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
874           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
875           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
876           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
877           v   += 49;
878         }
879         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
880         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
881         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
882         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
883         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
884         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
885         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
886         idiag   += 49;
887         i2      += 7;
888       }
889       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
890       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
891     }
892     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
893         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
894       i2    = 0;
895       mdiag = a->idiag+49*a->mbs;
896       for (i=0; i<m; i++) {
897         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
898         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
899         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
900         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
901         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
902         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
903         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
904         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
905         mdiag  += 36;
906         i2     += 6;
907       }
908       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
909     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
910       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
911     }
912     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
913       idiag   = a->idiag+49*a->mbs - 49;
914       i2      = 7*m - 7;
915       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
916       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
917       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
918       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
919       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
920       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
921       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
922       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
923       idiag -= 49;
924       i2    -= 7;
925       for (i=m-2; i>=0; i--) {
926         v     = aa + 49*(diag[i]+1);
927         vi    = aj + diag[i] + 1;
928         nz    = ai[i+1] - diag[i] - 1;
929         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
930         while (nz--) {
931           idx  = 7*(*vi++);
932           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
933           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
934           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
935           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
936           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
937           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
938           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
939           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
940           v   += 49;
941         }
942         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
943         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
944         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
945         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
946         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
947         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
948         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
949         idiag   -= 49;
950         i2      -= 7;
951       }
952       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
953     }
954   } else {
955     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
956   }
957   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
958   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 /*
963     Special version for direct calls from Fortran (Used in PETSc-fun3d)
964 */
965 #if defined(PETSC_HAVE_FORTRAN_CAPS)
966 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
967 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
968 #define matsetvaluesblocked4_ matsetvaluesblocked4
969 #endif
970 
971 EXTERN_C_BEGIN
972 #undef __FUNCT__
973 #define __FUNCT__ "matsetvaluesblocked4_"
974 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
975 {
976   Mat               A = *AA;
977   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
979   PetscInt          *ai=a->i,*ailen=a->ilen;
980   PetscInt          *aj=a->j,stepval,lastcol = -1;
981   const PetscScalar *value = v;
982   MatScalar         *ap,*aa = a->a,*bap;
983 
984   PetscFunctionBegin;
985   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
986   stepval = (n-1)*4;
987   for (k=0; k<m; k++) { /* loop over added rows */
988     row  = im[k];
989     rp   = aj + ai[row];
990     ap   = aa + 16*ai[row];
991     nrow = ailen[row];
992     low  = 0;
993     high = nrow;
994     for (l=0; l<n; l++) { /* loop over added columns */
995       col = in[l];
996       if (col <= lastcol) low = 0; else high = nrow;
997       lastcol = col;
998       value = v + k*(stepval+4 + l)*4;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > col) high = t;
1002         else             low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > col) break;
1006         if (rp[i] == col) {
1007           bap  = ap +  16*i;
1008           for (ii=0; ii<4; ii++,value+=stepval) {
1009             for (jj=ii; jj<16; jj+=4) {
1010               bap[jj] += *value++;
1011             }
1012           }
1013           goto noinsert2;
1014         }
1015       }
1016       N = nrow++ - 1;
1017       high++; /* added new column index thus must search to one higher than before */
1018       /* shift up all the later entries in this row */
1019       for (ii=N; ii>=i; ii--) {
1020         rp[ii+1] = rp[ii];
1021         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1022       }
1023       if (N >= i) {
1024         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1025       }
1026       rp[i] = col;
1027       bap   = ap +  16*i;
1028       for (ii=0; ii<4; ii++,value+=stepval) {
1029         for (jj=ii; jj<16; jj+=4) {
1030           bap[jj] = *value++;
1031         }
1032       }
1033       noinsert2:;
1034       low = i;
1035     }
1036     ailen[row] = nrow;
1037   }
1038   PetscFunctionReturnVoid();
1039 }
1040 EXTERN_C_END
1041 
1042 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1043 #define matsetvalues4_ MATSETVALUES4
1044 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1045 #define matsetvalues4_ matsetvalues4
1046 #endif
1047 
1048 EXTERN_C_BEGIN
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatSetValues4_"
1051 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1052 {
1053   Mat         A = *AA;
1054   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1055   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1056   PetscInt    *ai=a->i,*ailen=a->ilen;
1057   PetscInt    *aj=a->j,brow,bcol;
1058   PetscInt    ridx,cidx,lastcol = -1;
1059   MatScalar   *ap,value,*aa=a->a,*bap;
1060 
1061   PetscFunctionBegin;
1062   for (k=0; k<m; k++) { /* loop over added rows */
1063     row  = im[k]; brow = row/4;
1064     rp   = aj + ai[brow];
1065     ap   = aa + 16*ai[brow];
1066     nrow = ailen[brow];
1067     low  = 0;
1068     high = nrow;
1069     for (l=0; l<n; l++) { /* loop over added columns */
1070       col = in[l]; bcol = col/4;
1071       ridx = row % 4; cidx = col % 4;
1072       value = v[l + k*n];
1073       if (col <= lastcol) low = 0; else high = nrow;
1074       lastcol = col;
1075       while (high-low > 7) {
1076         t = (low+high)/2;
1077         if (rp[t] > bcol) high = t;
1078         else              low  = t;
1079       }
1080       for (i=low; i<high; i++) {
1081         if (rp[i] > bcol) break;
1082         if (rp[i] == bcol) {
1083           bap  = ap +  16*i + 4*cidx + ridx;
1084           *bap += value;
1085           goto noinsert1;
1086         }
1087       }
1088       N = nrow++ - 1;
1089       high++; /* added new column thus must search to one higher than before */
1090       /* shift up all the later entries in this row */
1091       for (ii=N; ii>=i; ii--) {
1092         rp[ii+1] = rp[ii];
1093         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1094       }
1095       if (N>=i) {
1096         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1097       }
1098       rp[i]                    = bcol;
1099       ap[16*i + 4*cidx + ridx] = value;
1100       noinsert1:;
1101       low = i;
1102     }
1103     ailen[brow] = nrow;
1104   }
1105   PetscFunctionReturnVoid();
1106 }
1107 EXTERN_C_END
1108 
1109 /*
1110      Checks for missing diagonals
1111 */
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1114 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1115 {
1116   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1117   PetscErrorCode ierr;
1118   PetscInt       *diag,*jj = a->j,i;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1122   *missing = PETSC_FALSE;
1123   if (A->rmap->n > 0 && !jj) {
1124     *missing  = PETSC_TRUE;
1125     if (d) *d = 0;
1126     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1127   } else {
1128     diag     = a->diag;
1129     for (i=0; i<a->mbs; i++) {
1130       if (jj[diag[i]] != i) {
1131         *missing  = PETSC_TRUE;
1132         if (d) *d = i;
1133         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1134       }
1135     }
1136   }
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1142 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1143 {
1144   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscInt       i,j,m = a->mbs;
1147 
1148   PetscFunctionBegin;
1149   if (!a->diag) {
1150     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1151   }
1152   for (i=0; i<m; i++) {
1153     a->diag[i] = a->i[i+1];
1154     for (j=a->i[i]; j<a->i[i+1]; j++) {
1155       if (a->j[j] == i) {
1156         a->diag[i] = j;
1157         break;
1158       }
1159     }
1160   }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 
1165 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1169 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1170 {
1171   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1172   PetscErrorCode ierr;
1173   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1174   PetscInt       *tia, *tja;
1175 
1176   PetscFunctionBegin;
1177   *nn = n;
1178   if (!ia) PetscFunctionReturn(0);
1179   if (symmetric) {
1180     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1181   } else {
1182     tia = a->i; tja = a->j;
1183   }
1184 
1185   if (!blockcompressed && bs > 1) {
1186     (*nn) *= bs;
1187     nbs    = bs;
1188     /* malloc & create the natural set of indices */
1189     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1190     if (n) {
1191       (*ia)[0] = 0;
1192       for (j=1; j<bs; j++) {
1193         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1194       }
1195     }
1196 
1197     for (i=1; i<n; i++) {
1198       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1199       for (j=1; j<bs; j++) {
1200         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1201       }
1202     }
1203     if (n) {
1204       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1205     }
1206 
1207     if (ja) {
1208       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1209       cnt = 0;
1210       for (i=0; i<n; i++) {
1211         for (j=0; j<bs; j++) {
1212           for (k=tia[i]; k<tia[i+1]; k++) {
1213             for (l=0; l<bs; l++) {
1214               (*ja)[cnt++] = bs*tja[k] + l;
1215 	    }
1216           }
1217         }
1218       }
1219     }
1220 
1221     n     *= bs;
1222     nz *= bs*bs;
1223     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1224       ierr = PetscFree(tia);CHKERRQ(ierr);
1225       ierr = PetscFree(tja);CHKERRQ(ierr);
1226     }
1227   } else {
1228     *ia = tia;
1229     if (ja) *ja = tja;
1230   }
1231   if (oshift == 1) {
1232     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1233     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1240 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1241 {
1242   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1243   PetscErrorCode ierr;
1244   PetscInt       i,n = a->mbs,nz = a->i[n];
1245 
1246   PetscFunctionBegin;
1247   if (!ia) PetscFunctionReturn(0);
1248   if (!blockcompressed && A->rmap->bs > 1) {
1249     ierr = PetscFree(*ia);CHKERRQ(ierr);
1250     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1251   } else if (symmetric) {
1252     ierr = PetscFree(*ia);CHKERRQ(ierr);
1253     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1254   } else if (oshift == 1) { /* blockcompressed */
1255     for (i=0; i<n+1; i++) a->i[i]--;
1256     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1257   }
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1263 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1264 {
1265   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1266   PetscErrorCode ierr;
1267 
1268   PetscFunctionBegin;
1269 #if defined(PETSC_USE_LOG)
1270   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1271 #endif
1272   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1273   if (a->row) {
1274     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1275   }
1276   if (a->col) {
1277     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1278   }
1279   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1280   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1281   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1282   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1283   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1284   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1285   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1286   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1287   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1288 
1289   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1290   ierr = PetscFree(a);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1306 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1307 {
1308   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310 
1311   PetscFunctionBegin;
1312   switch (op) {
1313   case MAT_ROW_ORIENTED:
1314     a->roworiented    = flg;
1315     break;
1316   case MAT_KEEP_NONZERO_PATTERN:
1317     a->keepnonzeropattern = flg;
1318     break;
1319   case MAT_NEW_NONZERO_LOCATIONS:
1320     a->nonew          = (flg ? 0 : 1);
1321     break;
1322   case MAT_NEW_NONZERO_LOCATION_ERR:
1323     a->nonew          = (flg ? -1 : 0);
1324     break;
1325   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1326     a->nonew          = (flg ? -2 : 0);
1327     break;
1328   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1329     a->nounused       = (flg ? -1 : 0);
1330     break;
1331   case MAT_NEW_DIAGONALS:
1332   case MAT_IGNORE_OFF_PROC_ENTRIES:
1333   case MAT_USE_HASH_TABLE:
1334     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1335     break;
1336   case MAT_SYMMETRIC:
1337   case MAT_STRUCTURALLY_SYMMETRIC:
1338   case MAT_HERMITIAN:
1339   case MAT_SYMMETRY_ETERNAL:
1340     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1341     break;
1342   default:
1343     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1344   }
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1350 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1351 {
1352   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1353   PetscErrorCode ierr;
1354   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1355   MatScalar      *aa,*aa_i;
1356   PetscScalar    *v_i;
1357 
1358   PetscFunctionBegin;
1359   bs  = A->rmap->bs;
1360   ai  = a->i;
1361   aj  = a->j;
1362   aa  = a->a;
1363   bs2 = a->bs2;
1364 
1365   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1366 
1367   bn  = row/bs;   /* Block number */
1368   bp  = row % bs; /* Block Position */
1369   M   = ai[bn+1] - ai[bn];
1370   *nz = bs*M;
1371 
1372   if (v) {
1373     *v = 0;
1374     if (*nz) {
1375       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1376       for (i=0; i<M; i++) { /* for each block in the block row */
1377         v_i  = *v + i*bs;
1378         aa_i = aa + bs2*(ai[bn] + i);
1379         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1380       }
1381     }
1382   }
1383 
1384   if (idx) {
1385     *idx = 0;
1386     if (*nz) {
1387       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1388       for (i=0; i<M; i++) { /* for each block in the block row */
1389         idx_i = *idx + i*bs;
1390         itmp  = bs*aj[ai[bn] + i];
1391         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1392       }
1393     }
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1400 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1401 {
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1406   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1412 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1413 {
1414   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1415   Mat            C;
1416   PetscErrorCode ierr;
1417   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1418   PetscInt       *rows,*cols,bs2=a->bs2;
1419   MatScalar      *array;
1420 
1421   PetscFunctionBegin;
1422   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1423   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1424     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1425     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1426 
1427     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1428     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1429     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1430     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1431     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1432     ierr = PetscFree(col);CHKERRQ(ierr);
1433   } else {
1434     C = *B;
1435   }
1436 
1437   array = a->a;
1438   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1439   cols = rows + bs;
1440   for (i=0; i<mbs; i++) {
1441     cols[0] = i*bs;
1442     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1443     len = ai[i+1] - ai[i];
1444     for (j=0; j<len; j++) {
1445       rows[0] = (*aj++)*bs;
1446       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1447       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1448       array += bs2;
1449     }
1450   }
1451   ierr = PetscFree(rows);CHKERRQ(ierr);
1452 
1453   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455 
1456   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1457     *B = C;
1458   } else {
1459     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1460   }
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1466 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1467 {
1468   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1469   PetscErrorCode ierr;
1470   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1471   int            fd;
1472   PetscScalar    *aa;
1473   FILE           *file;
1474 
1475   PetscFunctionBegin;
1476   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1477   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1478   col_lens[0] = MAT_FILE_COOKIE;
1479 
1480   col_lens[1] = A->rmap->N;
1481   col_lens[2] = A->cmap->n;
1482   col_lens[3] = a->nz*bs2;
1483 
1484   /* store lengths of each row and write (including header) to file */
1485   count = 0;
1486   for (i=0; i<a->mbs; i++) {
1487     for (j=0; j<bs; j++) {
1488       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1489     }
1490   }
1491   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1492   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1493 
1494   /* store column indices (zero start index) */
1495   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1496   count = 0;
1497   for (i=0; i<a->mbs; i++) {
1498     for (j=0; j<bs; j++) {
1499       for (k=a->i[i]; k<a->i[i+1]; k++) {
1500         for (l=0; l<bs; l++) {
1501           jj[count++] = bs*a->j[k] + l;
1502         }
1503       }
1504     }
1505   }
1506   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1507   ierr = PetscFree(jj);CHKERRQ(ierr);
1508 
1509   /* store nonzero values */
1510   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1511   count = 0;
1512   for (i=0; i<a->mbs; i++) {
1513     for (j=0; j<bs; j++) {
1514       for (k=a->i[i]; k<a->i[i+1]; k++) {
1515         for (l=0; l<bs; l++) {
1516           aa[count++] = a->a[bs2*k + l*bs + j];
1517         }
1518       }
1519     }
1520   }
1521   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1522   ierr = PetscFree(aa);CHKERRQ(ierr);
1523 
1524   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1525   if (file) {
1526     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1527   }
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1533 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1534 {
1535   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1536   PetscErrorCode    ierr;
1537   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1538   PetscViewerFormat format;
1539 
1540   PetscFunctionBegin;
1541   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1542   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1543     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1544   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1545     Mat aij;
1546     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1547     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1548     ierr = MatDestroy(aij);CHKERRQ(ierr);
1549   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1550      PetscFunctionReturn(0);
1551   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1552     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1553     for (i=0; i<a->mbs; i++) {
1554       for (j=0; j<bs; j++) {
1555         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1556         for (k=a->i[i]; k<a->i[i+1]; k++) {
1557           for (l=0; l<bs; l++) {
1558 #if defined(PETSC_USE_COMPLEX)
1559             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1560               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1561                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1562             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1564                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1567             }
1568 #else
1569             if (a->a[bs2*k + l*bs + j] != 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1571             }
1572 #endif
1573           }
1574         }
1575         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1576       }
1577     }
1578     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1579   } else {
1580     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1581     for (i=0; i<a->mbs; i++) {
1582       for (j=0; j<bs; j++) {
1583         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1584         for (k=a->i[i]; k<a->i[i+1]; k++) {
1585           for (l=0; l<bs; l++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1588               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1589                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1590             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1591               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1592                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1593             } else {
1594               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1595             }
1596 #else
1597             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1598 #endif
1599           }
1600         }
1601         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1602       }
1603     }
1604     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1605   }
1606   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1612 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1613 {
1614   Mat            A = (Mat) Aa;
1615   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1616   PetscErrorCode ierr;
1617   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1618   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1619   MatScalar      *aa;
1620   PetscViewer    viewer;
1621 
1622   PetscFunctionBegin;
1623 
1624   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1625   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1626 
1627   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1628 
1629   /* loop over matrix elements drawing boxes */
1630   color = PETSC_DRAW_BLUE;
1631   for (i=0,row=0; i<mbs; i++,row+=bs) {
1632     for (j=a->i[i]; j<a->i[i+1]; j++) {
1633       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1634       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635       aa = a->a + j*bs2;
1636       for (k=0; k<bs; k++) {
1637         for (l=0; l<bs; l++) {
1638           if (PetscRealPart(*aa++) >=  0.) continue;
1639           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1640         }
1641       }
1642     }
1643   }
1644   color = PETSC_DRAW_CYAN;
1645   for (i=0,row=0; i<mbs; i++,row+=bs) {
1646     for (j=a->i[i]; j<a->i[i+1]; j++) {
1647       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1648       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1649       aa = a->a + j*bs2;
1650       for (k=0; k<bs; k++) {
1651         for (l=0; l<bs; l++) {
1652           if (PetscRealPart(*aa++) != 0.) continue;
1653           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1654         }
1655       }
1656     }
1657   }
1658 
1659   color = PETSC_DRAW_RED;
1660   for (i=0,row=0; i<mbs; i++,row+=bs) {
1661     for (j=a->i[i]; j<a->i[i+1]; j++) {
1662       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1663       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1664       aa = a->a + j*bs2;
1665       for (k=0; k<bs; k++) {
1666         for (l=0; l<bs; l++) {
1667           if (PetscRealPart(*aa++) <= 0.) continue;
1668           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1669         }
1670       }
1671     }
1672   }
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1679 {
1680   PetscErrorCode ierr;
1681   PetscReal      xl,yl,xr,yr,w,h;
1682   PetscDraw      draw;
1683   PetscTruth     isnull;
1684 
1685   PetscFunctionBegin;
1686 
1687   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1688   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1689 
1690   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1691   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1692   xr += w;    yr += h;  xl = -w;     yl = -h;
1693   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1694   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1695   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatView_SeqBAIJ"
1701 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1702 {
1703   PetscErrorCode ierr;
1704   PetscTruth     iascii,isbinary,isdraw;
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1708   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1709   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1710   if (iascii){
1711     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1712   } else if (isbinary) {
1713     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1714   } else if (isdraw) {
1715     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1716   } else {
1717     Mat B;
1718     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1719     ierr = MatView(B,viewer);CHKERRQ(ierr);
1720     ierr = MatDestroy(B);CHKERRQ(ierr);
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1728 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1729 {
1730   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1731   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1732   PetscInt    *ai = a->i,*ailen = a->ilen;
1733   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1734   MatScalar   *ap,*aa = a->a;
1735 
1736   PetscFunctionBegin;
1737   for (k=0; k<m; k++) { /* loop over rows */
1738     row  = im[k]; brow = row/bs;
1739     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1740     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1741     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1742     nrow = ailen[brow];
1743     for (l=0; l<n; l++) { /* loop over columns */
1744       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1745       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1746       col  = in[l] ;
1747       bcol = col/bs;
1748       cidx = col%bs;
1749       ridx = row%bs;
1750       high = nrow;
1751       low  = 0; /* assume unsorted */
1752       while (high-low > 5) {
1753         t = (low+high)/2;
1754         if (rp[t] > bcol) high = t;
1755         else             low  = t;
1756       }
1757       for (i=low; i<high; i++) {
1758         if (rp[i] > bcol) break;
1759         if (rp[i] == bcol) {
1760           *v++ = ap[bs2*i+bs*cidx+ridx];
1761           goto finished;
1762         }
1763       }
1764       *v++ = 0.0;
1765       finished:;
1766     }
1767   }
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #define CHUNKSIZE 10
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1774 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1775 {
1776   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1777   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1778   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1779   PetscErrorCode    ierr;
1780   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1781   PetscTruth        roworiented=a->roworiented;
1782   const PetscScalar *value = v;
1783   MatScalar         *ap,*aa = a->a,*bap;
1784 
1785   PetscFunctionBegin;
1786   if (roworiented) {
1787     stepval = (n-1)*bs;
1788   } else {
1789     stepval = (m-1)*bs;
1790   }
1791   for (k=0; k<m; k++) { /* loop over added rows */
1792     row  = im[k];
1793     if (row < 0) continue;
1794 #if defined(PETSC_USE_DEBUG)
1795     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1796 #endif
1797     rp   = aj + ai[row];
1798     ap   = aa + bs2*ai[row];
1799     rmax = imax[row];
1800     nrow = ailen[row];
1801     low  = 0;
1802     high = nrow;
1803     for (l=0; l<n; l++) { /* loop over added columns */
1804       if (in[l] < 0) continue;
1805 #if defined(PETSC_USE_DEBUG)
1806       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1807 #endif
1808       col = in[l];
1809       if (roworiented) {
1810         value = v + k*(stepval+bs)*bs + l*bs;
1811       } else {
1812         value = v + l*(stepval+bs)*bs + k*bs;
1813       }
1814       if (col <= lastcol) low = 0; else high = nrow;
1815       lastcol = col;
1816       while (high-low > 7) {
1817         t = (low+high)/2;
1818         if (rp[t] > col) high = t;
1819         else             low  = t;
1820       }
1821       for (i=low; i<high; i++) {
1822         if (rp[i] > col) break;
1823         if (rp[i] == col) {
1824           bap  = ap +  bs2*i;
1825           if (roworiented) {
1826             if (is == ADD_VALUES) {
1827               for (ii=0; ii<bs; ii++,value+=stepval) {
1828                 for (jj=ii; jj<bs2; jj+=bs) {
1829                   bap[jj] += *value++;
1830                 }
1831               }
1832             } else {
1833               for (ii=0; ii<bs; ii++,value+=stepval) {
1834                 for (jj=ii; jj<bs2; jj+=bs) {
1835                   bap[jj] = *value++;
1836                 }
1837               }
1838             }
1839           } else {
1840             if (is == ADD_VALUES) {
1841               for (ii=0; ii<bs; ii++,value+=stepval) {
1842                 for (jj=0; jj<bs; jj++) {
1843                   *bap++ += *value++;
1844                 }
1845               }
1846             } else {
1847               for (ii=0; ii<bs; ii++,value+=stepval) {
1848                 for (jj=0; jj<bs; jj++) {
1849                   *bap++  = *value++;
1850                 }
1851               }
1852             }
1853           }
1854           goto noinsert2;
1855         }
1856       }
1857       if (nonew == 1) goto noinsert2;
1858       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1859       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1860       N = nrow++ - 1; high++;
1861       /* shift up all the later entries in this row */
1862       for (ii=N; ii>=i; ii--) {
1863         rp[ii+1] = rp[ii];
1864         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1865       }
1866       if (N >= i) {
1867         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1868       }
1869       rp[i] = col;
1870       bap   = ap +  bs2*i;
1871       if (roworiented) {
1872         for (ii=0; ii<bs; ii++,value+=stepval) {
1873           for (jj=ii; jj<bs2; jj+=bs) {
1874             bap[jj] = *value++;
1875           }
1876         }
1877       } else {
1878         for (ii=0; ii<bs; ii++,value+=stepval) {
1879           for (jj=0; jj<bs; jj++) {
1880             *bap++  = *value++;
1881           }
1882         }
1883       }
1884       noinsert2:;
1885       low = i;
1886     }
1887     ailen[row] = nrow;
1888   }
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1895 {
1896   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1897   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1898   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1899   PetscErrorCode ierr;
1900   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1901   MatScalar      *aa = a->a,*ap;
1902   PetscReal      ratio=0.6;
1903 
1904   PetscFunctionBegin;
1905   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1906 
1907   if (m) rmax = ailen[0];
1908   for (i=1; i<mbs; i++) {
1909     /* move each row back by the amount of empty slots (fshift) before it*/
1910     fshift += imax[i-1] - ailen[i-1];
1911     rmax   = PetscMax(rmax,ailen[i]);
1912     if (fshift) {
1913       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1914       N = ailen[i];
1915       for (j=0; j<N; j++) {
1916         ip[j-fshift] = ip[j];
1917         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1918       }
1919     }
1920     ai[i] = ai[i-1] + ailen[i-1];
1921   }
1922   if (mbs) {
1923     fshift += imax[mbs-1] - ailen[mbs-1];
1924     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1925   }
1926   /* reset ilen and imax for each row */
1927   for (i=0; i<mbs; i++) {
1928     ailen[i] = imax[i] = ai[i+1] - ai[i];
1929   }
1930   a->nz = ai[mbs];
1931 
1932   /* diagonals may have moved, so kill the diagonal pointers */
1933   a->idiagvalid = PETSC_FALSE;
1934   if (fshift && a->diag) {
1935     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1936     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1937     a->diag = 0;
1938   }
1939   if (fshift && a->nounused == -1) {
1940     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1941   }
1942   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1943   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1944   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1945   a->reallocs          = 0;
1946   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1947 
1948   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1949   if (a->compressedrow.use){
1950     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1951   }
1952 
1953   A->same_nonzero = PETSC_TRUE;
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 /*
1958    This function returns an array of flags which indicate the locations of contiguous
1959    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1960    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1961    Assume: sizes should be long enough to hold all the values.
1962 */
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1965 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1966 {
1967   PetscInt   i,j,k,row;
1968   PetscTruth flg;
1969 
1970   PetscFunctionBegin;
1971   for (i=0,j=0; i<n; j++) {
1972     row = idx[i];
1973     if (row%bs!=0) { /* Not the begining of a block */
1974       sizes[j] = 1;
1975       i++;
1976     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1977       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1978       i++;
1979     } else { /* Begining of the block, so check if the complete block exists */
1980       flg = PETSC_TRUE;
1981       for (k=1; k<bs; k++) {
1982         if (row+k != idx[i+k]) { /* break in the block */
1983           flg = PETSC_FALSE;
1984           break;
1985         }
1986       }
1987       if (flg) { /* No break in the bs */
1988         sizes[j] = bs;
1989         i+= bs;
1990       } else {
1991         sizes[j] = 1;
1992         i++;
1993       }
1994     }
1995   }
1996   *bs_max = j;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2002 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2003 {
2004   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2005   PetscErrorCode ierr;
2006   PetscInt       i,j,k,count,*rows;
2007   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2008   PetscScalar    zero = 0.0;
2009   MatScalar      *aa;
2010 
2011   PetscFunctionBegin;
2012   /* Make a copy of the IS and  sort it */
2013   /* allocate memory for rows,sizes */
2014   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2015   sizes = rows + is_n;
2016 
2017   /* copy IS values to rows, and sort them */
2018   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2019   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2020   if (baij->keepnonzeropattern) {
2021     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2022     bs_max = is_n;
2023     A->same_nonzero = PETSC_TRUE;
2024   } else {
2025     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2026     A->same_nonzero = PETSC_FALSE;
2027   }
2028 
2029   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2030     row   = rows[j];
2031     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2032     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2033     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2034     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2035       if (diag != 0.0) {
2036         if (baij->ilen[row/bs] > 0) {
2037           baij->ilen[row/bs]       = 1;
2038           baij->j[baij->i[row/bs]] = row/bs;
2039           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2040         }
2041         /* Now insert all the diagonal values for this bs */
2042         for (k=0; k<bs; k++) {
2043           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2044         }
2045       } else { /* (diag == 0.0) */
2046         baij->ilen[row/bs] = 0;
2047       } /* end (diag == 0.0) */
2048     } else { /* (sizes[i] != bs) */
2049 #if defined (PETSC_USE_DEBUG)
2050       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2051 #endif
2052       for (k=0; k<count; k++) {
2053         aa[0] =  zero;
2054         aa    += bs;
2055       }
2056       if (diag != 0.0) {
2057         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2058       }
2059     }
2060   }
2061 
2062   ierr = PetscFree(rows);CHKERRQ(ierr);
2063   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2069 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2070 {
2071   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2072   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2073   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2074   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2075   PetscErrorCode ierr;
2076   PetscInt       ridx,cidx,bs2=a->bs2;
2077   PetscTruth     roworiented=a->roworiented;
2078   MatScalar      *ap,value,*aa=a->a,*bap;
2079 
2080   PetscFunctionBegin;
2081   for (k=0; k<m; k++) { /* loop over added rows */
2082     row  = im[k];
2083     brow = row/bs;
2084     if (row < 0) continue;
2085 #if defined(PETSC_USE_DEBUG)
2086     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2087 #endif
2088     rp   = aj + ai[brow];
2089     ap   = aa + bs2*ai[brow];
2090     rmax = imax[brow];
2091     nrow = ailen[brow];
2092     low  = 0;
2093     high = nrow;
2094     for (l=0; l<n; l++) { /* loop over added columns */
2095       if (in[l] < 0) continue;
2096 #if defined(PETSC_USE_DEBUG)
2097       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2098 #endif
2099       col = in[l]; bcol = col/bs;
2100       ridx = row % bs; cidx = col % bs;
2101       if (roworiented) {
2102         value = v[l + k*n];
2103       } else {
2104         value = v[k + l*m];
2105       }
2106       if (col <= lastcol) low = 0; else high = nrow;
2107       lastcol = col;
2108       while (high-low > 7) {
2109         t = (low+high)/2;
2110         if (rp[t] > bcol) high = t;
2111         else              low  = t;
2112       }
2113       for (i=low; i<high; i++) {
2114         if (rp[i] > bcol) break;
2115         if (rp[i] == bcol) {
2116           bap  = ap +  bs2*i + bs*cidx + ridx;
2117           if (is == ADD_VALUES) *bap += value;
2118           else                  *bap  = value;
2119           goto noinsert1;
2120         }
2121       }
2122       if (nonew == 1) goto noinsert1;
2123       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2124       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2125       N = nrow++ - 1; high++;
2126       /* shift up all the later entries in this row */
2127       for (ii=N; ii>=i; ii--) {
2128         rp[ii+1] = rp[ii];
2129         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2130       }
2131       if (N>=i) {
2132         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2133       }
2134       rp[i]                      = bcol;
2135       ap[bs2*i + bs*cidx + ridx] = value;
2136       a->nz++;
2137       noinsert1:;
2138       low = i;
2139     }
2140     ailen[brow] = nrow;
2141   }
2142   A->same_nonzero = PETSC_FALSE;
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2150 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2151 {
2152   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2153   Mat            outA;
2154   PetscErrorCode ierr;
2155   PetscTruth     row_identity,col_identity;
2156 
2157   PetscFunctionBegin;
2158   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2159   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2160   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2161   if (!row_identity || !col_identity) {
2162     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2163   }
2164 
2165   outA          = inA;
2166   inA->factor   = MAT_FACTOR_LU;
2167 
2168   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2169 
2170   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2171   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2172   a->row = row;
2173   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2174   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2175   a->col = col;
2176 
2177   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2178   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2179   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2180 
2181   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2182   if (!a->solve_work) {
2183     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2184     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2185   }
2186   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2187 
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 EXTERN_C_BEGIN
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2194 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2195 {
2196   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2197   PetscInt    i,nz,mbs;
2198 
2199   PetscFunctionBegin;
2200   nz  = baij->maxnz/baij->bs2;
2201   mbs = baij->mbs;
2202   for (i=0; i<nz; i++) {
2203     baij->j[i] = indices[i];
2204   }
2205   baij->nz = nz;
2206   for (i=0; i<mbs; i++) {
2207     baij->ilen[i] = baij->imax[i];
2208   }
2209   PetscFunctionReturn(0);
2210 }
2211 EXTERN_C_END
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2215 /*@
2216     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2217        in the matrix.
2218 
2219   Input Parameters:
2220 +  mat - the SeqBAIJ matrix
2221 -  indices - the column indices
2222 
2223   Level: advanced
2224 
2225   Notes:
2226     This can be called if you have precomputed the nonzero structure of the
2227   matrix and want to provide it to the matrix object to improve the performance
2228   of the MatSetValues() operation.
2229 
2230     You MUST have set the correct numbers of nonzeros per row in the call to
2231   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2232 
2233     MUST be called before any calls to MatSetValues();
2234 
2235 @*/
2236 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2237 {
2238   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2239 
2240   PetscFunctionBegin;
2241   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2242   PetscValidPointer(indices,2);
2243   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2244   if (f) {
2245     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2246   } else {
2247     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2248   }
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2254 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2255 {
2256   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2257   PetscErrorCode ierr;
2258   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2259   PetscReal      atmp;
2260   PetscScalar    *x,zero = 0.0;
2261   MatScalar      *aa;
2262   PetscInt       ncols,brow,krow,kcol;
2263 
2264   PetscFunctionBegin;
2265   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2266   bs   = A->rmap->bs;
2267   aa   = a->a;
2268   ai   = a->i;
2269   aj   = a->j;
2270   mbs  = a->mbs;
2271 
2272   ierr = VecSet(v,zero);CHKERRQ(ierr);
2273   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2274   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2275   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2276   for (i=0; i<mbs; i++) {
2277     ncols = ai[1] - ai[0]; ai++;
2278     brow  = bs*i;
2279     for (j=0; j<ncols; j++){
2280       for (kcol=0; kcol<bs; kcol++){
2281         for (krow=0; krow<bs; krow++){
2282           atmp = PetscAbsScalar(*aa);aa++;
2283           row = brow + krow;    /* row index */
2284           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2285           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2286         }
2287       }
2288       aj++;
2289     }
2290   }
2291   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatCopy_SeqBAIJ"
2297 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2298 {
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   /* If the two matrices have the same copy implementation, use fast copy. */
2303   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2304     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2305     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2306 
2307     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2308       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2309     }
2310     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2311   } else {
2312     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2313   }
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2319 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2320 {
2321   PetscErrorCode ierr;
2322 
2323   PetscFunctionBegin;
2324   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2330 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2331 {
2332   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2333   PetscFunctionBegin;
2334   *array = a->a;
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2340 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2341 {
2342   PetscFunctionBegin;
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #include "petscblaslapack.h"
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2349 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2350 {
2351   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2352   PetscErrorCode ierr;
2353   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2354   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2355 
2356   PetscFunctionBegin;
2357   if (str == SAME_NONZERO_PATTERN) {
2358     PetscScalar alpha = a;
2359     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2360   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2361     if (y->xtoy && y->XtoY != X) {
2362       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2363       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2364     }
2365     if (!y->xtoy) { /* get xtoy */
2366       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2367       y->XtoY = X;
2368       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2369     }
2370     bs2 = bs*bs;
2371     for (i=0; i<x->nz; i++) {
2372       j = 0;
2373       while (j < bs2){
2374         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2375         j++;
2376       }
2377     }
2378     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2379   } else {
2380     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2387 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2388 {
2389   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2390   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2391   MatScalar      *aa = a->a;
2392 
2393   PetscFunctionBegin;
2394   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2400 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2401 {
2402   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2403   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2404   MatScalar      *aa = a->a;
2405 
2406   PetscFunctionBegin;
2407   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 
2412 /* -------------------------------------------------------------------*/
2413 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2414        MatGetRow_SeqBAIJ,
2415        MatRestoreRow_SeqBAIJ,
2416        MatMult_SeqBAIJ_N,
2417 /* 4*/ MatMultAdd_SeqBAIJ_N,
2418        MatMultTranspose_SeqBAIJ,
2419        MatMultTransposeAdd_SeqBAIJ,
2420        0,
2421        0,
2422        0,
2423 /*10*/ 0,
2424        MatLUFactor_SeqBAIJ,
2425        0,
2426        0,
2427        MatTranspose_SeqBAIJ,
2428 /*15*/ MatGetInfo_SeqBAIJ,
2429        MatEqual_SeqBAIJ,
2430        MatGetDiagonal_SeqBAIJ,
2431        MatDiagonalScale_SeqBAIJ,
2432        MatNorm_SeqBAIJ,
2433 /*20*/ 0,
2434        MatAssemblyEnd_SeqBAIJ,
2435        MatSetOption_SeqBAIJ,
2436        MatZeroEntries_SeqBAIJ,
2437 /*24*/ MatZeroRows_SeqBAIJ,
2438        0,
2439        0,
2440        0,
2441        0,
2442 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2443        0,
2444        0,
2445        MatGetArray_SeqBAIJ,
2446        MatRestoreArray_SeqBAIJ,
2447 /*34*/ MatDuplicate_SeqBAIJ,
2448        0,
2449        0,
2450        MatILUFactor_SeqBAIJ,
2451        0,
2452 /*39*/ MatAXPY_SeqBAIJ,
2453        MatGetSubMatrices_SeqBAIJ,
2454        MatIncreaseOverlap_SeqBAIJ,
2455        MatGetValues_SeqBAIJ,
2456        MatCopy_SeqBAIJ,
2457 /*44*/ 0,
2458        MatScale_SeqBAIJ,
2459        0,
2460        0,
2461        MatILUDTFactor_SeqBAIJ,
2462 /*49*/ 0,
2463        MatGetRowIJ_SeqBAIJ,
2464        MatRestoreRowIJ_SeqBAIJ,
2465        0,
2466        0,
2467 /*54*/ 0,
2468        0,
2469        0,
2470        0,
2471        MatSetValuesBlocked_SeqBAIJ,
2472 /*59*/ MatGetSubMatrix_SeqBAIJ,
2473        MatDestroy_SeqBAIJ,
2474        MatView_SeqBAIJ,
2475        0,
2476        0,
2477 /*64*/ 0,
2478        0,
2479        0,
2480        0,
2481        0,
2482 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2483        0,
2484        MatConvert_Basic,
2485        0,
2486        0,
2487 /*74*/ 0,
2488        0,
2489        0,
2490        0,
2491        0,
2492 /*79*/ 0,
2493        0,
2494        0,
2495        0,
2496        MatLoad_SeqBAIJ,
2497 /*84*/ 0,
2498        0,
2499        0,
2500        0,
2501        0,
2502 /*89*/ 0,
2503        0,
2504        0,
2505        0,
2506        0,
2507 /*94*/ 0,
2508        0,
2509        0,
2510        0,
2511        0,
2512 /*99*/0,
2513        0,
2514        0,
2515        0,
2516        0,
2517 /*104*/0,
2518        MatRealPart_SeqBAIJ,
2519        MatImaginaryPart_SeqBAIJ,
2520        0,
2521        0,
2522 /*109*/0,
2523        0,
2524        0,
2525        0,
2526        MatMissingDiagonal_SeqBAIJ
2527 /*114*/
2528 };
2529 
2530 EXTERN_C_BEGIN
2531 #undef __FUNCT__
2532 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2533 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2534 {
2535   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2536   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2537   PetscErrorCode ierr;
2538 
2539   PetscFunctionBegin;
2540   if (aij->nonew != 1) {
2541     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2542   }
2543 
2544   /* allocate space for values if not already there */
2545   if (!aij->saved_values) {
2546     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2547   }
2548 
2549   /* copy values over */
2550   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 EXTERN_C_END
2554 
2555 EXTERN_C_BEGIN
2556 #undef __FUNCT__
2557 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2558 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2559 {
2560   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2561   PetscErrorCode ierr;
2562   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2563 
2564   PetscFunctionBegin;
2565   if (aij->nonew != 1) {
2566     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2567   }
2568   if (!aij->saved_values) {
2569     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2570   }
2571 
2572   /* copy values over */
2573   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2574   PetscFunctionReturn(0);
2575 }
2576 EXTERN_C_END
2577 
2578 EXTERN_C_BEGIN
2579 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2580 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2581 EXTERN_C_END
2582 
2583 EXTERN_C_BEGIN
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2586 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2587 {
2588   Mat_SeqBAIJ    *b;
2589   PetscErrorCode ierr;
2590   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2591   PetscTruth     flg,skipallocation = PETSC_FALSE;
2592 
2593   PetscFunctionBegin;
2594 
2595   if (nz == MAT_SKIP_ALLOCATION) {
2596     skipallocation = PETSC_TRUE;
2597     nz             = 0;
2598   }
2599 
2600   if (bs < 0) {
2601     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2602       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2603     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2604     bs   = PetscAbs(bs);
2605   }
2606   if (nnz && newbs != bs) {
2607     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2608   }
2609   bs   = newbs;
2610 
2611   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2612   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2613   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2614   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2615 
2616   B->preallocated = PETSC_TRUE;
2617 
2618   mbs  = B->rmap->n/bs;
2619   nbs  = B->cmap->n/bs;
2620   bs2  = bs*bs;
2621 
2622   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2623     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2624   }
2625 
2626   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2627   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2628   if (nnz) {
2629     for (i=0; i<mbs; i++) {
2630       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2631       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2632     }
2633   }
2634 
2635   b       = (Mat_SeqBAIJ*)B->data;
2636   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2637     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2638   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2639 
2640   if (!flg) {
2641     switch (bs) {
2642     case 1:
2643       B->ops->mult            = MatMult_SeqBAIJ_1;
2644       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2645       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2646       break;
2647     case 2:
2648       B->ops->mult            = MatMult_SeqBAIJ_2;
2649       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2650       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2651       break;
2652     case 3:
2653       B->ops->mult            = MatMult_SeqBAIJ_3;
2654       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2655       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2656       break;
2657     case 4:
2658       B->ops->mult            = MatMult_SeqBAIJ_4;
2659       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2660       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2661       break;
2662     case 5:
2663       B->ops->mult            = MatMult_SeqBAIJ_5;
2664       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2665       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2666       break;
2667     case 6:
2668       B->ops->mult            = MatMult_SeqBAIJ_6;
2669       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2670       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2671       break;
2672     case 7:
2673       B->ops->mult            = MatMult_SeqBAIJ_7;
2674       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2675       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2676       break;
2677     default:
2678       B->ops->mult            = MatMult_SeqBAIJ_N;
2679       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2680       break;
2681     }
2682   }
2683   B->rmap->bs      = bs;
2684   b->mbs     = mbs;
2685   b->nbs     = nbs;
2686   if (!skipallocation) {
2687     if (!b->imax) {
2688       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2689     }
2690     /* b->ilen will count nonzeros in each block row so far. */
2691     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2692     if (!nnz) {
2693       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2694       else if (nz <= 0)        nz = 1;
2695       for (i=0; i<mbs; i++) b->imax[i] = nz;
2696       nz = nz*mbs;
2697     } else {
2698       nz = 0;
2699       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2700     }
2701 
2702     /* allocate the matrix space */
2703     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2704     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2705     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2706     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2707     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2708     b->singlemalloc = PETSC_TRUE;
2709     b->i[0] = 0;
2710     for (i=1; i<mbs+1; i++) {
2711       b->i[i] = b->i[i-1] + b->imax[i-1];
2712     }
2713     b->free_a     = PETSC_TRUE;
2714     b->free_ij    = PETSC_TRUE;
2715   } else {
2716     b->free_a     = PETSC_FALSE;
2717     b->free_ij    = PETSC_FALSE;
2718   }
2719 
2720   B->rmap->bs          = bs;
2721   b->bs2              = bs2;
2722   b->mbs              = mbs;
2723   b->nz               = 0;
2724   b->maxnz            = nz*bs2;
2725   B->info.nz_unneeded = (PetscReal)b->maxnz;
2726   PetscFunctionReturn(0);
2727 }
2728 EXTERN_C_END
2729 
2730 EXTERN_C_BEGIN
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2733 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2734 {
2735   PetscInt       i,m,nz,nz_max=0,*nnz;
2736   PetscScalar    *values=0;
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740 
2741   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2742 
2743   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2744   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2745   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2746   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2747   m = B->rmap->n/bs;
2748 
2749   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2750   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2751   for(i=0; i<m; i++) {
2752     nz = ii[i+1]- ii[i];
2753     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2754     nz_max = PetscMax(nz_max, nz);
2755     nnz[i] = nz;
2756   }
2757   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2758   ierr = PetscFree(nnz);CHKERRQ(ierr);
2759 
2760   values = (PetscScalar*)V;
2761   if (!values) {
2762     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2763     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2764   }
2765   for (i=0; i<m; i++) {
2766     PetscInt          ncols  = ii[i+1] - ii[i];
2767     const PetscInt    *icols = jj + ii[i];
2768     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2769     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2770   }
2771   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2772   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2773   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2774 
2775   PetscFunctionReturn(0);
2776 }
2777 EXTERN_C_END
2778 
2779 
2780 EXTERN_C_BEGIN
2781 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2782 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2783 EXTERN_C_END
2784 
2785 /*MC
2786    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2787    block sparse compressed row format.
2788 
2789    Options Database Keys:
2790 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2791 
2792   Level: beginner
2793 
2794 .seealso: MatCreateSeqBAIJ()
2795 M*/
2796 
2797 
2798 EXTERN_C_BEGIN
2799 #undef __FUNCT__
2800 #define __FUNCT__ "MatCreate_SeqBAIJ"
2801 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2802 {
2803   PetscErrorCode ierr;
2804   PetscMPIInt    size;
2805   Mat_SeqBAIJ    *b;
2806 
2807   PetscFunctionBegin;
2808   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2809   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2810 
2811   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2812   B->data = (void*)b;
2813   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2814   B->mapping               = 0;
2815   b->row                   = 0;
2816   b->col                   = 0;
2817   b->icol                  = 0;
2818   b->reallocs              = 0;
2819   b->saved_values          = 0;
2820 
2821   b->roworiented           = PETSC_TRUE;
2822   b->nonew                 = 0;
2823   b->diag                  = 0;
2824   b->solve_work            = 0;
2825   b->mult_work             = 0;
2826   B->spptr                 = 0;
2827   B->info.nz_unneeded      = (PetscReal)b->maxnz;
2828   b->keepnonzeropattern    = PETSC_FALSE;
2829   b->xtoy                  = 0;
2830   b->XtoY                  = 0;
2831   b->compressedrow.use     = PETSC_FALSE;
2832   b->compressedrow.nrows   = 0;
2833   b->compressedrow.i       = PETSC_NULL;
2834   b->compressedrow.rindex  = PETSC_NULL;
2835   b->compressedrow.checked = PETSC_FALSE;
2836   B->same_nonzero          = PETSC_FALSE;
2837 
2838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2839                                      "MatGetFactorAvailable_seqbaij_petsc",
2840                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2842                                      "MatGetFactor_seqbaij_petsc",
2843                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2844   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2845                                      "MatInvertBlockDiagonal_SeqBAIJ",
2846                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2848                                      "MatStoreValues_SeqBAIJ",
2849                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2851                                      "MatRetrieveValues_SeqBAIJ",
2852                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2854                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2855                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2857                                      "MatConvert_SeqBAIJ_SeqAIJ",
2858                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2860                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2861                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2863                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2864                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2866                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2867                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2868   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2869   PetscFunctionReturn(0);
2870 }
2871 EXTERN_C_END
2872 
2873 #undef __FUNCT__
2874 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2875 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2876 {
2877   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2878   PetscErrorCode ierr;
2879   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2880 
2881   PetscFunctionBegin;
2882   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2883 
2884   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2885   for (i=0; i<mbs; i++) {
2886     c->imax[i] = a->imax[i];
2887     c->ilen[i] = a->ilen[i];
2888   }
2889 
2890   /* allocate the matrix space */
2891   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2892   c->singlemalloc = PETSC_TRUE;
2893   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2894   if (mbs > 0) {
2895     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2896     if (cpvalues == MAT_COPY_VALUES) {
2897       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2898     } else {
2899       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2900     }
2901   }
2902   c->roworiented = a->roworiented;
2903   c->nonew       = a->nonew;
2904   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2905   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2906   c->bs2         = a->bs2;
2907   c->mbs         = a->mbs;
2908   c->nbs         = a->nbs;
2909 
2910   if (a->diag) {
2911     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2912     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2913     for (i=0; i<mbs; i++) {
2914       c->diag[i] = a->diag[i];
2915     }
2916   } else c->diag        = 0;
2917   c->nz                 = a->nz;
2918   c->maxnz              = a->maxnz;
2919   c->solve_work         = 0;
2920   c->mult_work          = 0;
2921   c->free_a             = PETSC_TRUE;
2922   c->free_ij            = PETSC_TRUE;
2923   C->preallocated       = PETSC_TRUE;
2924   C->assembled          = PETSC_TRUE;
2925 
2926   c->compressedrow.use     = a->compressedrow.use;
2927   c->compressedrow.nrows   = a->compressedrow.nrows;
2928   c->compressedrow.checked = a->compressedrow.checked;
2929   if ( a->compressedrow.checked && a->compressedrow.use){
2930     i = a->compressedrow.nrows;
2931     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2932     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2933     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2934     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2935   } else {
2936     c->compressedrow.use    = PETSC_FALSE;
2937     c->compressedrow.i      = PETSC_NULL;
2938     c->compressedrow.rindex = PETSC_NULL;
2939   }
2940   C->same_nonzero = A->same_nonzero;
2941   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 #undef __FUNCT__
2946 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2947 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2948 {
2949     PetscErrorCode ierr;
2950 
2951   PetscFunctionBegin;
2952   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2953   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2954   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2955   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2956   PetscFunctionReturn(0);
2957 }
2958 
2959 #undef __FUNCT__
2960 #define __FUNCT__ "MatLoad_SeqBAIJ"
2961 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2962 {
2963   Mat_SeqBAIJ    *a;
2964   Mat            B;
2965   PetscErrorCode ierr;
2966   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2967   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2968   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2969   PetscInt       *masked,nmask,tmp,bs2,ishift;
2970   PetscMPIInt    size;
2971   int            fd;
2972   PetscScalar    *aa;
2973   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2974 
2975   PetscFunctionBegin;
2976   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2977     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2978   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2979   bs2  = bs*bs;
2980 
2981   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2982   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2983   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2984   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2985   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2986   M = header[1]; N = header[2]; nz = header[3];
2987 
2988   if (header[3] < 0) {
2989     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2990   }
2991 
2992   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2993 
2994   /*
2995      This code adds extra rows to make sure the number of rows is
2996     divisible by the blocksize
2997   */
2998   mbs        = M/bs;
2999   extra_rows = bs - M + bs*(mbs);
3000   if (extra_rows == bs) extra_rows = 0;
3001   else                  mbs++;
3002   if (extra_rows) {
3003     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3004   }
3005 
3006   /* read in row lengths */
3007   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3008   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3009   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3010 
3011   /* read in column indices */
3012   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3013   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3014   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3015 
3016   /* loop over row lengths determining block row lengths */
3017   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3018   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3019   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3020   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3021   masked   = mask + mbs;
3022   rowcount = 0; nzcount = 0;
3023   for (i=0; i<mbs; i++) {
3024     nmask = 0;
3025     for (j=0; j<bs; j++) {
3026       kmax = rowlengths[rowcount];
3027       for (k=0; k<kmax; k++) {
3028         tmp = jj[nzcount++]/bs;
3029         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3030       }
3031       rowcount++;
3032     }
3033     browlengths[i] += nmask;
3034     /* zero out the mask elements we set */
3035     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3036   }
3037 
3038   /* create our matrix */
3039   ierr = MatCreate(comm,&B);
3040   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3041   ierr = MatSetType(B,type);CHKERRQ(ierr);
3042   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3043   a = (Mat_SeqBAIJ*)B->data;
3044 
3045   /* set matrix "i" values */
3046   a->i[0] = 0;
3047   for (i=1; i<= mbs; i++) {
3048     a->i[i]      = a->i[i-1] + browlengths[i-1];
3049     a->ilen[i-1] = browlengths[i-1];
3050   }
3051   a->nz         = 0;
3052   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3053 
3054   /* read in nonzero values */
3055   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3056   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3057   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3058 
3059   /* set "a" and "j" values into matrix */
3060   nzcount = 0; jcount = 0;
3061   for (i=0; i<mbs; i++) {
3062     nzcountb = nzcount;
3063     nmask    = 0;
3064     for (j=0; j<bs; j++) {
3065       kmax = rowlengths[i*bs+j];
3066       for (k=0; k<kmax; k++) {
3067         tmp = jj[nzcount++]/bs;
3068 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3069       }
3070     }
3071     /* sort the masked values */
3072     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3073 
3074     /* set "j" values into matrix */
3075     maskcount = 1;
3076     for (j=0; j<nmask; j++) {
3077       a->j[jcount++]  = masked[j];
3078       mask[masked[j]] = maskcount++;
3079     }
3080     /* set "a" values into matrix */
3081     ishift = bs2*a->i[i];
3082     for (j=0; j<bs; j++) {
3083       kmax = rowlengths[i*bs+j];
3084       for (k=0; k<kmax; k++) {
3085         tmp       = jj[nzcountb]/bs ;
3086         block     = mask[tmp] - 1;
3087         point     = jj[nzcountb] - bs*tmp;
3088         idx       = ishift + bs2*block + j + bs*point;
3089         a->a[idx] = (MatScalar)aa[nzcountb++];
3090       }
3091     }
3092     /* zero out the mask elements we set */
3093     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3094   }
3095   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3096 
3097   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3098   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3099   ierr = PetscFree(aa);CHKERRQ(ierr);
3100   ierr = PetscFree(jj);CHKERRQ(ierr);
3101   ierr = PetscFree(mask);CHKERRQ(ierr);
3102 
3103   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3104   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3105   ierr = MatView_Private(B);CHKERRQ(ierr);
3106 
3107   *A = B;
3108   PetscFunctionReturn(0);
3109 }
3110 
3111 #undef __FUNCT__
3112 #define __FUNCT__ "MatCreateSeqBAIJ"
3113 /*@C
3114    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3115    compressed row) format.  For good matrix assembly performance the
3116    user should preallocate the matrix storage by setting the parameter nz
3117    (or the array nnz).  By setting these parameters accurately, performance
3118    during matrix assembly can be increased by more than a factor of 50.
3119 
3120    Collective on MPI_Comm
3121 
3122    Input Parameters:
3123 +  comm - MPI communicator, set to PETSC_COMM_SELF
3124 .  bs - size of block
3125 .  m - number of rows
3126 .  n - number of columns
3127 .  nz - number of nonzero blocks  per block row (same for all rows)
3128 -  nnz - array containing the number of nonzero blocks in the various block rows
3129          (possibly different for each block row) or PETSC_NULL
3130 
3131    Output Parameter:
3132 .  A - the matrix
3133 
3134    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3135    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3136    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3137 
3138    Options Database Keys:
3139 .   -mat_no_unroll - uses code that does not unroll the loops in the
3140                      block calculations (much slower)
3141 .    -mat_block_size - size of the blocks to use
3142 
3143    Level: intermediate
3144 
3145    Notes:
3146    The number of rows and columns must be divisible by blocksize.
3147 
3148    If the nnz parameter is given then the nz parameter is ignored
3149 
3150    A nonzero block is any block that as 1 or more nonzeros in it
3151 
3152    The block AIJ format is fully compatible with standard Fortran 77
3153    storage.  That is, the stored row and column indices can begin at
3154    either one (as in Fortran) or zero.  See the users' manual for details.
3155 
3156    Specify the preallocated storage with either nz or nnz (not both).
3157    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3158    allocation.  For additional details, see the users manual chapter on
3159    matrices.
3160 
3161 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3162 @*/
3163 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3164 {
3165   PetscErrorCode ierr;
3166 
3167   PetscFunctionBegin;
3168   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3169   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3170   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3171   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3177 /*@C
3178    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3179    per row in the matrix. For good matrix assembly performance the
3180    user should preallocate the matrix storage by setting the parameter nz
3181    (or the array nnz).  By setting these parameters accurately, performance
3182    during matrix assembly can be increased by more than a factor of 50.
3183 
3184    Collective on MPI_Comm
3185 
3186    Input Parameters:
3187 +  A - the matrix
3188 .  bs - size of block
3189 .  nz - number of block nonzeros per block row (same for all rows)
3190 -  nnz - array containing the number of block nonzeros in the various block rows
3191          (possibly different for each block row) or PETSC_NULL
3192 
3193    Options Database Keys:
3194 .   -mat_no_unroll - uses code that does not unroll the loops in the
3195                      block calculations (much slower)
3196 .    -mat_block_size - size of the blocks to use
3197 
3198    Level: intermediate
3199 
3200    Notes:
3201    If the nnz parameter is given then the nz parameter is ignored
3202 
3203    You can call MatGetInfo() to get information on how effective the preallocation was;
3204    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3205    You can also run with the option -info and look for messages with the string
3206    malloc in them to see if additional memory allocation was needed.
3207 
3208    The block AIJ format is fully compatible with standard Fortran 77
3209    storage.  That is, the stored row and column indices can begin at
3210    either one (as in Fortran) or zero.  See the users' manual for details.
3211 
3212    Specify the preallocated storage with either nz or nnz (not both).
3213    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3214    allocation.  For additional details, see the users manual chapter on
3215    matrices.
3216 
3217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3218 @*/
3219 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3220 {
3221   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3222 
3223   PetscFunctionBegin;
3224   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3225   if (f) {
3226     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3227   }
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 #undef __FUNCT__
3232 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3233 /*@C
3234    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3235    (the default sequential PETSc format).
3236 
3237    Collective on MPI_Comm
3238 
3239    Input Parameters:
3240 +  A - the matrix
3241 .  i - the indices into j for the start of each local row (starts with zero)
3242 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3243 -  v - optional values in the matrix
3244 
3245    Level: developer
3246 
3247 .keywords: matrix, aij, compressed row, sparse
3248 
3249 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3250 @*/
3251 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3252 {
3253   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3254 
3255   PetscFunctionBegin;
3256   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3257   if (f) {
3258     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3259   }
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 
3264 #undef __FUNCT__
3265 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3266 /*@
3267      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3268               (upper triangular entries in CSR format) provided by the user.
3269 
3270      Collective on MPI_Comm
3271 
3272    Input Parameters:
3273 +  comm - must be an MPI communicator of size 1
3274 .  bs - size of block
3275 .  m - number of rows
3276 .  n - number of columns
3277 .  i - row indices
3278 .  j - column indices
3279 -  a - matrix values
3280 
3281    Output Parameter:
3282 .  mat - the matrix
3283 
3284    Level: intermediate
3285 
3286    Notes:
3287        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3288     once the matrix is destroyed
3289 
3290        You cannot set new nonzero locations into this matrix, that will generate an error.
3291 
3292        The i and j indices are 0 based
3293 
3294 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3295 
3296 @*/
3297 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3298 {
3299   PetscErrorCode ierr;
3300   PetscInt       ii;
3301   Mat_SeqBAIJ    *baij;
3302 
3303   PetscFunctionBegin;
3304   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3305   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3306 
3307   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3308   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3309   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3310   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3311   baij = (Mat_SeqBAIJ*)(*mat)->data;
3312   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3313 
3314   baij->i = i;
3315   baij->j = j;
3316   baij->a = a;
3317   baij->singlemalloc = PETSC_FALSE;
3318   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3319   baij->free_a       = PETSC_FALSE;
3320   baij->free_ij       = PETSC_FALSE;
3321 
3322   for (ii=0; ii<m; ii++) {
3323     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3324 #if defined(PETSC_USE_DEBUG)
3325     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3326 #endif
3327   }
3328 #if defined(PETSC_USE_DEBUG)
3329   for (ii=0; ii<baij->i[m]; ii++) {
3330     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3331     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3332   }
3333 #endif
3334 
3335   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3336   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3337   PetscFunctionReturn(0);
3338 }
3339