xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 633214b287bcbb44a28f904dbd325fe22fb02dbd)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "../src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "../src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   diag     = a->diag;
1105   for (i=0; i<a->mbs; i++) {
1106     if (jj[diag[i]] != i) {
1107       *missing  = PETSC_TRUE;
1108       if (d) *d = i;
1109     }
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117 {
1118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscInt       i,j,m = a->mbs;
1121 
1122   PetscFunctionBegin;
1123   if (!a->diag) {
1124     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1125   }
1126   for (i=0; i<m; i++) {
1127     a->diag[i] = a->i[i+1];
1128     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129       if (a->j[j] == i) {
1130         a->diag[i] = j;
1131         break;
1132       }
1133     }
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 
1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1148   PetscInt       *tia, *tja;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1155   } else {
1156     tia = a->i; tja = a->j;
1157   }
1158 
1159   if (!blockcompressed && bs > 1) {
1160     (*nn) *= bs;
1161     nbs    = bs;
1162     /* malloc & create the natural set of indices */
1163     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1164     if (n) {
1165       (*ia)[0] = 0;
1166       for (j=1; j<bs; j++) {
1167         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1168       }
1169     }
1170 
1171     for (i=1; i<n; i++) {
1172       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1173       for (j=1; j<bs; j++) {
1174         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1175       }
1176     }
1177     if (n) {
1178       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1179     }
1180 
1181     if (ja) {
1182       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1183       cnt = 0;
1184       for (i=0; i<n; i++) {
1185         for (j=0; j<bs; j++) {
1186           for (k=tia[i]; k<tia[i+1]; k++) {
1187             for (l=0; l<bs; l++) {
1188               (*ja)[cnt++] = bs*tja[k] + l;
1189 	    }
1190           }
1191         }
1192       }
1193     }
1194 
1195     n     *= bs;
1196     nz *= bs*bs;
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       ierr = PetscFree(tia);CHKERRQ(ierr);
1199       ierr = PetscFree(tja);CHKERRQ(ierr);
1200     }
1201   } else {
1202     *ia = tia;
1203     if (ja) *ja = tja;
1204   }
1205   if (oshift == 1) {
1206     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1207     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       i,n = a->mbs,nz = a->i[n];
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if (!blockcompressed && A->rmap->bs > 1) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   } else if (symmetric) {
1226     ierr = PetscFree(*ia);CHKERRQ(ierr);
1227     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1228   } else if (oshift == 1) { /* blockcompressed */
1229     for (i=0; i<n+1; i++) a->i[i]--;
1230     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243 #if defined(PETSC_USE_LOG)
1244   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1245 #endif
1246   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1247   if (a->row) {
1248     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1249   }
1250   if (a->col) {
1251     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1254   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1255   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1256   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1257   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1258   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1259   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1260   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1261   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1262 
1263   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1264   ierr = PetscFree(a);CHKERRQ(ierr);
1265 
1266   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1280 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1281 {
1282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   switch (op) {
1287   case MAT_ROW_ORIENTED:
1288     a->roworiented    = flg;
1289     break;
1290   case MAT_KEEP_ZEROED_ROWS:
1291     a->keepzeroedrows = flg;
1292     break;
1293   case MAT_NEW_NONZERO_LOCATIONS:
1294     a->nonew          = (flg ? 0 : 1);
1295     break;
1296   case MAT_NEW_NONZERO_LOCATION_ERR:
1297     a->nonew          = (flg ? -1 : 0);
1298     break;
1299   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1300     a->nonew          = (flg ? -2 : 0);
1301     break;
1302   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1303     a->nounused       = (flg ? -1 : 0);
1304     break;
1305   case MAT_NEW_DIAGONALS:
1306   case MAT_IGNORE_OFF_PROC_ENTRIES:
1307   case MAT_USE_HASH_TABLE:
1308     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1309     break;
1310   case MAT_SYMMETRIC:
1311   case MAT_STRUCTURALLY_SYMMETRIC:
1312   case MAT_HERMITIAN:
1313   case MAT_SYMMETRY_ETERNAL:
1314     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1315     break;
1316   default:
1317     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325 {
1326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1329   MatScalar      *aa,*aa_i;
1330   PetscScalar    *v_i;
1331 
1332   PetscFunctionBegin;
1333   bs  = A->rmap->bs;
1334   ai  = a->i;
1335   aj  = a->j;
1336   aa  = a->a;
1337   bs2 = a->bs2;
1338 
1339   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1340 
1341   bn  = row/bs;   /* Block number */
1342   bp  = row % bs; /* Block Position */
1343   M   = ai[bn+1] - ai[bn];
1344   *nz = bs*M;
1345 
1346   if (v) {
1347     *v = 0;
1348     if (*nz) {
1349       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1350       for (i=0; i<M; i++) { /* for each block in the block row */
1351         v_i  = *v + i*bs;
1352         aa_i = aa + bs2*(ai[bn] + i);
1353         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1354       }
1355     }
1356   }
1357 
1358   if (idx) {
1359     *idx = 0;
1360     if (*nz) {
1361       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1362       for (i=0; i<M; i++) { /* for each block in the block row */
1363         idx_i = *idx + i*bs;
1364         itmp  = bs*aj[ai[bn] + i];
1365         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1366       }
1367     }
1368   }
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1374 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1375 {
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1380   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1386 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1387 {
1388   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1389   Mat            C;
1390   PetscErrorCode ierr;
1391   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1392   PetscInt       *rows,*cols,bs2=a->bs2;
1393   MatScalar      *array;
1394 
1395   PetscFunctionBegin;
1396   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1397   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1398     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1399     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1400 
1401     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1402     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1403     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1404     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1405     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1406     ierr = PetscFree(col);CHKERRQ(ierr);
1407   } else {
1408     C = *B;
1409   }
1410 
1411   array = a->a;
1412   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1413   cols = rows + bs;
1414   for (i=0; i<mbs; i++) {
1415     cols[0] = i*bs;
1416     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1417     len = ai[i+1] - ai[i];
1418     for (j=0; j<len; j++) {
1419       rows[0] = (*aj++)*bs;
1420       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1421       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1422       array += bs2;
1423     }
1424   }
1425   ierr = PetscFree(rows);CHKERRQ(ierr);
1426 
1427   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429 
1430   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1431     *B = C;
1432   } else {
1433     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1434   }
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1440 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1441 {
1442   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1443   PetscErrorCode ierr;
1444   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1445   int            fd;
1446   PetscScalar    *aa;
1447   FILE           *file;
1448 
1449   PetscFunctionBegin;
1450   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1451   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1452   col_lens[0] = MAT_FILE_COOKIE;
1453 
1454   col_lens[1] = A->rmap->N;
1455   col_lens[2] = A->cmap->n;
1456   col_lens[3] = a->nz*bs2;
1457 
1458   /* store lengths of each row and write (including header) to file */
1459   count = 0;
1460   for (i=0; i<a->mbs; i++) {
1461     for (j=0; j<bs; j++) {
1462       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1463     }
1464   }
1465   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1466   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1467 
1468   /* store column indices (zero start index) */
1469   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1470   count = 0;
1471   for (i=0; i<a->mbs; i++) {
1472     for (j=0; j<bs; j++) {
1473       for (k=a->i[i]; k<a->i[i+1]; k++) {
1474         for (l=0; l<bs; l++) {
1475           jj[count++] = bs*a->j[k] + l;
1476         }
1477       }
1478     }
1479   }
1480   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1481   ierr = PetscFree(jj);CHKERRQ(ierr);
1482 
1483   /* store nonzero values */
1484   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1485   count = 0;
1486   for (i=0; i<a->mbs; i++) {
1487     for (j=0; j<bs; j++) {
1488       for (k=a->i[i]; k<a->i[i+1]; k++) {
1489         for (l=0; l<bs; l++) {
1490           aa[count++] = a->a[bs2*k + l*bs + j];
1491         }
1492       }
1493     }
1494   }
1495   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1496   ierr = PetscFree(aa);CHKERRQ(ierr);
1497 
1498   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1499   if (file) {
1500     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1501   }
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1507 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1508 {
1509   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1510   PetscErrorCode    ierr;
1511   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1512   PetscViewerFormat format;
1513 
1514   PetscFunctionBegin;
1515   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1516   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1517     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1518   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1519     Mat aij;
1520     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1521     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1522     ierr = MatDestroy(aij);CHKERRQ(ierr);
1523   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1524      PetscFunctionReturn(0);
1525   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1526     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1527     for (i=0; i<a->mbs; i++) {
1528       for (j=0; j<bs; j++) {
1529         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1530         for (k=a->i[i]; k<a->i[i+1]; k++) {
1531           for (l=0; l<bs; l++) {
1532 #if defined(PETSC_USE_COMPLEX)
1533             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1535                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1538                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1539             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             }
1542 #else
1543             if (a->a[bs2*k + l*bs + j] != 0.0) {
1544               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1545             }
1546 #endif
1547           }
1548         }
1549         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1550       }
1551     }
1552     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1553   } else {
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1555     for (i=0; i<a->mbs; i++) {
1556       for (j=0; j<bs; j++) {
1557         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1558         for (k=a->i[i]; k<a->i[i+1]; k++) {
1559           for (l=0; l<bs; l++) {
1560 #if defined(PETSC_USE_COMPLEX)
1561             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1563                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1565               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1566                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1567             } else {
1568               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             }
1570 #else
1571             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1572 #endif
1573           }
1574         }
1575         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1576       }
1577     }
1578     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1579   }
1580   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1586 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1587 {
1588   Mat            A = (Mat) Aa;
1589   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1590   PetscErrorCode ierr;
1591   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1592   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1593   MatScalar      *aa;
1594   PetscViewer    viewer;
1595 
1596   PetscFunctionBegin;
1597 
1598   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1599   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1600 
1601   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1602 
1603   /* loop over matrix elements drawing boxes */
1604   color = PETSC_DRAW_BLUE;
1605   for (i=0,row=0; i<mbs; i++,row+=bs) {
1606     for (j=a->i[i]; j<a->i[i+1]; j++) {
1607       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1608       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1609       aa = a->a + j*bs2;
1610       for (k=0; k<bs; k++) {
1611         for (l=0; l<bs; l++) {
1612           if (PetscRealPart(*aa++) >=  0.) continue;
1613           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1614         }
1615       }
1616     }
1617   }
1618   color = PETSC_DRAW_CYAN;
1619   for (i=0,row=0; i<mbs; i++,row+=bs) {
1620     for (j=a->i[i]; j<a->i[i+1]; j++) {
1621       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1622       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1623       aa = a->a + j*bs2;
1624       for (k=0; k<bs; k++) {
1625         for (l=0; l<bs; l++) {
1626           if (PetscRealPart(*aa++) != 0.) continue;
1627           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1628         }
1629       }
1630     }
1631   }
1632 
1633   color = PETSC_DRAW_RED;
1634   for (i=0,row=0; i<mbs; i++,row+=bs) {
1635     for (j=a->i[i]; j<a->i[i+1]; j++) {
1636       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1637       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1638       aa = a->a + j*bs2;
1639       for (k=0; k<bs; k++) {
1640         for (l=0; l<bs; l++) {
1641           if (PetscRealPart(*aa++) <= 0.) continue;
1642           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1643         }
1644       }
1645     }
1646   }
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1652 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1653 {
1654   PetscErrorCode ierr;
1655   PetscReal      xl,yl,xr,yr,w,h;
1656   PetscDraw      draw;
1657   PetscTruth     isnull;
1658 
1659   PetscFunctionBegin;
1660 
1661   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1662   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1663 
1664   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1665   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1666   xr += w;    yr += h;  xl = -w;     yl = -h;
1667   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1668   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1669   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatView_SeqBAIJ"
1675 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1676 {
1677   PetscErrorCode ierr;
1678   PetscTruth     iascii,isbinary,isdraw;
1679 
1680   PetscFunctionBegin;
1681   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1682   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1683   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1684   if (iascii){
1685     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1686   } else if (isbinary) {
1687     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1688   } else if (isdraw) {
1689     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1690   } else {
1691     Mat B;
1692     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1693     ierr = MatView(B,viewer);CHKERRQ(ierr);
1694     ierr = MatDestroy(B);CHKERRQ(ierr);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1702 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1703 {
1704   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1705   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1706   PetscInt    *ai = a->i,*ailen = a->ilen;
1707   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1708   MatScalar   *ap,*aa = a->a;
1709 
1710   PetscFunctionBegin;
1711   for (k=0; k<m; k++) { /* loop over rows */
1712     row  = im[k]; brow = row/bs;
1713     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1714     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1715     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1716     nrow = ailen[brow];
1717     for (l=0; l<n; l++) { /* loop over columns */
1718       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1719       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1720       col  = in[l] ;
1721       bcol = col/bs;
1722       cidx = col%bs;
1723       ridx = row%bs;
1724       high = nrow;
1725       low  = 0; /* assume unsorted */
1726       while (high-low > 5) {
1727         t = (low+high)/2;
1728         if (rp[t] > bcol) high = t;
1729         else             low  = t;
1730       }
1731       for (i=low; i<high; i++) {
1732         if (rp[i] > bcol) break;
1733         if (rp[i] == bcol) {
1734           *v++ = ap[bs2*i+bs*cidx+ridx];
1735           goto finished;
1736         }
1737       }
1738       *v++ = 0.0;
1739       finished:;
1740     }
1741   }
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 #define CHUNKSIZE 10
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1748 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1749 {
1750   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1751   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1752   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1753   PetscErrorCode    ierr;
1754   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1755   PetscTruth        roworiented=a->roworiented;
1756   const PetscScalar *value = v;
1757   MatScalar         *ap,*aa = a->a,*bap;
1758 
1759   PetscFunctionBegin;
1760   if (roworiented) {
1761     stepval = (n-1)*bs;
1762   } else {
1763     stepval = (m-1)*bs;
1764   }
1765   for (k=0; k<m; k++) { /* loop over added rows */
1766     row  = im[k];
1767     if (row < 0) continue;
1768 #if defined(PETSC_USE_DEBUG)
1769     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1770 #endif
1771     rp   = aj + ai[row];
1772     ap   = aa + bs2*ai[row];
1773     rmax = imax[row];
1774     nrow = ailen[row];
1775     low  = 0;
1776     high = nrow;
1777     for (l=0; l<n; l++) { /* loop over added columns */
1778       if (in[l] < 0) continue;
1779 #if defined(PETSC_USE_DEBUG)
1780       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1781 #endif
1782       col = in[l];
1783       if (roworiented) {
1784         value = v + k*(stepval+bs)*bs + l*bs;
1785       } else {
1786         value = v + l*(stepval+bs)*bs + k*bs;
1787       }
1788       if (col <= lastcol) low = 0; else high = nrow;
1789       lastcol = col;
1790       while (high-low > 7) {
1791         t = (low+high)/2;
1792         if (rp[t] > col) high = t;
1793         else             low  = t;
1794       }
1795       for (i=low; i<high; i++) {
1796         if (rp[i] > col) break;
1797         if (rp[i] == col) {
1798           bap  = ap +  bs2*i;
1799           if (roworiented) {
1800             if (is == ADD_VALUES) {
1801               for (ii=0; ii<bs; ii++,value+=stepval) {
1802                 for (jj=ii; jj<bs2; jj+=bs) {
1803                   bap[jj] += *value++;
1804                 }
1805               }
1806             } else {
1807               for (ii=0; ii<bs; ii++,value+=stepval) {
1808                 for (jj=ii; jj<bs2; jj+=bs) {
1809                   bap[jj] = *value++;
1810                 }
1811               }
1812             }
1813           } else {
1814             if (is == ADD_VALUES) {
1815               for (ii=0; ii<bs; ii++,value+=stepval) {
1816                 for (jj=0; jj<bs; jj++) {
1817                   *bap++ += *value++;
1818                 }
1819               }
1820             } else {
1821               for (ii=0; ii<bs; ii++,value+=stepval) {
1822                 for (jj=0; jj<bs; jj++) {
1823                   *bap++  = *value++;
1824                 }
1825               }
1826             }
1827           }
1828           goto noinsert2;
1829         }
1830       }
1831       if (nonew == 1) goto noinsert2;
1832       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1833       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1834       N = nrow++ - 1; high++;
1835       /* shift up all the later entries in this row */
1836       for (ii=N; ii>=i; ii--) {
1837         rp[ii+1] = rp[ii];
1838         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1839       }
1840       if (N >= i) {
1841         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1842       }
1843       rp[i] = col;
1844       bap   = ap +  bs2*i;
1845       if (roworiented) {
1846         for (ii=0; ii<bs; ii++,value+=stepval) {
1847           for (jj=ii; jj<bs2; jj+=bs) {
1848             bap[jj] = *value++;
1849           }
1850         }
1851       } else {
1852         for (ii=0; ii<bs; ii++,value+=stepval) {
1853           for (jj=0; jj<bs; jj++) {
1854             *bap++  = *value++;
1855           }
1856         }
1857       }
1858       noinsert2:;
1859       low = i;
1860     }
1861     ailen[row] = nrow;
1862   }
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1868 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1869 {
1870   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1871   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1872   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1873   PetscErrorCode ierr;
1874   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1875   MatScalar      *aa = a->a,*ap;
1876   PetscReal      ratio=0.6;
1877 
1878   PetscFunctionBegin;
1879   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1880 
1881   if (m) rmax = ailen[0];
1882   for (i=1; i<mbs; i++) {
1883     /* move each row back by the amount of empty slots (fshift) before it*/
1884     fshift += imax[i-1] - ailen[i-1];
1885     rmax   = PetscMax(rmax,ailen[i]);
1886     if (fshift) {
1887       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1888       N = ailen[i];
1889       for (j=0; j<N; j++) {
1890         ip[j-fshift] = ip[j];
1891         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1892       }
1893     }
1894     ai[i] = ai[i-1] + ailen[i-1];
1895   }
1896   if (mbs) {
1897     fshift += imax[mbs-1] - ailen[mbs-1];
1898     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1899   }
1900   /* reset ilen and imax for each row */
1901   for (i=0; i<mbs; i++) {
1902     ailen[i] = imax[i] = ai[i+1] - ai[i];
1903   }
1904   a->nz = ai[mbs];
1905 
1906   /* diagonals may have moved, so kill the diagonal pointers */
1907   a->idiagvalid = PETSC_FALSE;
1908   if (fshift && a->diag) {
1909     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1910     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1911     a->diag = 0;
1912   }
1913   if (fshift && a->nounused == -1) {
1914     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1915   }
1916   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1917   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1918   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1919   a->reallocs          = 0;
1920   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1921 
1922   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1923   if (a->compressedrow.use){
1924     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1925   }
1926 
1927   A->same_nonzero = PETSC_TRUE;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 /*
1932    This function returns an array of flags which indicate the locations of contiguous
1933    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1934    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1935    Assume: sizes should be long enough to hold all the values.
1936 */
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1939 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1940 {
1941   PetscInt   i,j,k,row;
1942   PetscTruth flg;
1943 
1944   PetscFunctionBegin;
1945   for (i=0,j=0; i<n; j++) {
1946     row = idx[i];
1947     if (row%bs!=0) { /* Not the begining of a block */
1948       sizes[j] = 1;
1949       i++;
1950     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1951       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1952       i++;
1953     } else { /* Begining of the block, so check if the complete block exists */
1954       flg = PETSC_TRUE;
1955       for (k=1; k<bs; k++) {
1956         if (row+k != idx[i+k]) { /* break in the block */
1957           flg = PETSC_FALSE;
1958           break;
1959         }
1960       }
1961       if (flg) { /* No break in the bs */
1962         sizes[j] = bs;
1963         i+= bs;
1964       } else {
1965         sizes[j] = 1;
1966         i++;
1967       }
1968     }
1969   }
1970   *bs_max = j;
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1976 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1977 {
1978   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1979   PetscErrorCode ierr;
1980   PetscInt       i,j,k,count,*rows;
1981   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1982   PetscScalar    zero = 0.0;
1983   MatScalar      *aa;
1984 
1985   PetscFunctionBegin;
1986   /* Make a copy of the IS and  sort it */
1987   /* allocate memory for rows,sizes */
1988   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1989   sizes = rows + is_n;
1990 
1991   /* copy IS values to rows, and sort them */
1992   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1993   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1994   if (baij->keepzeroedrows) {
1995     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1996     bs_max = is_n;
1997     A->same_nonzero = PETSC_TRUE;
1998   } else {
1999     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2000     A->same_nonzero = PETSC_FALSE;
2001   }
2002 
2003   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2004     row   = rows[j];
2005     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2006     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2007     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2008     if (sizes[i] == bs && !baij->keepzeroedrows) {
2009       if (diag != 0.0) {
2010         if (baij->ilen[row/bs] > 0) {
2011           baij->ilen[row/bs]       = 1;
2012           baij->j[baij->i[row/bs]] = row/bs;
2013           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2014         }
2015         /* Now insert all the diagonal values for this bs */
2016         for (k=0; k<bs; k++) {
2017           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2018         }
2019       } else { /* (diag == 0.0) */
2020         baij->ilen[row/bs] = 0;
2021       } /* end (diag == 0.0) */
2022     } else { /* (sizes[i] != bs) */
2023 #if defined (PETSC_USE_DEBUG)
2024       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2025 #endif
2026       for (k=0; k<count; k++) {
2027         aa[0] =  zero;
2028         aa    += bs;
2029       }
2030       if (diag != 0.0) {
2031         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2032       }
2033     }
2034   }
2035 
2036   ierr = PetscFree(rows);CHKERRQ(ierr);
2037   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2043 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2044 {
2045   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2046   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2047   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2048   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2049   PetscErrorCode ierr;
2050   PetscInt       ridx,cidx,bs2=a->bs2;
2051   PetscTruth     roworiented=a->roworiented;
2052   MatScalar      *ap,value,*aa=a->a,*bap;
2053 
2054   PetscFunctionBegin;
2055   for (k=0; k<m; k++) { /* loop over added rows */
2056     row  = im[k];
2057     brow = row/bs;
2058     if (row < 0) continue;
2059 #if defined(PETSC_USE_DEBUG)
2060     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2061 #endif
2062     rp   = aj + ai[brow];
2063     ap   = aa + bs2*ai[brow];
2064     rmax = imax[brow];
2065     nrow = ailen[brow];
2066     low  = 0;
2067     high = nrow;
2068     for (l=0; l<n; l++) { /* loop over added columns */
2069       if (in[l] < 0) continue;
2070 #if defined(PETSC_USE_DEBUG)
2071       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2072 #endif
2073       col = in[l]; bcol = col/bs;
2074       ridx = row % bs; cidx = col % bs;
2075       if (roworiented) {
2076         value = v[l + k*n];
2077       } else {
2078         value = v[k + l*m];
2079       }
2080       if (col <= lastcol) low = 0; else high = nrow;
2081       lastcol = col;
2082       while (high-low > 7) {
2083         t = (low+high)/2;
2084         if (rp[t] > bcol) high = t;
2085         else              low  = t;
2086       }
2087       for (i=low; i<high; i++) {
2088         if (rp[i] > bcol) break;
2089         if (rp[i] == bcol) {
2090           bap  = ap +  bs2*i + bs*cidx + ridx;
2091           if (is == ADD_VALUES) *bap += value;
2092           else                  *bap  = value;
2093           goto noinsert1;
2094         }
2095       }
2096       if (nonew == 1) goto noinsert1;
2097       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2098       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2099       N = nrow++ - 1; high++;
2100       /* shift up all the later entries in this row */
2101       for (ii=N; ii>=i; ii--) {
2102         rp[ii+1] = rp[ii];
2103         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2104       }
2105       if (N>=i) {
2106         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2107       }
2108       rp[i]                      = bcol;
2109       ap[bs2*i + bs*cidx + ridx] = value;
2110       a->nz++;
2111       noinsert1:;
2112       low = i;
2113     }
2114     ailen[brow] = nrow;
2115   }
2116   A->same_nonzero = PETSC_FALSE;
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2124 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2125 {
2126   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2127   Mat            outA;
2128   PetscErrorCode ierr;
2129   PetscTruth     row_identity,col_identity;
2130 
2131   PetscFunctionBegin;
2132   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2133   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2134   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2135   if (!row_identity || !col_identity) {
2136     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2137   }
2138 
2139   outA          = inA;
2140   inA->factor   = MAT_FACTOR_LU;
2141 
2142   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2143 
2144   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2145   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2146   a->row = row;
2147   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2148   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2149   a->col = col;
2150 
2151   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2152   if (a->icol) {
2153     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2154   }
2155   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2157 
2158   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2159   if (!a->solve_work) {
2160     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2161     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2162   }
2163   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2164 
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 EXTERN_C_BEGIN
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2171 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2172 {
2173   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2174   PetscInt    i,nz,nbs;
2175 
2176   PetscFunctionBegin;
2177   nz  = baij->maxnz/baij->bs2;
2178   nbs = baij->nbs;
2179   for (i=0; i<nz; i++) {
2180     baij->j[i] = indices[i];
2181   }
2182   baij->nz = nz;
2183   for (i=0; i<nbs; i++) {
2184     baij->ilen[i] = baij->imax[i];
2185   }
2186 
2187   PetscFunctionReturn(0);
2188 }
2189 EXTERN_C_END
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2193 /*@
2194     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2195        in the matrix.
2196 
2197   Input Parameters:
2198 +  mat - the SeqBAIJ matrix
2199 -  indices - the column indices
2200 
2201   Level: advanced
2202 
2203   Notes:
2204     This can be called if you have precomputed the nonzero structure of the
2205   matrix and want to provide it to the matrix object to improve the performance
2206   of the MatSetValues() operation.
2207 
2208     You MUST have set the correct numbers of nonzeros per row in the call to
2209   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2210 
2211     MUST be called before any calls to MatSetValues();
2212 
2213 @*/
2214 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2215 {
2216   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2217 
2218   PetscFunctionBegin;
2219   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2220   PetscValidPointer(indices,2);
2221   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2222   if (f) {
2223     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2224   } else {
2225     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2226   }
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 #undef __FUNCT__
2231 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2232 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2233 {
2234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2235   PetscErrorCode ierr;
2236   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2237   PetscReal      atmp;
2238   PetscScalar    *x,zero = 0.0;
2239   MatScalar      *aa;
2240   PetscInt       ncols,brow,krow,kcol;
2241 
2242   PetscFunctionBegin;
2243   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2244   bs   = A->rmap->bs;
2245   aa   = a->a;
2246   ai   = a->i;
2247   aj   = a->j;
2248   mbs  = a->mbs;
2249 
2250   ierr = VecSet(v,zero);CHKERRQ(ierr);
2251   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2252   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2253   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2254   for (i=0; i<mbs; i++) {
2255     ncols = ai[1] - ai[0]; ai++;
2256     brow  = bs*i;
2257     for (j=0; j<ncols; j++){
2258       for (kcol=0; kcol<bs; kcol++){
2259         for (krow=0; krow<bs; krow++){
2260           atmp = PetscAbsScalar(*aa);aa++;
2261           row = brow + krow;    /* row index */
2262           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2263           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2264         }
2265       }
2266       aj++;
2267     }
2268   }
2269   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatCopy_SeqBAIJ"
2275 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2276 {
2277   PetscErrorCode ierr;
2278 
2279   PetscFunctionBegin;
2280   /* If the two matrices have the same copy implementation, use fast copy. */
2281   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2282     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2283     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2284 
2285     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2286       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2287     }
2288     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2289   } else {
2290     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2297 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2298 {
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2308 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2309 {
2310   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2311   PetscFunctionBegin;
2312   *array = a->a;
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2318 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2319 {
2320   PetscFunctionBegin;
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #include "petscblaslapack.h"
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2327 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2328 {
2329   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2330   PetscErrorCode ierr;
2331   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2332   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2333 
2334   PetscFunctionBegin;
2335   if (str == SAME_NONZERO_PATTERN) {
2336     PetscScalar alpha = a;
2337     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2338   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2339     if (y->xtoy && y->XtoY != X) {
2340       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2341       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2342     }
2343     if (!y->xtoy) { /* get xtoy */
2344       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2345       y->XtoY = X;
2346       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2347     }
2348     bs2 = bs*bs;
2349     for (i=0; i<x->nz; i++) {
2350       j = 0;
2351       while (j < bs2){
2352         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2353         j++;
2354       }
2355     }
2356     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2357   } else {
2358     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2359   }
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2365 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2366 {
2367   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2368   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2369   MatScalar      *aa = a->a;
2370 
2371   PetscFunctionBegin;
2372   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2378 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2379 {
2380   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2381   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2382   MatScalar      *aa = a->a;
2383 
2384   PetscFunctionBegin;
2385   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 
2390 /* -------------------------------------------------------------------*/
2391 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2392        MatGetRow_SeqBAIJ,
2393        MatRestoreRow_SeqBAIJ,
2394        MatMult_SeqBAIJ_N,
2395 /* 4*/ MatMultAdd_SeqBAIJ_N,
2396        MatMultTranspose_SeqBAIJ,
2397        MatMultTransposeAdd_SeqBAIJ,
2398        0,
2399        0,
2400        0,
2401 /*10*/ 0,
2402        MatLUFactor_SeqBAIJ,
2403        0,
2404        0,
2405        MatTranspose_SeqBAIJ,
2406 /*15*/ MatGetInfo_SeqBAIJ,
2407        MatEqual_SeqBAIJ,
2408        MatGetDiagonal_SeqBAIJ,
2409        MatDiagonalScale_SeqBAIJ,
2410        MatNorm_SeqBAIJ,
2411 /*20*/ 0,
2412        MatAssemblyEnd_SeqBAIJ,
2413        0,
2414        MatSetOption_SeqBAIJ,
2415        MatZeroEntries_SeqBAIJ,
2416 /*25*/ MatZeroRows_SeqBAIJ,
2417        0,
2418        0,
2419        0,
2420        0,
2421 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2422        0,
2423        0,
2424        MatGetArray_SeqBAIJ,
2425        MatRestoreArray_SeqBAIJ,
2426 /*35*/ MatDuplicate_SeqBAIJ,
2427        0,
2428        0,
2429        MatILUFactor_SeqBAIJ,
2430        0,
2431 /*40*/ MatAXPY_SeqBAIJ,
2432        MatGetSubMatrices_SeqBAIJ,
2433        MatIncreaseOverlap_SeqBAIJ,
2434        MatGetValues_SeqBAIJ,
2435        MatCopy_SeqBAIJ,
2436 /*45*/ 0,
2437        MatScale_SeqBAIJ,
2438        0,
2439        0,
2440        0,
2441 /*50*/ 0,
2442        MatGetRowIJ_SeqBAIJ,
2443        MatRestoreRowIJ_SeqBAIJ,
2444        0,
2445        0,
2446 /*55*/ 0,
2447        0,
2448        0,
2449        0,
2450        MatSetValuesBlocked_SeqBAIJ,
2451 /*60*/ MatGetSubMatrix_SeqBAIJ,
2452        MatDestroy_SeqBAIJ,
2453        MatView_SeqBAIJ,
2454        0,
2455        0,
2456 /*65*/ 0,
2457        0,
2458        0,
2459        0,
2460        0,
2461 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2462        0,
2463        MatConvert_Basic,
2464        0,
2465        0,
2466 /*75*/ 0,
2467        0,
2468        0,
2469        0,
2470        0,
2471 /*80*/ 0,
2472        0,
2473        0,
2474        0,
2475        MatLoad_SeqBAIJ,
2476 /*85*/ 0,
2477        0,
2478        0,
2479        0,
2480        0,
2481 /*90*/ 0,
2482        0,
2483        0,
2484        0,
2485        0,
2486 /*95*/ 0,
2487        0,
2488        0,
2489        0,
2490        0,
2491 /*100*/0,
2492        0,
2493        0,
2494        0,
2495        0,
2496 /*105*/0,
2497        MatRealPart_SeqBAIJ,
2498        MatImaginaryPart_SeqBAIJ,
2499        0,
2500        0,
2501 /*110*/0,
2502        0,
2503        0,
2504        0,
2505        MatMissingDiagonal_SeqBAIJ
2506 /*115*/
2507 };
2508 
2509 EXTERN_C_BEGIN
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2512 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2513 {
2514   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2515   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2516   PetscErrorCode ierr;
2517 
2518   PetscFunctionBegin;
2519   if (aij->nonew != 1) {
2520     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2521   }
2522 
2523   /* allocate space for values if not already there */
2524   if (!aij->saved_values) {
2525     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2526   }
2527 
2528   /* copy values over */
2529   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 EXTERN_C_END
2533 
2534 EXTERN_C_BEGIN
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2537 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2538 {
2539   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2540   PetscErrorCode ierr;
2541   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2542 
2543   PetscFunctionBegin;
2544   if (aij->nonew != 1) {
2545     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2546   }
2547   if (!aij->saved_values) {
2548     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2549   }
2550 
2551   /* copy values over */
2552   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2553   PetscFunctionReturn(0);
2554 }
2555 EXTERN_C_END
2556 
2557 EXTERN_C_BEGIN
2558 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2559 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2560 EXTERN_C_END
2561 
2562 EXTERN_C_BEGIN
2563 #undef __FUNCT__
2564 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2565 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2566 {
2567   Mat_SeqBAIJ    *b;
2568   PetscErrorCode ierr;
2569   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2570   PetscTruth     flg,skipallocation = PETSC_FALSE;
2571 
2572   PetscFunctionBegin;
2573 
2574   if (nz == MAT_SKIP_ALLOCATION) {
2575     skipallocation = PETSC_TRUE;
2576     nz             = 0;
2577   }
2578 
2579   if (bs < 0) {
2580     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2581       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2582     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2583     bs   = PetscAbs(bs);
2584   }
2585   if (nnz && newbs != bs) {
2586     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2587   }
2588   bs   = newbs;
2589 
2590   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2591   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2592   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2593   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2594 
2595   B->preallocated = PETSC_TRUE;
2596 
2597   mbs  = B->rmap->n/bs;
2598   nbs  = B->cmap->n/bs;
2599   bs2  = bs*bs;
2600 
2601   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2602     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2603   }
2604 
2605   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2606   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2607   if (nnz) {
2608     for (i=0; i<mbs; i++) {
2609       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2610       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2611     }
2612   }
2613 
2614   b       = (Mat_SeqBAIJ*)B->data;
2615   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2616     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2617   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2618 
2619   if (!flg) {
2620     switch (bs) {
2621     case 1:
2622       B->ops->mult            = MatMult_SeqBAIJ_1;
2623       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2624       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2625       break;
2626     case 2:
2627       B->ops->mult            = MatMult_SeqBAIJ_2;
2628       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2629       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2630       break;
2631     case 3:
2632       B->ops->mult            = MatMult_SeqBAIJ_3;
2633       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2634       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2635       break;
2636     case 4:
2637       B->ops->mult            = MatMult_SeqBAIJ_4;
2638       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2639       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2640       break;
2641     case 5:
2642       B->ops->mult            = MatMult_SeqBAIJ_5;
2643       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2644       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2645       break;
2646     case 6:
2647       B->ops->mult            = MatMult_SeqBAIJ_6;
2648       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2649       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2650       break;
2651     case 7:
2652       B->ops->mult            = MatMult_SeqBAIJ_7;
2653       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2654       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2655       break;
2656     default:
2657       B->ops->mult            = MatMult_SeqBAIJ_N;
2658       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2659       break;
2660     }
2661   }
2662   B->rmap->bs      = bs;
2663   b->mbs     = mbs;
2664   b->nbs     = nbs;
2665   if (!skipallocation) {
2666     if (!b->imax) {
2667       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2668     }
2669     /* b->ilen will count nonzeros in each block row so far. */
2670     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2671     if (!nnz) {
2672       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2673       else if (nz <= 0)        nz = 1;
2674       for (i=0; i<mbs; i++) b->imax[i] = nz;
2675       nz = nz*mbs;
2676     } else {
2677       nz = 0;
2678       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2679     }
2680 
2681     /* allocate the matrix space */
2682     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2683     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2684     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2685     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2686     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2687     b->singlemalloc = PETSC_TRUE;
2688     b->i[0] = 0;
2689     for (i=1; i<mbs+1; i++) {
2690       b->i[i] = b->i[i-1] + b->imax[i-1];
2691     }
2692     b->free_a     = PETSC_TRUE;
2693     b->free_ij    = PETSC_TRUE;
2694   } else {
2695     b->free_a     = PETSC_FALSE;
2696     b->free_ij    = PETSC_FALSE;
2697   }
2698 
2699   B->rmap->bs          = bs;
2700   b->bs2              = bs2;
2701   b->mbs              = mbs;
2702   b->nz               = 0;
2703   b->maxnz            = nz*bs2;
2704   B->info.nz_unneeded = (PetscReal)b->maxnz;
2705   PetscFunctionReturn(0);
2706 }
2707 EXTERN_C_END
2708 
2709 EXTERN_C_BEGIN
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2712 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2713 {
2714   PetscInt       i,m,nz,nz_max=0,*nnz;
2715   PetscScalar    *values=0;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719 
2720   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2721 
2722   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2723   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2724   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2725   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2726   m = B->rmap->n/bs;
2727 
2728   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2729   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2730   for(i=0; i<m; i++) {
2731     nz = ii[i+1]- ii[i];
2732     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2733     nz_max = PetscMax(nz_max, nz);
2734     nnz[i] = nz;
2735   }
2736   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2737   ierr = PetscFree(nnz);CHKERRQ(ierr);
2738 
2739   values = (PetscScalar*)V;
2740   if (!values) {
2741     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2742     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2743   }
2744   for (i=0; i<m; i++) {
2745     PetscInt          ncols  = ii[i+1] - ii[i];
2746     const PetscInt    *icols = jj + ii[i];
2747     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2748     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2749   }
2750   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2751   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2752   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2753 
2754   PetscFunctionReturn(0);
2755 }
2756 EXTERN_C_END
2757 
2758 
2759 EXTERN_C_BEGIN
2760 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2761 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2762 EXTERN_C_END
2763 
2764 /*MC
2765    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2766    block sparse compressed row format.
2767 
2768    Options Database Keys:
2769 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2770 
2771   Level: beginner
2772 
2773 .seealso: MatCreateSeqBAIJ()
2774 M*/
2775 
2776 
2777 EXTERN_C_BEGIN
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatCreate_SeqBAIJ"
2780 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2781 {
2782   PetscErrorCode ierr;
2783   PetscMPIInt    size;
2784   Mat_SeqBAIJ    *b;
2785 
2786   PetscFunctionBegin;
2787   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2788   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2789 
2790   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2791   B->data = (void*)b;
2792   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2793   B->mapping          = 0;
2794   b->row              = 0;
2795   b->col              = 0;
2796   b->icol             = 0;
2797   b->reallocs         = 0;
2798   b->saved_values     = 0;
2799 
2800   b->roworiented      = PETSC_TRUE;
2801   b->nonew            = 0;
2802   b->diag             = 0;
2803   b->solve_work       = 0;
2804   b->mult_work        = 0;
2805   B->spptr            = 0;
2806   B->info.nz_unneeded = (PetscReal)b->maxnz;
2807   b->keepzeroedrows   = PETSC_FALSE;
2808   b->xtoy              = 0;
2809   b->XtoY              = 0;
2810   b->compressedrow.use     = PETSC_FALSE;
2811   b->compressedrow.nrows   = 0;
2812   b->compressedrow.i       = PETSC_NULL;
2813   b->compressedrow.rindex  = PETSC_NULL;
2814   b->compressedrow.checked = PETSC_FALSE;
2815   B->same_nonzero          = PETSC_FALSE;
2816 
2817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2818                                      "MatGetFactorAvailable_seqbaij_petsc",
2819                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2820   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2821                                      "MatGetFactor_seqbaij_petsc",
2822                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2824                                      "MatInvertBlockDiagonal_SeqBAIJ",
2825                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2826   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2827                                      "MatStoreValues_SeqBAIJ",
2828                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2830                                      "MatRetrieveValues_SeqBAIJ",
2831                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2833                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2834                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2836                                      "MatConvert_SeqBAIJ_SeqAIJ",
2837                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2839                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2840                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2842                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2843                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2844   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2845                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2846                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2847   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2848   PetscFunctionReturn(0);
2849 }
2850 EXTERN_C_END
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2854 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2855 {
2856   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2857   PetscErrorCode ierr;
2858   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2859 
2860   PetscFunctionBegin;
2861   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2862 
2863   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2864   for (i=0; i<mbs; i++) {
2865     c->imax[i] = a->imax[i];
2866     c->ilen[i] = a->ilen[i];
2867   }
2868 
2869   /* allocate the matrix space */
2870   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2871   c->singlemalloc = PETSC_TRUE;
2872   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2873   if (mbs > 0) {
2874     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2875     if (cpvalues == MAT_COPY_VALUES) {
2876       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2877     } else {
2878       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2879     }
2880   }
2881   c->roworiented = a->roworiented;
2882   c->nonew       = a->nonew;
2883   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2884   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2885   c->bs2         = a->bs2;
2886   c->mbs         = a->mbs;
2887   c->nbs         = a->nbs;
2888 
2889   if (a->diag) {
2890     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2891     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2892     for (i=0; i<mbs; i++) {
2893       c->diag[i] = a->diag[i];
2894     }
2895   } else c->diag        = 0;
2896   c->nz                 = a->nz;
2897   c->maxnz              = a->maxnz;
2898   c->solve_work         = 0;
2899   c->mult_work          = 0;
2900   c->free_a             = PETSC_TRUE;
2901   c->free_ij            = PETSC_TRUE;
2902   C->preallocated       = PETSC_TRUE;
2903   C->assembled          = PETSC_TRUE;
2904 
2905   c->compressedrow.use     = a->compressedrow.use;
2906   c->compressedrow.nrows   = a->compressedrow.nrows;
2907   c->compressedrow.checked = a->compressedrow.checked;
2908   if ( a->compressedrow.checked && a->compressedrow.use){
2909     i = a->compressedrow.nrows;
2910     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2911     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2912     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2913     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2914   } else {
2915     c->compressedrow.use    = PETSC_FALSE;
2916     c->compressedrow.i      = PETSC_NULL;
2917     c->compressedrow.rindex = PETSC_NULL;
2918   }
2919   C->same_nonzero = A->same_nonzero;
2920   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2921   PetscFunctionReturn(0);
2922 }
2923 
2924 #undef __FUNCT__
2925 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2926 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2927 {
2928     PetscErrorCode ierr;
2929 
2930   PetscFunctionBegin;
2931   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2932   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2933   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2934   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 #undef __FUNCT__
2939 #define __FUNCT__ "MatLoad_SeqBAIJ"
2940 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2941 {
2942   Mat_SeqBAIJ    *a;
2943   Mat            B;
2944   PetscErrorCode ierr;
2945   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2946   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2947   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2948   PetscInt       *masked,nmask,tmp,bs2,ishift;
2949   PetscMPIInt    size;
2950   int            fd;
2951   PetscScalar    *aa;
2952   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2953 
2954   PetscFunctionBegin;
2955   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2956     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2957   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2958   bs2  = bs*bs;
2959 
2960   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2961   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2962   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2963   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2964   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2965   M = header[1]; N = header[2]; nz = header[3];
2966 
2967   if (header[3] < 0) {
2968     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2969   }
2970 
2971   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2972 
2973   /*
2974      This code adds extra rows to make sure the number of rows is
2975     divisible by the blocksize
2976   */
2977   mbs        = M/bs;
2978   extra_rows = bs - M + bs*(mbs);
2979   if (extra_rows == bs) extra_rows = 0;
2980   else                  mbs++;
2981   if (extra_rows) {
2982     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2983   }
2984 
2985   /* read in row lengths */
2986   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2987   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2988   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2989 
2990   /* read in column indices */
2991   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2992   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2993   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2994 
2995   /* loop over row lengths determining block row lengths */
2996   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2997   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2998   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2999   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3000   masked   = mask + mbs;
3001   rowcount = 0; nzcount = 0;
3002   for (i=0; i<mbs; i++) {
3003     nmask = 0;
3004     for (j=0; j<bs; j++) {
3005       kmax = rowlengths[rowcount];
3006       for (k=0; k<kmax; k++) {
3007         tmp = jj[nzcount++]/bs;
3008         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3009       }
3010       rowcount++;
3011     }
3012     browlengths[i] += nmask;
3013     /* zero out the mask elements we set */
3014     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3015   }
3016 
3017   /* create our matrix */
3018   ierr = MatCreate(comm,&B);
3019   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3020   ierr = MatSetType(B,type);CHKERRQ(ierr);
3021   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3022   a = (Mat_SeqBAIJ*)B->data;
3023 
3024   /* set matrix "i" values */
3025   a->i[0] = 0;
3026   for (i=1; i<= mbs; i++) {
3027     a->i[i]      = a->i[i-1] + browlengths[i-1];
3028     a->ilen[i-1] = browlengths[i-1];
3029   }
3030   a->nz         = 0;
3031   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3032 
3033   /* read in nonzero values */
3034   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3035   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3036   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3037 
3038   /* set "a" and "j" values into matrix */
3039   nzcount = 0; jcount = 0;
3040   for (i=0; i<mbs; i++) {
3041     nzcountb = nzcount;
3042     nmask    = 0;
3043     for (j=0; j<bs; j++) {
3044       kmax = rowlengths[i*bs+j];
3045       for (k=0; k<kmax; k++) {
3046         tmp = jj[nzcount++]/bs;
3047 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3048       }
3049     }
3050     /* sort the masked values */
3051     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3052 
3053     /* set "j" values into matrix */
3054     maskcount = 1;
3055     for (j=0; j<nmask; j++) {
3056       a->j[jcount++]  = masked[j];
3057       mask[masked[j]] = maskcount++;
3058     }
3059     /* set "a" values into matrix */
3060     ishift = bs2*a->i[i];
3061     for (j=0; j<bs; j++) {
3062       kmax = rowlengths[i*bs+j];
3063       for (k=0; k<kmax; k++) {
3064         tmp       = jj[nzcountb]/bs ;
3065         block     = mask[tmp] - 1;
3066         point     = jj[nzcountb] - bs*tmp;
3067         idx       = ishift + bs2*block + j + bs*point;
3068         a->a[idx] = (MatScalar)aa[nzcountb++];
3069       }
3070     }
3071     /* zero out the mask elements we set */
3072     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3073   }
3074   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3075 
3076   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3077   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3078   ierr = PetscFree(aa);CHKERRQ(ierr);
3079   ierr = PetscFree(jj);CHKERRQ(ierr);
3080   ierr = PetscFree(mask);CHKERRQ(ierr);
3081 
3082   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3083   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3084   ierr = MatView_Private(B);CHKERRQ(ierr);
3085 
3086   *A = B;
3087   PetscFunctionReturn(0);
3088 }
3089 
3090 #undef __FUNCT__
3091 #define __FUNCT__ "MatCreateSeqBAIJ"
3092 /*@C
3093    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3094    compressed row) format.  For good matrix assembly performance the
3095    user should preallocate the matrix storage by setting the parameter nz
3096    (or the array nnz).  By setting these parameters accurately, performance
3097    during matrix assembly can be increased by more than a factor of 50.
3098 
3099    Collective on MPI_Comm
3100 
3101    Input Parameters:
3102 +  comm - MPI communicator, set to PETSC_COMM_SELF
3103 .  bs - size of block
3104 .  m - number of rows
3105 .  n - number of columns
3106 .  nz - number of nonzero blocks  per block row (same for all rows)
3107 -  nnz - array containing the number of nonzero blocks in the various block rows
3108          (possibly different for each block row) or PETSC_NULL
3109 
3110    Output Parameter:
3111 .  A - the matrix
3112 
3113    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3114    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3115    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3116 
3117    Options Database Keys:
3118 .   -mat_no_unroll - uses code that does not unroll the loops in the
3119                      block calculations (much slower)
3120 .    -mat_block_size - size of the blocks to use
3121 
3122    Level: intermediate
3123 
3124    Notes:
3125    The number of rows and columns must be divisible by blocksize.
3126 
3127    If the nnz parameter is given then the nz parameter is ignored
3128 
3129    A nonzero block is any block that as 1 or more nonzeros in it
3130 
3131    The block AIJ format is fully compatible with standard Fortran 77
3132    storage.  That is, the stored row and column indices can begin at
3133    either one (as in Fortran) or zero.  See the users' manual for details.
3134 
3135    Specify the preallocated storage with either nz or nnz (not both).
3136    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3137    allocation.  For additional details, see the users manual chapter on
3138    matrices.
3139 
3140 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3141 @*/
3142 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3143 {
3144   PetscErrorCode ierr;
3145 
3146   PetscFunctionBegin;
3147   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3148   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3149   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3150   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 #undef __FUNCT__
3155 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3156 /*@C
3157    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3158    per row in the matrix. For good matrix assembly performance the
3159    user should preallocate the matrix storage by setting the parameter nz
3160    (or the array nnz).  By setting these parameters accurately, performance
3161    during matrix assembly can be increased by more than a factor of 50.
3162 
3163    Collective on MPI_Comm
3164 
3165    Input Parameters:
3166 +  A - the matrix
3167 .  bs - size of block
3168 .  nz - number of block nonzeros per block row (same for all rows)
3169 -  nnz - array containing the number of block nonzeros in the various block rows
3170          (possibly different for each block row) or PETSC_NULL
3171 
3172    Options Database Keys:
3173 .   -mat_no_unroll - uses code that does not unroll the loops in the
3174                      block calculations (much slower)
3175 .    -mat_block_size - size of the blocks to use
3176 
3177    Level: intermediate
3178 
3179    Notes:
3180    If the nnz parameter is given then the nz parameter is ignored
3181 
3182    You can call MatGetInfo() to get information on how effective the preallocation was;
3183    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3184    You can also run with the option -info and look for messages with the string
3185    malloc in them to see if additional memory allocation was needed.
3186 
3187    The block AIJ format is fully compatible with standard Fortran 77
3188    storage.  That is, the stored row and column indices can begin at
3189    either one (as in Fortran) or zero.  See the users' manual for details.
3190 
3191    Specify the preallocated storage with either nz or nnz (not both).
3192    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3193    allocation.  For additional details, see the users manual chapter on
3194    matrices.
3195 
3196 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3197 @*/
3198 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3199 {
3200   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3201 
3202   PetscFunctionBegin;
3203   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3204   if (f) {
3205     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3206   }
3207   PetscFunctionReturn(0);
3208 }
3209 
3210 #undef __FUNCT__
3211 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3212 /*@C
3213    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3214    (the default sequential PETSc format).
3215 
3216    Collective on MPI_Comm
3217 
3218    Input Parameters:
3219 +  A - the matrix
3220 .  i - the indices into j for the start of each local row (starts with zero)
3221 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3222 -  v - optional values in the matrix
3223 
3224    Level: developer
3225 
3226 .keywords: matrix, aij, compressed row, sparse
3227 
3228 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3229 @*/
3230 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3231 {
3232   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3233 
3234   PetscFunctionBegin;
3235   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3236   if (f) {
3237     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3238   }
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 
3243 #undef __FUNCT__
3244 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3245 /*@
3246      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3247               (upper triangular entries in CSR format) provided by the user.
3248 
3249      Collective on MPI_Comm
3250 
3251    Input Parameters:
3252 +  comm - must be an MPI communicator of size 1
3253 .  bs - size of block
3254 .  m - number of rows
3255 .  n - number of columns
3256 .  i - row indices
3257 .  j - column indices
3258 -  a - matrix values
3259 
3260    Output Parameter:
3261 .  mat - the matrix
3262 
3263    Level: intermediate
3264 
3265    Notes:
3266        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3267     once the matrix is destroyed
3268 
3269        You cannot set new nonzero locations into this matrix, that will generate an error.
3270 
3271        The i and j indices are 0 based
3272 
3273 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3274 
3275 @*/
3276 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3277 {
3278   PetscErrorCode ierr;
3279   PetscInt       ii;
3280   Mat_SeqBAIJ    *baij;
3281 
3282   PetscFunctionBegin;
3283   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3284   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3285 
3286   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3287   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3288   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3289   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3290   baij = (Mat_SeqBAIJ*)(*mat)->data;
3291   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3292 
3293   baij->i = i;
3294   baij->j = j;
3295   baij->a = a;
3296   baij->singlemalloc = PETSC_FALSE;
3297   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3298   baij->free_a       = PETSC_FALSE;
3299   baij->free_ij       = PETSC_FALSE;
3300 
3301   for (ii=0; ii<m; ii++) {
3302     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3303 #if defined(PETSC_USE_DEBUG)
3304     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3305 #endif
3306   }
3307 #if defined(PETSC_USE_DEBUG)
3308   for (ii=0; ii<baij->i[m]; ii++) {
3309     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3310     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3311   }
3312 #endif
3313 
3314   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3315   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3316   PetscFunctionReturn(0);
3317 }
3318