xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 08917f38f07ae8f31cafc0a85f43479d150262bd)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "../src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "../src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   diag     = a->diag;
1105   for (i=0; i<a->mbs; i++) {
1106     if (jj[diag[i]] != i) {
1107       *missing  = PETSC_TRUE;
1108       if (d) *d = i;
1109     }
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117 {
1118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscInt       i,j,m = a->mbs;
1121 
1122   PetscFunctionBegin;
1123   if (!a->diag) {
1124     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1125   }
1126   for (i=0; i<m; i++) {
1127     a->diag[i] = a->i[i+1];
1128     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129       if (a->j[j] == i) {
1130         a->diag[i] = j;
1131         break;
1132       }
1133     }
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 
1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1148   PetscInt       *tia, *tja;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1155     nz   = tia[n];
1156   } else {
1157     tia = a->i; tja = a->j;
1158   }
1159 
1160   if (!blockcompressed && bs > 1) {
1161     (*nn) *= bs;
1162     nbs    = bs;
1163     /* malloc & create the natural set of indices */
1164     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1165     if (n) {
1166       (*ia)[0] = 0;
1167       for (j=1; j<bs; j++) {
1168         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1169       }
1170     }
1171 
1172     for (i=1; i<n; i++) {
1173       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1174       for (j=1; j<bs; j++) {
1175         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1176       }
1177     }
1178     if (n) {
1179       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1180     }
1181 
1182     if (ja) {
1183       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1184       cnt = 0;
1185       for (i=0; i<n; i++) {
1186         for (j=0; j<bs; j++) {
1187           for (k=tia[i]; k<tia[i+1]; k++) {
1188             for (l=0; l<bs; l++) {
1189               (*ja)[cnt++] = bs*tja[k] + l;
1190 	    }
1191           }
1192         }
1193       }
1194     }
1195 
1196     n     *= bs;
1197     nz *= bs*bs;
1198     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1199       ierr = PetscFree(tia);CHKERRQ(ierr);
1200       ierr = PetscFree(tja);CHKERRQ(ierr);
1201     }
1202   } else {
1203     *ia = tia;
1204     if (ja) *ja = tja;
1205   }
1206   if (oshift == 1) {
1207     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1208     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1215 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1216 {
1217   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1218   PetscErrorCode ierr;
1219   PetscInt       i,n = a->mbs,nz = a->i[n];
1220 
1221   PetscFunctionBegin;
1222   if (!ia) PetscFunctionReturn(0);
1223   if (!blockcompressed && A->rmap->bs > 1) {
1224     ierr = PetscFree(*ia);CHKERRQ(ierr);
1225     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1226   } else if (symmetric) {
1227     ierr = PetscFree(*ia);CHKERRQ(ierr);
1228     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1229   } else if (oshift == 1) { /* blockcompressed */
1230     for (i=0; i<n+1; i++) a->i[i]--;
1231     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1238 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1239 {
1240   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244 #if defined(PETSC_USE_LOG)
1245   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1246 #endif
1247   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1248   if (a->row) {
1249     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1250   }
1251   if (a->col) {
1252     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1255   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1256   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1257   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1258   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1259   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1260   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1261   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1262   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1263 
1264   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1265   ierr = PetscFree(a);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1281 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1282 {
1283   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1284   PetscErrorCode ierr;
1285 
1286   PetscFunctionBegin;
1287   switch (op) {
1288   case MAT_ROW_ORIENTED:
1289     a->roworiented    = flg;
1290     break;
1291   case MAT_KEEP_ZEROED_ROWS:
1292     a->keepzeroedrows = flg;
1293     break;
1294   case MAT_NEW_NONZERO_LOCATIONS:
1295     a->nonew          = (flg ? 0 : 1);
1296     break;
1297   case MAT_NEW_NONZERO_LOCATION_ERR:
1298     a->nonew          = (flg ? -1 : 0);
1299     break;
1300   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1301     a->nonew          = (flg ? -2 : 0);
1302     break;
1303   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1304     a->nounused       = (flg ? -1 : 0);
1305     break;
1306   case MAT_NEW_DIAGONALS:
1307   case MAT_IGNORE_OFF_PROC_ENTRIES:
1308   case MAT_USE_HASH_TABLE:
1309     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1310     break;
1311   case MAT_SYMMETRIC:
1312   case MAT_STRUCTURALLY_SYMMETRIC:
1313   case MAT_HERMITIAN:
1314   case MAT_SYMMETRY_ETERNAL:
1315     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1316     break;
1317   default:
1318     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1319   }
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1325 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1326 {
1327   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1330   MatScalar      *aa,*aa_i;
1331   PetscScalar    *v_i;
1332 
1333   PetscFunctionBegin;
1334   bs  = A->rmap->bs;
1335   ai  = a->i;
1336   aj  = a->j;
1337   aa  = a->a;
1338   bs2 = a->bs2;
1339 
1340   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1341 
1342   bn  = row/bs;   /* Block number */
1343   bp  = row % bs; /* Block Position */
1344   M   = ai[bn+1] - ai[bn];
1345   *nz = bs*M;
1346 
1347   if (v) {
1348     *v = 0;
1349     if (*nz) {
1350       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1351       for (i=0; i<M; i++) { /* for each block in the block row */
1352         v_i  = *v + i*bs;
1353         aa_i = aa + bs2*(ai[bn] + i);
1354         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1355       }
1356     }
1357   }
1358 
1359   if (idx) {
1360     *idx = 0;
1361     if (*nz) {
1362       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1363       for (i=0; i<M; i++) { /* for each block in the block row */
1364         idx_i = *idx + i*bs;
1365         itmp  = bs*aj[ai[bn] + i];
1366         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1367       }
1368     }
1369   }
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1375 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1376 {
1377   PetscErrorCode ierr;
1378 
1379   PetscFunctionBegin;
1380   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1381   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1387 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1388 {
1389   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1390   Mat            C;
1391   PetscErrorCode ierr;
1392   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1393   PetscInt       *rows,*cols,bs2=a->bs2;
1394   MatScalar      *array;
1395 
1396   PetscFunctionBegin;
1397   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1398   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1399     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1400     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1401 
1402     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1403     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1404     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1405     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1406     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1407     ierr = PetscFree(col);CHKERRQ(ierr);
1408   } else {
1409     C = *B;
1410   }
1411 
1412   array = a->a;
1413   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1414   cols = rows + bs;
1415   for (i=0; i<mbs; i++) {
1416     cols[0] = i*bs;
1417     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1418     len = ai[i+1] - ai[i];
1419     for (j=0; j<len; j++) {
1420       rows[0] = (*aj++)*bs;
1421       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1422       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1423       array += bs2;
1424     }
1425   }
1426   ierr = PetscFree(rows);CHKERRQ(ierr);
1427 
1428   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1430 
1431   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1432     *B = C;
1433   } else {
1434     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1441 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1442 {
1443   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1444   PetscErrorCode ierr;
1445   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1446   int            fd;
1447   PetscScalar    *aa;
1448   FILE           *file;
1449 
1450   PetscFunctionBegin;
1451   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1452   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1453   col_lens[0] = MAT_FILE_COOKIE;
1454 
1455   col_lens[1] = A->rmap->N;
1456   col_lens[2] = A->cmap->n;
1457   col_lens[3] = a->nz*bs2;
1458 
1459   /* store lengths of each row and write (including header) to file */
1460   count = 0;
1461   for (i=0; i<a->mbs; i++) {
1462     for (j=0; j<bs; j++) {
1463       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1464     }
1465   }
1466   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1467   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1468 
1469   /* store column indices (zero start index) */
1470   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1471   count = 0;
1472   for (i=0; i<a->mbs; i++) {
1473     for (j=0; j<bs; j++) {
1474       for (k=a->i[i]; k<a->i[i+1]; k++) {
1475         for (l=0; l<bs; l++) {
1476           jj[count++] = bs*a->j[k] + l;
1477         }
1478       }
1479     }
1480   }
1481   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1482   ierr = PetscFree(jj);CHKERRQ(ierr);
1483 
1484   /* store nonzero values */
1485   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1486   count = 0;
1487   for (i=0; i<a->mbs; i++) {
1488     for (j=0; j<bs; j++) {
1489       for (k=a->i[i]; k<a->i[i+1]; k++) {
1490         for (l=0; l<bs; l++) {
1491           aa[count++] = a->a[bs2*k + l*bs + j];
1492         }
1493       }
1494     }
1495   }
1496   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1497   ierr = PetscFree(aa);CHKERRQ(ierr);
1498 
1499   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1500   if (file) {
1501     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1502   }
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1508 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1509 {
1510   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1511   PetscErrorCode    ierr;
1512   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1513   PetscViewerFormat format;
1514 
1515   PetscFunctionBegin;
1516   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1517   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1518     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1519   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1520     Mat aij;
1521     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1522     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1523     ierr = MatDestroy(aij);CHKERRQ(ierr);
1524   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1525      PetscFunctionReturn(0);
1526   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1527     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1528     for (i=0; i<a->mbs; i++) {
1529       for (j=0; j<bs; j++) {
1530         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1531         for (k=a->i[i]; k<a->i[i+1]; k++) {
1532           for (l=0; l<bs; l++) {
1533 #if defined(PETSC_USE_COMPLEX)
1534             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1536                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1539                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1542             }
1543 #else
1544             if (a->a[bs2*k + l*bs + j] != 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1546             }
1547 #endif
1548           }
1549         }
1550         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1551       }
1552     }
1553     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1554   } else {
1555     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1556     for (i=0; i<a->mbs; i++) {
1557       for (j=0; j<bs; j++) {
1558         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1559         for (k=a->i[i]; k<a->i[i+1]; k++) {
1560           for (l=0; l<bs; l++) {
1561 #if defined(PETSC_USE_COMPLEX)
1562             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1564                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1567                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             } else {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1570             }
1571 #else
1572             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1573 #endif
1574           }
1575         }
1576         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1577       }
1578     }
1579     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1580   }
1581   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1587 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1588 {
1589   Mat            A = (Mat) Aa;
1590   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1591   PetscErrorCode ierr;
1592   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1593   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1594   MatScalar      *aa;
1595   PetscViewer    viewer;
1596 
1597   PetscFunctionBegin;
1598 
1599   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1600   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1601 
1602   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1603 
1604   /* loop over matrix elements drawing boxes */
1605   color = PETSC_DRAW_BLUE;
1606   for (i=0,row=0; i<mbs; i++,row+=bs) {
1607     for (j=a->i[i]; j<a->i[i+1]; j++) {
1608       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610       aa = a->a + j*bs2;
1611       for (k=0; k<bs; k++) {
1612         for (l=0; l<bs; l++) {
1613           if (PetscRealPart(*aa++) >=  0.) continue;
1614           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1615         }
1616       }
1617     }
1618   }
1619   color = PETSC_DRAW_CYAN;
1620   for (i=0,row=0; i<mbs; i++,row+=bs) {
1621     for (j=a->i[i]; j<a->i[i+1]; j++) {
1622       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624       aa = a->a + j*bs2;
1625       for (k=0; k<bs; k++) {
1626         for (l=0; l<bs; l++) {
1627           if (PetscRealPart(*aa++) != 0.) continue;
1628           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1629         }
1630       }
1631     }
1632   }
1633 
1634   color = PETSC_DRAW_RED;
1635   for (i=0,row=0; i<mbs; i++,row+=bs) {
1636     for (j=a->i[i]; j<a->i[i+1]; j++) {
1637       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1638       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1639       aa = a->a + j*bs2;
1640       for (k=0; k<bs; k++) {
1641         for (l=0; l<bs; l++) {
1642           if (PetscRealPart(*aa++) <= 0.) continue;
1643           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1644         }
1645       }
1646     }
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1653 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1654 {
1655   PetscErrorCode ierr;
1656   PetscReal      xl,yl,xr,yr,w,h;
1657   PetscDraw      draw;
1658   PetscTruth     isnull;
1659 
1660   PetscFunctionBegin;
1661 
1662   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1663   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1664 
1665   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1666   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1667   xr += w;    yr += h;  xl = -w;     yl = -h;
1668   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1669   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1670   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatView_SeqBAIJ"
1676 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1677 {
1678   PetscErrorCode ierr;
1679   PetscTruth     iascii,isbinary,isdraw;
1680 
1681   PetscFunctionBegin;
1682   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1683   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1684   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1685   if (iascii){
1686     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1687   } else if (isbinary) {
1688     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1689   } else if (isdraw) {
1690     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1691   } else {
1692     Mat B;
1693     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1694     ierr = MatView(B,viewer);CHKERRQ(ierr);
1695     ierr = MatDestroy(B);CHKERRQ(ierr);
1696   }
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1703 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1704 {
1705   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1706   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1707   PetscInt    *ai = a->i,*ailen = a->ilen;
1708   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1709   MatScalar   *ap,*aa = a->a;
1710 
1711   PetscFunctionBegin;
1712   for (k=0; k<m; k++) { /* loop over rows */
1713     row  = im[k]; brow = row/bs;
1714     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1715     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1716     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1717     nrow = ailen[brow];
1718     for (l=0; l<n; l++) { /* loop over columns */
1719       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1720       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1721       col  = in[l] ;
1722       bcol = col/bs;
1723       cidx = col%bs;
1724       ridx = row%bs;
1725       high = nrow;
1726       low  = 0; /* assume unsorted */
1727       while (high-low > 5) {
1728         t = (low+high)/2;
1729         if (rp[t] > bcol) high = t;
1730         else             low  = t;
1731       }
1732       for (i=low; i<high; i++) {
1733         if (rp[i] > bcol) break;
1734         if (rp[i] == bcol) {
1735           *v++ = ap[bs2*i+bs*cidx+ridx];
1736           goto finished;
1737         }
1738       }
1739       *v++ = 0.0;
1740       finished:;
1741     }
1742   }
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #define CHUNKSIZE 10
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1749 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1750 {
1751   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1752   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1753   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1754   PetscErrorCode    ierr;
1755   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1756   PetscTruth        roworiented=a->roworiented;
1757   const PetscScalar *value = v;
1758   MatScalar         *ap,*aa = a->a,*bap;
1759 
1760   PetscFunctionBegin;
1761   if (roworiented) {
1762     stepval = (n-1)*bs;
1763   } else {
1764     stepval = (m-1)*bs;
1765   }
1766   for (k=0; k<m; k++) { /* loop over added rows */
1767     row  = im[k];
1768     if (row < 0) continue;
1769 #if defined(PETSC_USE_DEBUG)
1770     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1771 #endif
1772     rp   = aj + ai[row];
1773     ap   = aa + bs2*ai[row];
1774     rmax = imax[row];
1775     nrow = ailen[row];
1776     low  = 0;
1777     high = nrow;
1778     for (l=0; l<n; l++) { /* loop over added columns */
1779       if (in[l] < 0) continue;
1780 #if defined(PETSC_USE_DEBUG)
1781       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1782 #endif
1783       col = in[l];
1784       if (roworiented) {
1785         value = v + k*(stepval+bs)*bs + l*bs;
1786       } else {
1787         value = v + l*(stepval+bs)*bs + k*bs;
1788       }
1789       if (col <= lastcol) low = 0; else high = nrow;
1790       lastcol = col;
1791       while (high-low > 7) {
1792         t = (low+high)/2;
1793         if (rp[t] > col) high = t;
1794         else             low  = t;
1795       }
1796       for (i=low; i<high; i++) {
1797         if (rp[i] > col) break;
1798         if (rp[i] == col) {
1799           bap  = ap +  bs2*i;
1800           if (roworiented) {
1801             if (is == ADD_VALUES) {
1802               for (ii=0; ii<bs; ii++,value+=stepval) {
1803                 for (jj=ii; jj<bs2; jj+=bs) {
1804                   bap[jj] += *value++;
1805                 }
1806               }
1807             } else {
1808               for (ii=0; ii<bs; ii++,value+=stepval) {
1809                 for (jj=ii; jj<bs2; jj+=bs) {
1810                   bap[jj] = *value++;
1811                 }
1812               }
1813             }
1814           } else {
1815             if (is == ADD_VALUES) {
1816               for (ii=0; ii<bs; ii++,value+=stepval) {
1817                 for (jj=0; jj<bs; jj++) {
1818                   *bap++ += *value++;
1819                 }
1820               }
1821             } else {
1822               for (ii=0; ii<bs; ii++,value+=stepval) {
1823                 for (jj=0; jj<bs; jj++) {
1824                   *bap++  = *value++;
1825                 }
1826               }
1827             }
1828           }
1829           goto noinsert2;
1830         }
1831       }
1832       if (nonew == 1) goto noinsert2;
1833       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1834       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1835       N = nrow++ - 1; high++;
1836       /* shift up all the later entries in this row */
1837       for (ii=N; ii>=i; ii--) {
1838         rp[ii+1] = rp[ii];
1839         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1840       }
1841       if (N >= i) {
1842         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1843       }
1844       rp[i] = col;
1845       bap   = ap +  bs2*i;
1846       if (roworiented) {
1847         for (ii=0; ii<bs; ii++,value+=stepval) {
1848           for (jj=ii; jj<bs2; jj+=bs) {
1849             bap[jj] = *value++;
1850           }
1851         }
1852       } else {
1853         for (ii=0; ii<bs; ii++,value+=stepval) {
1854           for (jj=0; jj<bs; jj++) {
1855             *bap++  = *value++;
1856           }
1857         }
1858       }
1859       noinsert2:;
1860       low = i;
1861     }
1862     ailen[row] = nrow;
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1869 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1870 {
1871   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1872   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1873   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1874   PetscErrorCode ierr;
1875   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1876   MatScalar      *aa = a->a,*ap;
1877   PetscReal      ratio=0.6;
1878 
1879   PetscFunctionBegin;
1880   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1881 
1882   if (m) rmax = ailen[0];
1883   for (i=1; i<mbs; i++) {
1884     /* move each row back by the amount of empty slots (fshift) before it*/
1885     fshift += imax[i-1] - ailen[i-1];
1886     rmax   = PetscMax(rmax,ailen[i]);
1887     if (fshift) {
1888       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1889       N = ailen[i];
1890       for (j=0; j<N; j++) {
1891         ip[j-fshift] = ip[j];
1892         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1893       }
1894     }
1895     ai[i] = ai[i-1] + ailen[i-1];
1896   }
1897   if (mbs) {
1898     fshift += imax[mbs-1] - ailen[mbs-1];
1899     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1900   }
1901   /* reset ilen and imax for each row */
1902   for (i=0; i<mbs; i++) {
1903     ailen[i] = imax[i] = ai[i+1] - ai[i];
1904   }
1905   a->nz = ai[mbs];
1906 
1907   /* diagonals may have moved, so kill the diagonal pointers */
1908   a->idiagvalid = PETSC_FALSE;
1909   if (fshift && a->diag) {
1910     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1911     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1912     a->diag = 0;
1913   }
1914   if (fshift && a->nounused == -1) {
1915     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1916   }
1917   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1918   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1919   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1920   a->reallocs          = 0;
1921   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1922 
1923   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1924   if (a->compressedrow.use){
1925     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1926   }
1927 
1928   A->same_nonzero = PETSC_TRUE;
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 /*
1933    This function returns an array of flags which indicate the locations of contiguous
1934    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1935    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1936    Assume: sizes should be long enough to hold all the values.
1937 */
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1940 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1941 {
1942   PetscInt   i,j,k,row;
1943   PetscTruth flg;
1944 
1945   PetscFunctionBegin;
1946   for (i=0,j=0; i<n; j++) {
1947     row = idx[i];
1948     if (row%bs!=0) { /* Not the begining of a block */
1949       sizes[j] = 1;
1950       i++;
1951     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1952       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1953       i++;
1954     } else { /* Begining of the block, so check if the complete block exists */
1955       flg = PETSC_TRUE;
1956       for (k=1; k<bs; k++) {
1957         if (row+k != idx[i+k]) { /* break in the block */
1958           flg = PETSC_FALSE;
1959           break;
1960         }
1961       }
1962       if (flg) { /* No break in the bs */
1963         sizes[j] = bs;
1964         i+= bs;
1965       } else {
1966         sizes[j] = 1;
1967         i++;
1968       }
1969     }
1970   }
1971   *bs_max = j;
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1977 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1978 {
1979   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1980   PetscErrorCode ierr;
1981   PetscInt       i,j,k,count,*rows;
1982   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1983   PetscScalar    zero = 0.0;
1984   MatScalar      *aa;
1985 
1986   PetscFunctionBegin;
1987   /* Make a copy of the IS and  sort it */
1988   /* allocate memory for rows,sizes */
1989   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1990   sizes = rows + is_n;
1991 
1992   /* copy IS values to rows, and sort them */
1993   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1994   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1995   if (baij->keepzeroedrows) {
1996     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1997     bs_max = is_n;
1998     A->same_nonzero = PETSC_TRUE;
1999   } else {
2000     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2001     A->same_nonzero = PETSC_FALSE;
2002   }
2003 
2004   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2005     row   = rows[j];
2006     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2007     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2008     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2009     if (sizes[i] == bs && !baij->keepzeroedrows) {
2010       if (diag != 0.0) {
2011         if (baij->ilen[row/bs] > 0) {
2012           baij->ilen[row/bs]       = 1;
2013           baij->j[baij->i[row/bs]] = row/bs;
2014           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2015         }
2016         /* Now insert all the diagonal values for this bs */
2017         for (k=0; k<bs; k++) {
2018           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2019         }
2020       } else { /* (diag == 0.0) */
2021         baij->ilen[row/bs] = 0;
2022       } /* end (diag == 0.0) */
2023     } else { /* (sizes[i] != bs) */
2024 #if defined (PETSC_USE_DEBUG)
2025       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2026 #endif
2027       for (k=0; k<count; k++) {
2028         aa[0] =  zero;
2029         aa    += bs;
2030       }
2031       if (diag != 0.0) {
2032         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2033       }
2034     }
2035   }
2036 
2037   ierr = PetscFree(rows);CHKERRQ(ierr);
2038   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2044 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2045 {
2046   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2047   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2048   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2049   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2050   PetscErrorCode ierr;
2051   PetscInt       ridx,cidx,bs2=a->bs2;
2052   PetscTruth     roworiented=a->roworiented;
2053   MatScalar      *ap,value,*aa=a->a,*bap;
2054 
2055   PetscFunctionBegin;
2056   for (k=0; k<m; k++) { /* loop over added rows */
2057     row  = im[k];
2058     brow = row/bs;
2059     if (row < 0) continue;
2060 #if defined(PETSC_USE_DEBUG)
2061     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2062 #endif
2063     rp   = aj + ai[brow];
2064     ap   = aa + bs2*ai[brow];
2065     rmax = imax[brow];
2066     nrow = ailen[brow];
2067     low  = 0;
2068     high = nrow;
2069     for (l=0; l<n; l++) { /* loop over added columns */
2070       if (in[l] < 0) continue;
2071 #if defined(PETSC_USE_DEBUG)
2072       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2073 #endif
2074       col = in[l]; bcol = col/bs;
2075       ridx = row % bs; cidx = col % bs;
2076       if (roworiented) {
2077         value = v[l + k*n];
2078       } else {
2079         value = v[k + l*m];
2080       }
2081       if (col <= lastcol) low = 0; else high = nrow;
2082       lastcol = col;
2083       while (high-low > 7) {
2084         t = (low+high)/2;
2085         if (rp[t] > bcol) high = t;
2086         else              low  = t;
2087       }
2088       for (i=low; i<high; i++) {
2089         if (rp[i] > bcol) break;
2090         if (rp[i] == bcol) {
2091           bap  = ap +  bs2*i + bs*cidx + ridx;
2092           if (is == ADD_VALUES) *bap += value;
2093           else                  *bap  = value;
2094           goto noinsert1;
2095         }
2096       }
2097       if (nonew == 1) goto noinsert1;
2098       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2099       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2100       N = nrow++ - 1; high++;
2101       /* shift up all the later entries in this row */
2102       for (ii=N; ii>=i; ii--) {
2103         rp[ii+1] = rp[ii];
2104         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2105       }
2106       if (N>=i) {
2107         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2108       }
2109       rp[i]                      = bcol;
2110       ap[bs2*i + bs*cidx + ridx] = value;
2111       a->nz++;
2112       noinsert1:;
2113       low = i;
2114     }
2115     ailen[brow] = nrow;
2116   }
2117   A->same_nonzero = PETSC_FALSE;
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2125 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2126 {
2127   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2128   Mat            outA;
2129   PetscErrorCode ierr;
2130   PetscTruth     row_identity,col_identity;
2131 
2132   PetscFunctionBegin;
2133   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2134   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2135   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2136   if (!row_identity || !col_identity) {
2137     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2138   }
2139 
2140   outA          = inA;
2141   inA->factor   = MAT_FACTOR_LU;
2142 
2143   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2144 
2145   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2146   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2147   a->row = row;
2148   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2149   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2150   a->col = col;
2151 
2152   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2153   if (a->icol) {
2154     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2155   }
2156   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2158 
2159   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2160   if (!a->solve_work) {
2161     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2162     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2163   }
2164   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2165 
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 EXTERN_C_BEGIN
2170 #undef __FUNCT__
2171 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2172 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2173 {
2174   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2175   PetscInt    i,nz,nbs;
2176 
2177   PetscFunctionBegin;
2178   nz  = baij->maxnz/baij->bs2;
2179   nbs = baij->nbs;
2180   for (i=0; i<nz; i++) {
2181     baij->j[i] = indices[i];
2182   }
2183   baij->nz = nz;
2184   for (i=0; i<nbs; i++) {
2185     baij->ilen[i] = baij->imax[i];
2186   }
2187 
2188   PetscFunctionReturn(0);
2189 }
2190 EXTERN_C_END
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2194 /*@
2195     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2196        in the matrix.
2197 
2198   Input Parameters:
2199 +  mat - the SeqBAIJ matrix
2200 -  indices - the column indices
2201 
2202   Level: advanced
2203 
2204   Notes:
2205     This can be called if you have precomputed the nonzero structure of the
2206   matrix and want to provide it to the matrix object to improve the performance
2207   of the MatSetValues() operation.
2208 
2209     You MUST have set the correct numbers of nonzeros per row in the call to
2210   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2211 
2212     MUST be called before any calls to MatSetValues();
2213 
2214 @*/
2215 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2216 {
2217   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2218 
2219   PetscFunctionBegin;
2220   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2221   PetscValidPointer(indices,2);
2222   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2223   if (f) {
2224     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2225   } else {
2226     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2233 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2234 {
2235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2236   PetscErrorCode ierr;
2237   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2238   PetscReal      atmp;
2239   PetscScalar    *x,zero = 0.0;
2240   MatScalar      *aa;
2241   PetscInt       ncols,brow,krow,kcol;
2242 
2243   PetscFunctionBegin;
2244   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2245   bs   = A->rmap->bs;
2246   aa   = a->a;
2247   ai   = a->i;
2248   aj   = a->j;
2249   mbs  = a->mbs;
2250 
2251   ierr = VecSet(v,zero);CHKERRQ(ierr);
2252   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2253   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2254   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2255   for (i=0; i<mbs; i++) {
2256     ncols = ai[1] - ai[0]; ai++;
2257     brow  = bs*i;
2258     for (j=0; j<ncols; j++){
2259       for (kcol=0; kcol<bs; kcol++){
2260         for (krow=0; krow<bs; krow++){
2261           atmp = PetscAbsScalar(*aa);aa++;
2262           row = brow + krow;    /* row index */
2263           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2264           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2265         }
2266       }
2267       aj++;
2268     }
2269   }
2270   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatCopy_SeqBAIJ"
2276 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2277 {
2278   PetscErrorCode ierr;
2279 
2280   PetscFunctionBegin;
2281   /* If the two matrices have the same copy implementation, use fast copy. */
2282   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2283     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2284     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2285 
2286     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2287       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2288     }
2289     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2290   } else {
2291     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2292   }
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2298 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2299 {
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2309 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2310 {
2311   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2312   PetscFunctionBegin;
2313   *array = a->a;
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2319 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2320 {
2321   PetscFunctionBegin;
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #include "petscblaslapack.h"
2326 #undef __FUNCT__
2327 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2328 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2329 {
2330   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2331   PetscErrorCode ierr;
2332   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2333   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2334 
2335   PetscFunctionBegin;
2336   if (str == SAME_NONZERO_PATTERN) {
2337     PetscScalar alpha = a;
2338     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2339   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2340     if (y->xtoy && y->XtoY != X) {
2341       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2342       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2343     }
2344     if (!y->xtoy) { /* get xtoy */
2345       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2346       y->XtoY = X;
2347       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2348     }
2349     bs2 = bs*bs;
2350     for (i=0; i<x->nz; i++) {
2351       j = 0;
2352       while (j < bs2){
2353         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2354         j++;
2355       }
2356     }
2357     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2358   } else {
2359     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2360   }
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2366 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2367 {
2368   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2369   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2370   MatScalar      *aa = a->a;
2371 
2372   PetscFunctionBegin;
2373   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2379 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2380 {
2381   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2382   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2383   MatScalar      *aa = a->a;
2384 
2385   PetscFunctionBegin;
2386   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 
2391 /* -------------------------------------------------------------------*/
2392 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2393        MatGetRow_SeqBAIJ,
2394        MatRestoreRow_SeqBAIJ,
2395        MatMult_SeqBAIJ_N,
2396 /* 4*/ MatMultAdd_SeqBAIJ_N,
2397        MatMultTranspose_SeqBAIJ,
2398        MatMultTransposeAdd_SeqBAIJ,
2399        0,
2400        0,
2401        0,
2402 /*10*/ 0,
2403        MatLUFactor_SeqBAIJ,
2404        0,
2405        0,
2406        MatTranspose_SeqBAIJ,
2407 /*15*/ MatGetInfo_SeqBAIJ,
2408        MatEqual_SeqBAIJ,
2409        MatGetDiagonal_SeqBAIJ,
2410        MatDiagonalScale_SeqBAIJ,
2411        MatNorm_SeqBAIJ,
2412 /*20*/ 0,
2413        MatAssemblyEnd_SeqBAIJ,
2414        0,
2415        MatSetOption_SeqBAIJ,
2416        MatZeroEntries_SeqBAIJ,
2417 /*25*/ MatZeroRows_SeqBAIJ,
2418        0,
2419        0,
2420        0,
2421        0,
2422 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2423        0,
2424        0,
2425        MatGetArray_SeqBAIJ,
2426        MatRestoreArray_SeqBAIJ,
2427 /*35*/ MatDuplicate_SeqBAIJ,
2428        0,
2429        0,
2430        MatILUFactor_SeqBAIJ,
2431        0,
2432 /*40*/ MatAXPY_SeqBAIJ,
2433        MatGetSubMatrices_SeqBAIJ,
2434        MatIncreaseOverlap_SeqBAIJ,
2435        MatGetValues_SeqBAIJ,
2436        MatCopy_SeqBAIJ,
2437 /*45*/ 0,
2438        MatScale_SeqBAIJ,
2439        0,
2440        0,
2441        0,
2442 /*50*/ 0,
2443        MatGetRowIJ_SeqBAIJ,
2444        MatRestoreRowIJ_SeqBAIJ,
2445        0,
2446        0,
2447 /*55*/ 0,
2448        0,
2449        0,
2450        0,
2451        MatSetValuesBlocked_SeqBAIJ,
2452 /*60*/ MatGetSubMatrix_SeqBAIJ,
2453        MatDestroy_SeqBAIJ,
2454        MatView_SeqBAIJ,
2455        0,
2456        0,
2457 /*65*/ 0,
2458        0,
2459        0,
2460        0,
2461        0,
2462 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2463        0,
2464        MatConvert_Basic,
2465        0,
2466        0,
2467 /*75*/ 0,
2468        0,
2469        0,
2470        0,
2471        0,
2472 /*80*/ 0,
2473        0,
2474        0,
2475        0,
2476        MatLoad_SeqBAIJ,
2477 /*85*/ 0,
2478        0,
2479        0,
2480        0,
2481        0,
2482 /*90*/ 0,
2483        0,
2484        0,
2485        0,
2486        0,
2487 /*95*/ 0,
2488        0,
2489        0,
2490        0,
2491        0,
2492 /*100*/0,
2493        0,
2494        0,
2495        0,
2496        0,
2497 /*105*/0,
2498        MatRealPart_SeqBAIJ,
2499        MatImaginaryPart_SeqBAIJ,
2500        0,
2501        0,
2502 /*110*/0,
2503        0,
2504        0,
2505        0,
2506        MatMissingDiagonal_SeqBAIJ
2507 /*115*/
2508 };
2509 
2510 EXTERN_C_BEGIN
2511 #undef __FUNCT__
2512 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2513 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2514 {
2515   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2516   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   if (aij->nonew != 1) {
2521     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2522   }
2523 
2524   /* allocate space for values if not already there */
2525   if (!aij->saved_values) {
2526     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2527   }
2528 
2529   /* copy values over */
2530   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 EXTERN_C_END
2534 
2535 EXTERN_C_BEGIN
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2538 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2539 {
2540   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2541   PetscErrorCode ierr;
2542   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2543 
2544   PetscFunctionBegin;
2545   if (aij->nonew != 1) {
2546     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2547   }
2548   if (!aij->saved_values) {
2549     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2550   }
2551 
2552   /* copy values over */
2553   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 EXTERN_C_END
2557 
2558 EXTERN_C_BEGIN
2559 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2560 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2561 EXTERN_C_END
2562 
2563 EXTERN_C_BEGIN
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2566 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2567 {
2568   Mat_SeqBAIJ    *b;
2569   PetscErrorCode ierr;
2570   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2571   PetscTruth     flg,skipallocation = PETSC_FALSE;
2572 
2573   PetscFunctionBegin;
2574 
2575   if (nz == MAT_SKIP_ALLOCATION) {
2576     skipallocation = PETSC_TRUE;
2577     nz             = 0;
2578   }
2579 
2580   if (bs < 0) {
2581     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2582       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2583     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2584     bs   = PetscAbs(bs);
2585   }
2586   if (nnz && newbs != bs) {
2587     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2588   }
2589   bs   = newbs;
2590 
2591   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2592   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2593   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2594   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2595 
2596   B->preallocated = PETSC_TRUE;
2597 
2598   mbs  = B->rmap->n/bs;
2599   nbs  = B->cmap->n/bs;
2600   bs2  = bs*bs;
2601 
2602   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2603     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2604   }
2605 
2606   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2607   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2608   if (nnz) {
2609     for (i=0; i<mbs; i++) {
2610       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2611       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2612     }
2613   }
2614 
2615   b       = (Mat_SeqBAIJ*)B->data;
2616   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2617     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2618   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2619 
2620   if (!flg) {
2621     switch (bs) {
2622     case 1:
2623       B->ops->mult            = MatMult_SeqBAIJ_1;
2624       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2625       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2626       break;
2627     case 2:
2628       B->ops->mult            = MatMult_SeqBAIJ_2;
2629       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2630       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2631       break;
2632     case 3:
2633       B->ops->mult            = MatMult_SeqBAIJ_3;
2634       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2635       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2636       break;
2637     case 4:
2638       B->ops->mult            = MatMult_SeqBAIJ_4;
2639       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2640       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2641       break;
2642     case 5:
2643       B->ops->mult            = MatMult_SeqBAIJ_5;
2644       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2645       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2646       break;
2647     case 6:
2648       B->ops->mult            = MatMult_SeqBAIJ_6;
2649       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2650       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2651       break;
2652     case 7:
2653       B->ops->mult            = MatMult_SeqBAIJ_7;
2654       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2655       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2656       break;
2657     default:
2658       B->ops->mult            = MatMult_SeqBAIJ_N;
2659       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2660       break;
2661     }
2662   }
2663   B->rmap->bs      = bs;
2664   b->mbs     = mbs;
2665   b->nbs     = nbs;
2666   if (!skipallocation) {
2667     if (!b->imax) {
2668       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2669     }
2670     /* b->ilen will count nonzeros in each block row so far. */
2671     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2672     if (!nnz) {
2673       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2674       else if (nz <= 0)        nz = 1;
2675       for (i=0; i<mbs; i++) b->imax[i] = nz;
2676       nz = nz*mbs;
2677     } else {
2678       nz = 0;
2679       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2680     }
2681 
2682     /* allocate the matrix space */
2683     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2684     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2685     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2686     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2687     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2688     b->singlemalloc = PETSC_TRUE;
2689     b->i[0] = 0;
2690     for (i=1; i<mbs+1; i++) {
2691       b->i[i] = b->i[i-1] + b->imax[i-1];
2692     }
2693     b->free_a     = PETSC_TRUE;
2694     b->free_ij    = PETSC_TRUE;
2695   } else {
2696     b->free_a     = PETSC_FALSE;
2697     b->free_ij    = PETSC_FALSE;
2698   }
2699 
2700   B->rmap->bs          = bs;
2701   b->bs2              = bs2;
2702   b->mbs              = mbs;
2703   b->nz               = 0;
2704   b->maxnz            = nz*bs2;
2705   B->info.nz_unneeded = (PetscReal)b->maxnz;
2706   PetscFunctionReturn(0);
2707 }
2708 EXTERN_C_END
2709 
2710 EXTERN_C_BEGIN
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2713 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2714 {
2715   PetscInt       i,m,nz,nz_max=0,*nnz;
2716   PetscScalar    *values=0;
2717   PetscErrorCode ierr;
2718 
2719   PetscFunctionBegin;
2720 
2721   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2722 
2723   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2724   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2725   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2726   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2727   m = B->rmap->n/bs;
2728 
2729   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2730   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2731   for(i=0; i<m; i++) {
2732     nz = ii[i+1]- ii[i];
2733     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2734     nz_max = PetscMax(nz_max, nz);
2735     nnz[i] = nz;
2736   }
2737   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2738   ierr = PetscFree(nnz);CHKERRQ(ierr);
2739 
2740   values = (PetscScalar*)V;
2741   if (!values) {
2742     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2743     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2744   }
2745   for (i=0; i<m; i++) {
2746     PetscInt          ncols  = ii[i+1] - ii[i];
2747     const PetscInt    *icols = jj + ii[i];
2748     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2749     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2750   }
2751   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2752   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2753   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2754 
2755   PetscFunctionReturn(0);
2756 }
2757 EXTERN_C_END
2758 
2759 
2760 EXTERN_C_BEGIN
2761 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2762 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2763 EXTERN_C_END
2764 
2765 /*MC
2766    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2767    block sparse compressed row format.
2768 
2769    Options Database Keys:
2770 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2771 
2772   Level: beginner
2773 
2774 .seealso: MatCreateSeqBAIJ()
2775 M*/
2776 
2777 
2778 EXTERN_C_BEGIN
2779 #undef __FUNCT__
2780 #define __FUNCT__ "MatCreate_SeqBAIJ"
2781 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2782 {
2783   PetscErrorCode ierr;
2784   PetscMPIInt    size;
2785   Mat_SeqBAIJ    *b;
2786 
2787   PetscFunctionBegin;
2788   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2789   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2790 
2791   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2792   B->data = (void*)b;
2793   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2794   B->mapping          = 0;
2795   b->row              = 0;
2796   b->col              = 0;
2797   b->icol             = 0;
2798   b->reallocs         = 0;
2799   b->saved_values     = 0;
2800 
2801   b->roworiented      = PETSC_TRUE;
2802   b->nonew            = 0;
2803   b->diag             = 0;
2804   b->solve_work       = 0;
2805   b->mult_work        = 0;
2806   B->spptr            = 0;
2807   B->info.nz_unneeded = (PetscReal)b->maxnz;
2808   b->keepzeroedrows   = PETSC_FALSE;
2809   b->xtoy              = 0;
2810   b->XtoY              = 0;
2811   b->compressedrow.use     = PETSC_FALSE;
2812   b->compressedrow.nrows   = 0;
2813   b->compressedrow.i       = PETSC_NULL;
2814   b->compressedrow.rindex  = PETSC_NULL;
2815   b->compressedrow.checked = PETSC_FALSE;
2816   B->same_nonzero          = PETSC_FALSE;
2817 
2818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2819                                      "MatGetFactorAvailable_seqbaij_petsc",
2820                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2822                                      "MatGetFactor_seqbaij_petsc",
2823                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2825                                      "MatInvertBlockDiagonal_SeqBAIJ",
2826                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2828                                      "MatStoreValues_SeqBAIJ",
2829                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2831                                      "MatRetrieveValues_SeqBAIJ",
2832                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2834                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2835                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2837                                      "MatConvert_SeqBAIJ_SeqAIJ",
2838                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2839   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2840                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2841                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2843                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2844                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2846                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2847                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2848   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2849   PetscFunctionReturn(0);
2850 }
2851 EXTERN_C_END
2852 
2853 #undef __FUNCT__
2854 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2855 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2856 {
2857   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2858   PetscErrorCode ierr;
2859   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2860 
2861   PetscFunctionBegin;
2862   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2863 
2864   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2865   for (i=0; i<mbs; i++) {
2866     c->imax[i] = a->imax[i];
2867     c->ilen[i] = a->ilen[i];
2868   }
2869 
2870   /* allocate the matrix space */
2871   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2872   c->singlemalloc = PETSC_TRUE;
2873   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2874   if (mbs > 0) {
2875     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2876     if (cpvalues == MAT_COPY_VALUES) {
2877       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2878     } else {
2879       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2880     }
2881   }
2882   c->roworiented = a->roworiented;
2883   c->nonew       = a->nonew;
2884   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2885   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2886   c->bs2         = a->bs2;
2887   c->mbs         = a->mbs;
2888   c->nbs         = a->nbs;
2889 
2890   if (a->diag) {
2891     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2892     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2893     for (i=0; i<mbs; i++) {
2894       c->diag[i] = a->diag[i];
2895     }
2896   } else c->diag        = 0;
2897   c->nz                 = a->nz;
2898   c->maxnz              = a->maxnz;
2899   c->solve_work         = 0;
2900   c->mult_work          = 0;
2901   c->free_a             = PETSC_TRUE;
2902   c->free_ij            = PETSC_TRUE;
2903   C->preallocated       = PETSC_TRUE;
2904   C->assembled          = PETSC_TRUE;
2905 
2906   c->compressedrow.use     = a->compressedrow.use;
2907   c->compressedrow.nrows   = a->compressedrow.nrows;
2908   c->compressedrow.checked = a->compressedrow.checked;
2909   if ( a->compressedrow.checked && a->compressedrow.use){
2910     i = a->compressedrow.nrows;
2911     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2912     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2913     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2914     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2915   } else {
2916     c->compressedrow.use    = PETSC_FALSE;
2917     c->compressedrow.i      = PETSC_NULL;
2918     c->compressedrow.rindex = PETSC_NULL;
2919   }
2920   C->same_nonzero = A->same_nonzero;
2921   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2922   PetscFunctionReturn(0);
2923 }
2924 
2925 #undef __FUNCT__
2926 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2927 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2928 {
2929     PetscErrorCode ierr;
2930 
2931   PetscFunctionBegin;
2932   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2933   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2934   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2935   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 #undef __FUNCT__
2940 #define __FUNCT__ "MatLoad_SeqBAIJ"
2941 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2942 {
2943   Mat_SeqBAIJ    *a;
2944   Mat            B;
2945   PetscErrorCode ierr;
2946   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2947   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2948   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2949   PetscInt       *masked,nmask,tmp,bs2,ishift;
2950   PetscMPIInt    size;
2951   int            fd;
2952   PetscScalar    *aa;
2953   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2954 
2955   PetscFunctionBegin;
2956   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2957     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2958   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2959   bs2  = bs*bs;
2960 
2961   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2962   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2963   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2964   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2965   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2966   M = header[1]; N = header[2]; nz = header[3];
2967 
2968   if (header[3] < 0) {
2969     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2970   }
2971 
2972   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2973 
2974   /*
2975      This code adds extra rows to make sure the number of rows is
2976     divisible by the blocksize
2977   */
2978   mbs        = M/bs;
2979   extra_rows = bs - M + bs*(mbs);
2980   if (extra_rows == bs) extra_rows = 0;
2981   else                  mbs++;
2982   if (extra_rows) {
2983     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2984   }
2985 
2986   /* read in row lengths */
2987   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2988   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2989   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2990 
2991   /* read in column indices */
2992   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2993   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2994   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2995 
2996   /* loop over row lengths determining block row lengths */
2997   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2998   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2999   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3000   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3001   masked   = mask + mbs;
3002   rowcount = 0; nzcount = 0;
3003   for (i=0; i<mbs; i++) {
3004     nmask = 0;
3005     for (j=0; j<bs; j++) {
3006       kmax = rowlengths[rowcount];
3007       for (k=0; k<kmax; k++) {
3008         tmp = jj[nzcount++]/bs;
3009         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3010       }
3011       rowcount++;
3012     }
3013     browlengths[i] += nmask;
3014     /* zero out the mask elements we set */
3015     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3016   }
3017 
3018   /* create our matrix */
3019   ierr = MatCreate(comm,&B);
3020   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3021   ierr = MatSetType(B,type);CHKERRQ(ierr);
3022   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3023   a = (Mat_SeqBAIJ*)B->data;
3024 
3025   /* set matrix "i" values */
3026   a->i[0] = 0;
3027   for (i=1; i<= mbs; i++) {
3028     a->i[i]      = a->i[i-1] + browlengths[i-1];
3029     a->ilen[i-1] = browlengths[i-1];
3030   }
3031   a->nz         = 0;
3032   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3033 
3034   /* read in nonzero values */
3035   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3036   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3037   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3038 
3039   /* set "a" and "j" values into matrix */
3040   nzcount = 0; jcount = 0;
3041   for (i=0; i<mbs; i++) {
3042     nzcountb = nzcount;
3043     nmask    = 0;
3044     for (j=0; j<bs; j++) {
3045       kmax = rowlengths[i*bs+j];
3046       for (k=0; k<kmax; k++) {
3047         tmp = jj[nzcount++]/bs;
3048 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3049       }
3050     }
3051     /* sort the masked values */
3052     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3053 
3054     /* set "j" values into matrix */
3055     maskcount = 1;
3056     for (j=0; j<nmask; j++) {
3057       a->j[jcount++]  = masked[j];
3058       mask[masked[j]] = maskcount++;
3059     }
3060     /* set "a" values into matrix */
3061     ishift = bs2*a->i[i];
3062     for (j=0; j<bs; j++) {
3063       kmax = rowlengths[i*bs+j];
3064       for (k=0; k<kmax; k++) {
3065         tmp       = jj[nzcountb]/bs ;
3066         block     = mask[tmp] - 1;
3067         point     = jj[nzcountb] - bs*tmp;
3068         idx       = ishift + bs2*block + j + bs*point;
3069         a->a[idx] = (MatScalar)aa[nzcountb++];
3070       }
3071     }
3072     /* zero out the mask elements we set */
3073     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3074   }
3075   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3076 
3077   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3078   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3079   ierr = PetscFree(aa);CHKERRQ(ierr);
3080   ierr = PetscFree(jj);CHKERRQ(ierr);
3081   ierr = PetscFree(mask);CHKERRQ(ierr);
3082 
3083   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3084   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3085   ierr = MatView_Private(B);CHKERRQ(ierr);
3086 
3087   *A = B;
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 #undef __FUNCT__
3092 #define __FUNCT__ "MatCreateSeqBAIJ"
3093 /*@C
3094    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3095    compressed row) format.  For good matrix assembly performance the
3096    user should preallocate the matrix storage by setting the parameter nz
3097    (or the array nnz).  By setting these parameters accurately, performance
3098    during matrix assembly can be increased by more than a factor of 50.
3099 
3100    Collective on MPI_Comm
3101 
3102    Input Parameters:
3103 +  comm - MPI communicator, set to PETSC_COMM_SELF
3104 .  bs - size of block
3105 .  m - number of rows
3106 .  n - number of columns
3107 .  nz - number of nonzero blocks  per block row (same for all rows)
3108 -  nnz - array containing the number of nonzero blocks in the various block rows
3109          (possibly different for each block row) or PETSC_NULL
3110 
3111    Output Parameter:
3112 .  A - the matrix
3113 
3114    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3115    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3116    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3117 
3118    Options Database Keys:
3119 .   -mat_no_unroll - uses code that does not unroll the loops in the
3120                      block calculations (much slower)
3121 .    -mat_block_size - size of the blocks to use
3122 
3123    Level: intermediate
3124 
3125    Notes:
3126    The number of rows and columns must be divisible by blocksize.
3127 
3128    If the nnz parameter is given then the nz parameter is ignored
3129 
3130    A nonzero block is any block that as 1 or more nonzeros in it
3131 
3132    The block AIJ format is fully compatible with standard Fortran 77
3133    storage.  That is, the stored row and column indices can begin at
3134    either one (as in Fortran) or zero.  See the users' manual for details.
3135 
3136    Specify the preallocated storage with either nz or nnz (not both).
3137    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3138    allocation.  For additional details, see the users manual chapter on
3139    matrices.
3140 
3141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3142 @*/
3143 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3144 {
3145   PetscErrorCode ierr;
3146 
3147   PetscFunctionBegin;
3148   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3149   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3150   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3151   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3152   PetscFunctionReturn(0);
3153 }
3154 
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3157 /*@C
3158    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3159    per row in the matrix. For good matrix assembly performance the
3160    user should preallocate the matrix storage by setting the parameter nz
3161    (or the array nnz).  By setting these parameters accurately, performance
3162    during matrix assembly can be increased by more than a factor of 50.
3163 
3164    Collective on MPI_Comm
3165 
3166    Input Parameters:
3167 +  A - the matrix
3168 .  bs - size of block
3169 .  nz - number of block nonzeros per block row (same for all rows)
3170 -  nnz - array containing the number of block nonzeros in the various block rows
3171          (possibly different for each block row) or PETSC_NULL
3172 
3173    Options Database Keys:
3174 .   -mat_no_unroll - uses code that does not unroll the loops in the
3175                      block calculations (much slower)
3176 .    -mat_block_size - size of the blocks to use
3177 
3178    Level: intermediate
3179 
3180    Notes:
3181    If the nnz parameter is given then the nz parameter is ignored
3182 
3183    You can call MatGetInfo() to get information on how effective the preallocation was;
3184    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3185    You can also run with the option -info and look for messages with the string
3186    malloc in them to see if additional memory allocation was needed.
3187 
3188    The block AIJ format is fully compatible with standard Fortran 77
3189    storage.  That is, the stored row and column indices can begin at
3190    either one (as in Fortran) or zero.  See the users' manual for details.
3191 
3192    Specify the preallocated storage with either nz or nnz (not both).
3193    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3194    allocation.  For additional details, see the users manual chapter on
3195    matrices.
3196 
3197 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3198 @*/
3199 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3200 {
3201   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3202 
3203   PetscFunctionBegin;
3204   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3205   if (f) {
3206     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3207   }
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 #undef __FUNCT__
3212 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3213 /*@C
3214    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3215    (the default sequential PETSc format).
3216 
3217    Collective on MPI_Comm
3218 
3219    Input Parameters:
3220 +  A - the matrix
3221 .  i - the indices into j for the start of each local row (starts with zero)
3222 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3223 -  v - optional values in the matrix
3224 
3225    Level: developer
3226 
3227 .keywords: matrix, aij, compressed row, sparse
3228 
3229 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3230 @*/
3231 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3232 {
3233   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3234 
3235   PetscFunctionBegin;
3236   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3237   if (f) {
3238     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3239   }
3240   PetscFunctionReturn(0);
3241 }
3242 
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3246 /*@
3247      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3248               (upper triangular entries in CSR format) provided by the user.
3249 
3250      Collective on MPI_Comm
3251 
3252    Input Parameters:
3253 +  comm - must be an MPI communicator of size 1
3254 .  bs - size of block
3255 .  m - number of rows
3256 .  n - number of columns
3257 .  i - row indices
3258 .  j - column indices
3259 -  a - matrix values
3260 
3261    Output Parameter:
3262 .  mat - the matrix
3263 
3264    Level: intermediate
3265 
3266    Notes:
3267        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3268     once the matrix is destroyed
3269 
3270        You cannot set new nonzero locations into this matrix, that will generate an error.
3271 
3272        The i and j indices are 0 based
3273 
3274 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3275 
3276 @*/
3277 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3278 {
3279   PetscErrorCode ierr;
3280   PetscInt       ii;
3281   Mat_SeqBAIJ    *baij;
3282 
3283   PetscFunctionBegin;
3284   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3285   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3286 
3287   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3288   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3289   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3290   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3291   baij = (Mat_SeqBAIJ*)(*mat)->data;
3292   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3293 
3294   baij->i = i;
3295   baij->j = j;
3296   baij->a = a;
3297   baij->singlemalloc = PETSC_FALSE;
3298   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3299   baij->free_a       = PETSC_FALSE;
3300   baij->free_ij       = PETSC_FALSE;
3301 
3302   for (ii=0; ii<m; ii++) {
3303     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3304 #if defined(PETSC_USE_DEBUG)
3305     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3306 #endif
3307   }
3308 #if defined(PETSC_USE_DEBUG)
3309   for (ii=0; ii<baij->i[m]; ii++) {
3310     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3311     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3312   }
3313 #endif
3314 
3315   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3316   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3317   PetscFunctionReturn(0);
3318 }
3319