xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 04b7c42f72c03fd77d047c8ce4a771bfc583ae8d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "../src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "../src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   if (A->rmap->n > 0 && !jj) {
1105     *missing  = PETSC_TRUE;
1106     if (d) *d = 0;
1107     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1108   } else {
1109     diag     = a->diag;
1110     for (i=0; i<a->mbs; i++) {
1111       if (jj[diag[i]] != i) {
1112         *missing  = PETSC_TRUE;
1113         if (d) *d = i;
1114         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1115       }
1116     }
1117   }
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1124 {
1125   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1126   PetscErrorCode ierr;
1127   PetscInt       i,j,m = a->mbs;
1128 
1129   PetscFunctionBegin;
1130   if (!a->diag) {
1131     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1132   }
1133   for (i=0; i<m; i++) {
1134     a->diag[i] = a->i[i+1];
1135     for (j=a->i[i]; j<a->i[i+1]; j++) {
1136       if (a->j[j] == i) {
1137         a->diag[i] = j;
1138         break;
1139       }
1140     }
1141   }
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 
1146 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1150 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1151 {
1152   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1153   PetscErrorCode ierr;
1154   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1155   PetscInt       *tia, *tja;
1156 
1157   PetscFunctionBegin;
1158   *nn = n;
1159   if (!ia) PetscFunctionReturn(0);
1160   if (symmetric) {
1161     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1162   } else {
1163     tia = a->i; tja = a->j;
1164   }
1165 
1166   if (!blockcompressed && bs > 1) {
1167     (*nn) *= bs;
1168     nbs    = bs;
1169     /* malloc & create the natural set of indices */
1170     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1171     if (n) {
1172       (*ia)[0] = 0;
1173       for (j=1; j<bs; j++) {
1174         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1175       }
1176     }
1177 
1178     for (i=1; i<n; i++) {
1179       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1180       for (j=1; j<bs; j++) {
1181         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1182       }
1183     }
1184     if (n) {
1185       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1186     }
1187 
1188     if (ja) {
1189       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1190       cnt = 0;
1191       for (i=0; i<n; i++) {
1192         for (j=0; j<bs; j++) {
1193           for (k=tia[i]; k<tia[i+1]; k++) {
1194             for (l=0; l<bs; l++) {
1195               (*ja)[cnt++] = bs*tja[k] + l;
1196 	    }
1197           }
1198         }
1199       }
1200     }
1201 
1202     n     *= bs;
1203     nz *= bs*bs;
1204     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1205       ierr = PetscFree(tia);CHKERRQ(ierr);
1206       ierr = PetscFree(tja);CHKERRQ(ierr);
1207     }
1208   } else {
1209     *ia = tia;
1210     if (ja) *ja = tja;
1211   }
1212   if (oshift == 1) {
1213     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1214     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1215   }
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1221 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1222 {
1223   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1224   PetscErrorCode ierr;
1225   PetscInt       i,n = a->mbs,nz = a->i[n];
1226 
1227   PetscFunctionBegin;
1228   if (!ia) PetscFunctionReturn(0);
1229   if (!blockcompressed && A->rmap->bs > 1) {
1230     ierr = PetscFree(*ia);CHKERRQ(ierr);
1231     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1232   } else if (symmetric) {
1233     ierr = PetscFree(*ia);CHKERRQ(ierr);
1234     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1235   } else if (oshift == 1) { /* blockcompressed */
1236     for (i=0; i<n+1; i++) a->i[i]--;
1237     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1244 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1245 {
1246   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250 #if defined(PETSC_USE_LOG)
1251   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1252 #endif
1253   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1254   if (a->row) {
1255     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1256   }
1257   if (a->col) {
1258     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1259   }
1260   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1261   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1262   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1263   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1264   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1265   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1266   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1267   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1268   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1269 
1270   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1271   ierr = PetscFree(a);CHKERRQ(ierr);
1272 
1273   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1287 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1288 {
1289   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1290   PetscErrorCode ierr;
1291 
1292   PetscFunctionBegin;
1293   switch (op) {
1294   case MAT_ROW_ORIENTED:
1295     a->roworiented    = flg;
1296     break;
1297   case MAT_KEEP_ZEROED_ROWS:
1298     a->keepzeroedrows = flg;
1299     break;
1300   case MAT_NEW_NONZERO_LOCATIONS:
1301     a->nonew          = (flg ? 0 : 1);
1302     break;
1303   case MAT_NEW_NONZERO_LOCATION_ERR:
1304     a->nonew          = (flg ? -1 : 0);
1305     break;
1306   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1307     a->nonew          = (flg ? -2 : 0);
1308     break;
1309   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1310     a->nounused       = (flg ? -1 : 0);
1311     break;
1312   case MAT_NEW_DIAGONALS:
1313   case MAT_IGNORE_OFF_PROC_ENTRIES:
1314   case MAT_USE_HASH_TABLE:
1315     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1316     break;
1317   case MAT_SYMMETRIC:
1318   case MAT_STRUCTURALLY_SYMMETRIC:
1319   case MAT_HERMITIAN:
1320   case MAT_SYMMETRY_ETERNAL:
1321     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1322     break;
1323   default:
1324     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1331 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332 {
1333   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1334   PetscErrorCode ierr;
1335   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1336   MatScalar      *aa,*aa_i;
1337   PetscScalar    *v_i;
1338 
1339   PetscFunctionBegin;
1340   bs  = A->rmap->bs;
1341   ai  = a->i;
1342   aj  = a->j;
1343   aa  = a->a;
1344   bs2 = a->bs2;
1345 
1346   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1347 
1348   bn  = row/bs;   /* Block number */
1349   bp  = row % bs; /* Block Position */
1350   M   = ai[bn+1] - ai[bn];
1351   *nz = bs*M;
1352 
1353   if (v) {
1354     *v = 0;
1355     if (*nz) {
1356       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1357       for (i=0; i<M; i++) { /* for each block in the block row */
1358         v_i  = *v + i*bs;
1359         aa_i = aa + bs2*(ai[bn] + i);
1360         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1361       }
1362     }
1363   }
1364 
1365   if (idx) {
1366     *idx = 0;
1367     if (*nz) {
1368       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1369       for (i=0; i<M; i++) { /* for each block in the block row */
1370         idx_i = *idx + i*bs;
1371         itmp  = bs*aj[ai[bn] + i];
1372         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1373       }
1374     }
1375   }
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1381 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1382 {
1383   PetscErrorCode ierr;
1384 
1385   PetscFunctionBegin;
1386   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1387   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 #undef __FUNCT__
1392 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1393 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1394 {
1395   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1396   Mat            C;
1397   PetscErrorCode ierr;
1398   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1399   PetscInt       *rows,*cols,bs2=a->bs2;
1400   MatScalar      *array;
1401 
1402   PetscFunctionBegin;
1403   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1404   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1405     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1406     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1407 
1408     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1409     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1410     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1411     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1412     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1413     ierr = PetscFree(col);CHKERRQ(ierr);
1414   } else {
1415     C = *B;
1416   }
1417 
1418   array = a->a;
1419   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1420   cols = rows + bs;
1421   for (i=0; i<mbs; i++) {
1422     cols[0] = i*bs;
1423     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1424     len = ai[i+1] - ai[i];
1425     for (j=0; j<len; j++) {
1426       rows[0] = (*aj++)*bs;
1427       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1428       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1429       array += bs2;
1430     }
1431   }
1432   ierr = PetscFree(rows);CHKERRQ(ierr);
1433 
1434   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436 
1437   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1438     *B = C;
1439   } else {
1440     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1441   }
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1447 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1448 {
1449   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1450   PetscErrorCode ierr;
1451   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1452   int            fd;
1453   PetscScalar    *aa;
1454   FILE           *file;
1455 
1456   PetscFunctionBegin;
1457   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1458   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1459   col_lens[0] = MAT_FILE_COOKIE;
1460 
1461   col_lens[1] = A->rmap->N;
1462   col_lens[2] = A->cmap->n;
1463   col_lens[3] = a->nz*bs2;
1464 
1465   /* store lengths of each row and write (including header) to file */
1466   count = 0;
1467   for (i=0; i<a->mbs; i++) {
1468     for (j=0; j<bs; j++) {
1469       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1470     }
1471   }
1472   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1473   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1474 
1475   /* store column indices (zero start index) */
1476   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1477   count = 0;
1478   for (i=0; i<a->mbs; i++) {
1479     for (j=0; j<bs; j++) {
1480       for (k=a->i[i]; k<a->i[i+1]; k++) {
1481         for (l=0; l<bs; l++) {
1482           jj[count++] = bs*a->j[k] + l;
1483         }
1484       }
1485     }
1486   }
1487   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1488   ierr = PetscFree(jj);CHKERRQ(ierr);
1489 
1490   /* store nonzero values */
1491   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1492   count = 0;
1493   for (i=0; i<a->mbs; i++) {
1494     for (j=0; j<bs; j++) {
1495       for (k=a->i[i]; k<a->i[i+1]; k++) {
1496         for (l=0; l<bs; l++) {
1497           aa[count++] = a->a[bs2*k + l*bs + j];
1498         }
1499       }
1500     }
1501   }
1502   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1503   ierr = PetscFree(aa);CHKERRQ(ierr);
1504 
1505   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1506   if (file) {
1507     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1508   }
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1514 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1515 {
1516   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1517   PetscErrorCode    ierr;
1518   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1519   PetscViewerFormat format;
1520 
1521   PetscFunctionBegin;
1522   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1523   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1524     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1525   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1526     Mat aij;
1527     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1528     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1529     ierr = MatDestroy(aij);CHKERRQ(ierr);
1530   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1531      PetscFunctionReturn(0);
1532   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1533     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1534     for (i=0; i<a->mbs; i++) {
1535       for (j=0; j<bs; j++) {
1536         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1537         for (k=a->i[i]; k<a->i[i+1]; k++) {
1538           for (l=0; l<bs; l++) {
1539 #if defined(PETSC_USE_COMPLEX)
1540             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1542                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1544               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1545                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1546             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1547               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1548             }
1549 #else
1550             if (a->a[bs2*k + l*bs + j] != 0.0) {
1551               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1552             }
1553 #endif
1554           }
1555         }
1556         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1557       }
1558     }
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1560   } else {
1561     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1562     for (i=0; i<a->mbs; i++) {
1563       for (j=0; j<bs; j++) {
1564         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1565         for (k=a->i[i]; k<a->i[i+1]; k++) {
1566           for (l=0; l<bs; l++) {
1567 #if defined(PETSC_USE_COMPLEX)
1568             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1570                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1572               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1573                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             } else {
1575               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1576             }
1577 #else
1578             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1579 #endif
1580           }
1581         }
1582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1583       }
1584     }
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1586   }
1587   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1593 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1594 {
1595   Mat            A = (Mat) Aa;
1596   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1597   PetscErrorCode ierr;
1598   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1599   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1600   MatScalar      *aa;
1601   PetscViewer    viewer;
1602 
1603   PetscFunctionBegin;
1604 
1605   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1606   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1607 
1608   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1609 
1610   /* loop over matrix elements drawing boxes */
1611   color = PETSC_DRAW_BLUE;
1612   for (i=0,row=0; i<mbs; i++,row+=bs) {
1613     for (j=a->i[i]; j<a->i[i+1]; j++) {
1614       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1615       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1616       aa = a->a + j*bs2;
1617       for (k=0; k<bs; k++) {
1618         for (l=0; l<bs; l++) {
1619           if (PetscRealPart(*aa++) >=  0.) continue;
1620           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1621         }
1622       }
1623     }
1624   }
1625   color = PETSC_DRAW_CYAN;
1626   for (i=0,row=0; i<mbs; i++,row+=bs) {
1627     for (j=a->i[i]; j<a->i[i+1]; j++) {
1628       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1629       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1630       aa = a->a + j*bs2;
1631       for (k=0; k<bs; k++) {
1632         for (l=0; l<bs; l++) {
1633           if (PetscRealPart(*aa++) != 0.) continue;
1634           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1635         }
1636       }
1637     }
1638   }
1639 
1640   color = PETSC_DRAW_RED;
1641   for (i=0,row=0; i<mbs; i++,row+=bs) {
1642     for (j=a->i[i]; j<a->i[i+1]; j++) {
1643       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1644       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1645       aa = a->a + j*bs2;
1646       for (k=0; k<bs; k++) {
1647         for (l=0; l<bs; l++) {
1648           if (PetscRealPart(*aa++) <= 0.) continue;
1649           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1650         }
1651       }
1652     }
1653   }
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1659 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1660 {
1661   PetscErrorCode ierr;
1662   PetscReal      xl,yl,xr,yr,w,h;
1663   PetscDraw      draw;
1664   PetscTruth     isnull;
1665 
1666   PetscFunctionBegin;
1667 
1668   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1669   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1670 
1671   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1672   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1673   xr += w;    yr += h;  xl = -w;     yl = -h;
1674   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1675   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1676   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatView_SeqBAIJ"
1682 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1683 {
1684   PetscErrorCode ierr;
1685   PetscTruth     iascii,isbinary,isdraw;
1686 
1687   PetscFunctionBegin;
1688   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1689   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1690   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1691   if (iascii){
1692     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1693   } else if (isbinary) {
1694     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1695   } else if (isdraw) {
1696     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1697   } else {
1698     Mat B;
1699     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1700     ierr = MatView(B,viewer);CHKERRQ(ierr);
1701     ierr = MatDestroy(B);CHKERRQ(ierr);
1702   }
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1709 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1710 {
1711   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1712   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1713   PetscInt    *ai = a->i,*ailen = a->ilen;
1714   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1715   MatScalar   *ap,*aa = a->a;
1716 
1717   PetscFunctionBegin;
1718   for (k=0; k<m; k++) { /* loop over rows */
1719     row  = im[k]; brow = row/bs;
1720     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1721     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1722     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1723     nrow = ailen[brow];
1724     for (l=0; l<n; l++) { /* loop over columns */
1725       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1726       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1727       col  = in[l] ;
1728       bcol = col/bs;
1729       cidx = col%bs;
1730       ridx = row%bs;
1731       high = nrow;
1732       low  = 0; /* assume unsorted */
1733       while (high-low > 5) {
1734         t = (low+high)/2;
1735         if (rp[t] > bcol) high = t;
1736         else             low  = t;
1737       }
1738       for (i=low; i<high; i++) {
1739         if (rp[i] > bcol) break;
1740         if (rp[i] == bcol) {
1741           *v++ = ap[bs2*i+bs*cidx+ridx];
1742           goto finished;
1743         }
1744       }
1745       *v++ = 0.0;
1746       finished:;
1747     }
1748   }
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 #define CHUNKSIZE 10
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1755 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1756 {
1757   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1758   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1759   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1760   PetscErrorCode    ierr;
1761   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1762   PetscTruth        roworiented=a->roworiented;
1763   const PetscScalar *value = v;
1764   MatScalar         *ap,*aa = a->a,*bap;
1765 
1766   PetscFunctionBegin;
1767   if (roworiented) {
1768     stepval = (n-1)*bs;
1769   } else {
1770     stepval = (m-1)*bs;
1771   }
1772   for (k=0; k<m; k++) { /* loop over added rows */
1773     row  = im[k];
1774     if (row < 0) continue;
1775 #if defined(PETSC_USE_DEBUG)
1776     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1777 #endif
1778     rp   = aj + ai[row];
1779     ap   = aa + bs2*ai[row];
1780     rmax = imax[row];
1781     nrow = ailen[row];
1782     low  = 0;
1783     high = nrow;
1784     for (l=0; l<n; l++) { /* loop over added columns */
1785       if (in[l] < 0) continue;
1786 #if defined(PETSC_USE_DEBUG)
1787       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1788 #endif
1789       col = in[l];
1790       if (roworiented) {
1791         value = v + k*(stepval+bs)*bs + l*bs;
1792       } else {
1793         value = v + l*(stepval+bs)*bs + k*bs;
1794       }
1795       if (col <= lastcol) low = 0; else high = nrow;
1796       lastcol = col;
1797       while (high-low > 7) {
1798         t = (low+high)/2;
1799         if (rp[t] > col) high = t;
1800         else             low  = t;
1801       }
1802       for (i=low; i<high; i++) {
1803         if (rp[i] > col) break;
1804         if (rp[i] == col) {
1805           bap  = ap +  bs2*i;
1806           if (roworiented) {
1807             if (is == ADD_VALUES) {
1808               for (ii=0; ii<bs; ii++,value+=stepval) {
1809                 for (jj=ii; jj<bs2; jj+=bs) {
1810                   bap[jj] += *value++;
1811                 }
1812               }
1813             } else {
1814               for (ii=0; ii<bs; ii++,value+=stepval) {
1815                 for (jj=ii; jj<bs2; jj+=bs) {
1816                   bap[jj] = *value++;
1817                 }
1818               }
1819             }
1820           } else {
1821             if (is == ADD_VALUES) {
1822               for (ii=0; ii<bs; ii++,value+=stepval) {
1823                 for (jj=0; jj<bs; jj++) {
1824                   *bap++ += *value++;
1825                 }
1826               }
1827             } else {
1828               for (ii=0; ii<bs; ii++,value+=stepval) {
1829                 for (jj=0; jj<bs; jj++) {
1830                   *bap++  = *value++;
1831                 }
1832               }
1833             }
1834           }
1835           goto noinsert2;
1836         }
1837       }
1838       if (nonew == 1) goto noinsert2;
1839       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1840       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1841       N = nrow++ - 1; high++;
1842       /* shift up all the later entries in this row */
1843       for (ii=N; ii>=i; ii--) {
1844         rp[ii+1] = rp[ii];
1845         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1846       }
1847       if (N >= i) {
1848         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1849       }
1850       rp[i] = col;
1851       bap   = ap +  bs2*i;
1852       if (roworiented) {
1853         for (ii=0; ii<bs; ii++,value+=stepval) {
1854           for (jj=ii; jj<bs2; jj+=bs) {
1855             bap[jj] = *value++;
1856           }
1857         }
1858       } else {
1859         for (ii=0; ii<bs; ii++,value+=stepval) {
1860           for (jj=0; jj<bs; jj++) {
1861             *bap++  = *value++;
1862           }
1863         }
1864       }
1865       noinsert2:;
1866       low = i;
1867     }
1868     ailen[row] = nrow;
1869   }
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1875 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1876 {
1877   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1878   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1879   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1880   PetscErrorCode ierr;
1881   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1882   MatScalar      *aa = a->a,*ap;
1883   PetscReal      ratio=0.6;
1884 
1885   PetscFunctionBegin;
1886   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1887 
1888   if (m) rmax = ailen[0];
1889   for (i=1; i<mbs; i++) {
1890     /* move each row back by the amount of empty slots (fshift) before it*/
1891     fshift += imax[i-1] - ailen[i-1];
1892     rmax   = PetscMax(rmax,ailen[i]);
1893     if (fshift) {
1894       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1895       N = ailen[i];
1896       for (j=0; j<N; j++) {
1897         ip[j-fshift] = ip[j];
1898         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1899       }
1900     }
1901     ai[i] = ai[i-1] + ailen[i-1];
1902   }
1903   if (mbs) {
1904     fshift += imax[mbs-1] - ailen[mbs-1];
1905     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1906   }
1907   /* reset ilen and imax for each row */
1908   for (i=0; i<mbs; i++) {
1909     ailen[i] = imax[i] = ai[i+1] - ai[i];
1910   }
1911   a->nz = ai[mbs];
1912 
1913   /* diagonals may have moved, so kill the diagonal pointers */
1914   a->idiagvalid = PETSC_FALSE;
1915   if (fshift && a->diag) {
1916     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1917     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1918     a->diag = 0;
1919   }
1920   if (fshift && a->nounused == -1) {
1921     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1922   }
1923   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1924   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1925   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1926   a->reallocs          = 0;
1927   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1928 
1929   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1930   if (a->compressedrow.use){
1931     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1932   }
1933 
1934   A->same_nonzero = PETSC_TRUE;
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 /*
1939    This function returns an array of flags which indicate the locations of contiguous
1940    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1941    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1942    Assume: sizes should be long enough to hold all the values.
1943 */
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1946 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1947 {
1948   PetscInt   i,j,k,row;
1949   PetscTruth flg;
1950 
1951   PetscFunctionBegin;
1952   for (i=0,j=0; i<n; j++) {
1953     row = idx[i];
1954     if (row%bs!=0) { /* Not the begining of a block */
1955       sizes[j] = 1;
1956       i++;
1957     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1958       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1959       i++;
1960     } else { /* Begining of the block, so check if the complete block exists */
1961       flg = PETSC_TRUE;
1962       for (k=1; k<bs; k++) {
1963         if (row+k != idx[i+k]) { /* break in the block */
1964           flg = PETSC_FALSE;
1965           break;
1966         }
1967       }
1968       if (flg) { /* No break in the bs */
1969         sizes[j] = bs;
1970         i+= bs;
1971       } else {
1972         sizes[j] = 1;
1973         i++;
1974       }
1975     }
1976   }
1977   *bs_max = j;
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1983 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1984 {
1985   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1986   PetscErrorCode ierr;
1987   PetscInt       i,j,k,count,*rows;
1988   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1989   PetscScalar    zero = 0.0;
1990   MatScalar      *aa;
1991 
1992   PetscFunctionBegin;
1993   /* Make a copy of the IS and  sort it */
1994   /* allocate memory for rows,sizes */
1995   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1996   sizes = rows + is_n;
1997 
1998   /* copy IS values to rows, and sort them */
1999   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2000   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2001   if (baij->keepzeroedrows) {
2002     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2003     bs_max = is_n;
2004     A->same_nonzero = PETSC_TRUE;
2005   } else {
2006     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2007     A->same_nonzero = PETSC_FALSE;
2008   }
2009 
2010   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2011     row   = rows[j];
2012     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2013     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2014     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2015     if (sizes[i] == bs && !baij->keepzeroedrows) {
2016       if (diag != 0.0) {
2017         if (baij->ilen[row/bs] > 0) {
2018           baij->ilen[row/bs]       = 1;
2019           baij->j[baij->i[row/bs]] = row/bs;
2020           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2021         }
2022         /* Now insert all the diagonal values for this bs */
2023         for (k=0; k<bs; k++) {
2024           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2025         }
2026       } else { /* (diag == 0.0) */
2027         baij->ilen[row/bs] = 0;
2028       } /* end (diag == 0.0) */
2029     } else { /* (sizes[i] != bs) */
2030 #if defined (PETSC_USE_DEBUG)
2031       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2032 #endif
2033       for (k=0; k<count; k++) {
2034         aa[0] =  zero;
2035         aa    += bs;
2036       }
2037       if (diag != 0.0) {
2038         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2039       }
2040     }
2041   }
2042 
2043   ierr = PetscFree(rows);CHKERRQ(ierr);
2044   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2050 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2051 {
2052   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2053   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2054   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2055   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2056   PetscErrorCode ierr;
2057   PetscInt       ridx,cidx,bs2=a->bs2;
2058   PetscTruth     roworiented=a->roworiented;
2059   MatScalar      *ap,value,*aa=a->a,*bap;
2060 
2061   PetscFunctionBegin;
2062   for (k=0; k<m; k++) { /* loop over added rows */
2063     row  = im[k];
2064     brow = row/bs;
2065     if (row < 0) continue;
2066 #if defined(PETSC_USE_DEBUG)
2067     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2068 #endif
2069     rp   = aj + ai[brow];
2070     ap   = aa + bs2*ai[brow];
2071     rmax = imax[brow];
2072     nrow = ailen[brow];
2073     low  = 0;
2074     high = nrow;
2075     for (l=0; l<n; l++) { /* loop over added columns */
2076       if (in[l] < 0) continue;
2077 #if defined(PETSC_USE_DEBUG)
2078       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2079 #endif
2080       col = in[l]; bcol = col/bs;
2081       ridx = row % bs; cidx = col % bs;
2082       if (roworiented) {
2083         value = v[l + k*n];
2084       } else {
2085         value = v[k + l*m];
2086       }
2087       if (col <= lastcol) low = 0; else high = nrow;
2088       lastcol = col;
2089       while (high-low > 7) {
2090         t = (low+high)/2;
2091         if (rp[t] > bcol) high = t;
2092         else              low  = t;
2093       }
2094       for (i=low; i<high; i++) {
2095         if (rp[i] > bcol) break;
2096         if (rp[i] == bcol) {
2097           bap  = ap +  bs2*i + bs*cidx + ridx;
2098           if (is == ADD_VALUES) *bap += value;
2099           else                  *bap  = value;
2100           goto noinsert1;
2101         }
2102       }
2103       if (nonew == 1) goto noinsert1;
2104       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2105       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2106       N = nrow++ - 1; high++;
2107       /* shift up all the later entries in this row */
2108       for (ii=N; ii>=i; ii--) {
2109         rp[ii+1] = rp[ii];
2110         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2111       }
2112       if (N>=i) {
2113         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2114       }
2115       rp[i]                      = bcol;
2116       ap[bs2*i + bs*cidx + ridx] = value;
2117       a->nz++;
2118       noinsert1:;
2119       low = i;
2120     }
2121     ailen[brow] = nrow;
2122   }
2123   A->same_nonzero = PETSC_FALSE;
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2128 
2129 #undef __FUNCT__
2130 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2131 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2132 {
2133   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2134   Mat            outA;
2135   PetscErrorCode ierr;
2136   PetscTruth     row_identity,col_identity;
2137 
2138   PetscFunctionBegin;
2139   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2140   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2141   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2142   if (!row_identity || !col_identity) {
2143     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2144   }
2145 
2146   outA          = inA;
2147   inA->factor   = MAT_FACTOR_LU;
2148 
2149   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2150 
2151   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2152   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2153   a->row = row;
2154   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2155   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2156   a->col = col;
2157 
2158   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2159   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2160   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2161 
2162   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2163   if (!a->solve_work) {
2164     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2165     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2166   }
2167   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2168 
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 EXTERN_C_BEGIN
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2175 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2176 {
2177   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2178   PetscInt    i,nz,mbs;
2179 
2180   PetscFunctionBegin;
2181   nz  = baij->maxnz/baij->bs2;
2182   mbs = baij->mbs;
2183   for (i=0; i<nz; i++) {
2184     baij->j[i] = indices[i];
2185   }
2186   baij->nz = nz;
2187   for (i=0; i<mbs; i++) {
2188     baij->ilen[i] = baij->imax[i];
2189   }
2190   PetscFunctionReturn(0);
2191 }
2192 EXTERN_C_END
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2196 /*@
2197     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2198        in the matrix.
2199 
2200   Input Parameters:
2201 +  mat - the SeqBAIJ matrix
2202 -  indices - the column indices
2203 
2204   Level: advanced
2205 
2206   Notes:
2207     This can be called if you have precomputed the nonzero structure of the
2208   matrix and want to provide it to the matrix object to improve the performance
2209   of the MatSetValues() operation.
2210 
2211     You MUST have set the correct numbers of nonzeros per row in the call to
2212   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2213 
2214     MUST be called before any calls to MatSetValues();
2215 
2216 @*/
2217 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2218 {
2219   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2220 
2221   PetscFunctionBegin;
2222   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2223   PetscValidPointer(indices,2);
2224   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2225   if (f) {
2226     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2227   } else {
2228     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2229   }
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2235 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2236 {
2237   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2238   PetscErrorCode ierr;
2239   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2240   PetscReal      atmp;
2241   PetscScalar    *x,zero = 0.0;
2242   MatScalar      *aa;
2243   PetscInt       ncols,brow,krow,kcol;
2244 
2245   PetscFunctionBegin;
2246   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2247   bs   = A->rmap->bs;
2248   aa   = a->a;
2249   ai   = a->i;
2250   aj   = a->j;
2251   mbs  = a->mbs;
2252 
2253   ierr = VecSet(v,zero);CHKERRQ(ierr);
2254   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2255   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2256   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2257   for (i=0; i<mbs; i++) {
2258     ncols = ai[1] - ai[0]; ai++;
2259     brow  = bs*i;
2260     for (j=0; j<ncols; j++){
2261       for (kcol=0; kcol<bs; kcol++){
2262         for (krow=0; krow<bs; krow++){
2263           atmp = PetscAbsScalar(*aa);aa++;
2264           row = brow + krow;    /* row index */
2265           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2266           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2267         }
2268       }
2269       aj++;
2270     }
2271   }
2272   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatCopy_SeqBAIJ"
2278 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2279 {
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   /* If the two matrices have the same copy implementation, use fast copy. */
2284   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2285     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2286     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2287 
2288     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2289       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2290     }
2291     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2292   } else {
2293     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2294   }
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 #undef __FUNCT__
2299 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2300 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2301 {
2302   PetscErrorCode ierr;
2303 
2304   PetscFunctionBegin;
2305   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2311 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2312 {
2313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2314   PetscFunctionBegin;
2315   *array = a->a;
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2321 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2322 {
2323   PetscFunctionBegin;
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 #include "petscblaslapack.h"
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2330 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2331 {
2332   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2333   PetscErrorCode ierr;
2334   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2335   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2336 
2337   PetscFunctionBegin;
2338   if (str == SAME_NONZERO_PATTERN) {
2339     PetscScalar alpha = a;
2340     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2341   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2342     if (y->xtoy && y->XtoY != X) {
2343       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2344       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2345     }
2346     if (!y->xtoy) { /* get xtoy */
2347       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2348       y->XtoY = X;
2349       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2350     }
2351     bs2 = bs*bs;
2352     for (i=0; i<x->nz; i++) {
2353       j = 0;
2354       while (j < bs2){
2355         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2356         j++;
2357       }
2358     }
2359     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2360   } else {
2361     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2362   }
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2368 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2369 {
2370   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2371   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2372   MatScalar      *aa = a->a;
2373 
2374   PetscFunctionBegin;
2375   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2381 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2382 {
2383   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2384   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2385   MatScalar      *aa = a->a;
2386 
2387   PetscFunctionBegin;
2388   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 
2393 /* -------------------------------------------------------------------*/
2394 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2395        MatGetRow_SeqBAIJ,
2396        MatRestoreRow_SeqBAIJ,
2397        MatMult_SeqBAIJ_N,
2398 /* 4*/ MatMultAdd_SeqBAIJ_N,
2399        MatMultTranspose_SeqBAIJ,
2400        MatMultTransposeAdd_SeqBAIJ,
2401        0,
2402        0,
2403        0,
2404 /*10*/ 0,
2405        MatLUFactor_SeqBAIJ,
2406        0,
2407        0,
2408        MatTranspose_SeqBAIJ,
2409 /*15*/ MatGetInfo_SeqBAIJ,
2410        MatEqual_SeqBAIJ,
2411        MatGetDiagonal_SeqBAIJ,
2412        MatDiagonalScale_SeqBAIJ,
2413        MatNorm_SeqBAIJ,
2414 /*20*/ 0,
2415        MatAssemblyEnd_SeqBAIJ,
2416        MatSetOption_SeqBAIJ,
2417        MatZeroEntries_SeqBAIJ,
2418 /*24*/ MatZeroRows_SeqBAIJ,
2419        0,
2420        0,
2421        0,
2422        0,
2423 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2424        0,
2425        0,
2426        MatGetArray_SeqBAIJ,
2427        MatRestoreArray_SeqBAIJ,
2428 /*34*/ MatDuplicate_SeqBAIJ,
2429        0,
2430        0,
2431        MatILUFactor_SeqBAIJ,
2432        0,
2433 /*39*/ MatAXPY_SeqBAIJ,
2434        MatGetSubMatrices_SeqBAIJ,
2435        MatIncreaseOverlap_SeqBAIJ,
2436        MatGetValues_SeqBAIJ,
2437        MatCopy_SeqBAIJ,
2438 /*44*/ 0,
2439        MatScale_SeqBAIJ,
2440        0,
2441        0,
2442        MatILUDTFactor_SeqBAIJ,
2443 /*49*/ 0,
2444        MatGetRowIJ_SeqBAIJ,
2445        MatRestoreRowIJ_SeqBAIJ,
2446        0,
2447        0,
2448 /*54*/ 0,
2449        0,
2450        0,
2451        0,
2452        MatSetValuesBlocked_SeqBAIJ,
2453 /*59*/ MatGetSubMatrix_SeqBAIJ,
2454        MatDestroy_SeqBAIJ,
2455        MatView_SeqBAIJ,
2456        0,
2457        0,
2458 /*64*/ 0,
2459        0,
2460        0,
2461        0,
2462        0,
2463 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2464        0,
2465        MatConvert_Basic,
2466        0,
2467        0,
2468 /*74*/ 0,
2469        0,
2470        0,
2471        0,
2472        0,
2473 /*79*/ 0,
2474        0,
2475        0,
2476        0,
2477        MatLoad_SeqBAIJ,
2478 /*84*/ 0,
2479        0,
2480        0,
2481        0,
2482        0,
2483 /*89*/ 0,
2484        0,
2485        0,
2486        0,
2487        0,
2488 /*94*/ 0,
2489        0,
2490        0,
2491        0,
2492        0,
2493 /*99*/0,
2494        0,
2495        0,
2496        0,
2497        0,
2498 /*104*/0,
2499        MatRealPart_SeqBAIJ,
2500        MatImaginaryPart_SeqBAIJ,
2501        0,
2502        0,
2503 /*109*/0,
2504        0,
2505        0,
2506        0,
2507        MatMissingDiagonal_SeqBAIJ
2508 /*114*/
2509 };
2510 
2511 EXTERN_C_BEGIN
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2514 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2515 {
2516   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2517   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2518   PetscErrorCode ierr;
2519 
2520   PetscFunctionBegin;
2521   if (aij->nonew != 1) {
2522     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2523   }
2524 
2525   /* allocate space for values if not already there */
2526   if (!aij->saved_values) {
2527     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2528   }
2529 
2530   /* copy values over */
2531   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 EXTERN_C_END
2535 
2536 EXTERN_C_BEGIN
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2539 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2540 {
2541   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2542   PetscErrorCode ierr;
2543   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2544 
2545   PetscFunctionBegin;
2546   if (aij->nonew != 1) {
2547     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2548   }
2549   if (!aij->saved_values) {
2550     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2551   }
2552 
2553   /* copy values over */
2554   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2555   PetscFunctionReturn(0);
2556 }
2557 EXTERN_C_END
2558 
2559 EXTERN_C_BEGIN
2560 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2561 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2562 EXTERN_C_END
2563 
2564 EXTERN_C_BEGIN
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2567 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2568 {
2569   Mat_SeqBAIJ    *b;
2570   PetscErrorCode ierr;
2571   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2572   PetscTruth     flg,skipallocation = PETSC_FALSE;
2573 
2574   PetscFunctionBegin;
2575 
2576   if (nz == MAT_SKIP_ALLOCATION) {
2577     skipallocation = PETSC_TRUE;
2578     nz             = 0;
2579   }
2580 
2581   if (bs < 0) {
2582     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2583       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2584     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2585     bs   = PetscAbs(bs);
2586   }
2587   if (nnz && newbs != bs) {
2588     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2589   }
2590   bs   = newbs;
2591 
2592   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2593   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2594   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2595   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2596 
2597   B->preallocated = PETSC_TRUE;
2598 
2599   mbs  = B->rmap->n/bs;
2600   nbs  = B->cmap->n/bs;
2601   bs2  = bs*bs;
2602 
2603   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2604     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2605   }
2606 
2607   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2608   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2609   if (nnz) {
2610     for (i=0; i<mbs; i++) {
2611       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2612       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2613     }
2614   }
2615 
2616   b       = (Mat_SeqBAIJ*)B->data;
2617   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2618     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2619   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2620 
2621   if (!flg) {
2622     switch (bs) {
2623     case 1:
2624       B->ops->mult            = MatMult_SeqBAIJ_1;
2625       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2626       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2627       break;
2628     case 2:
2629       B->ops->mult            = MatMult_SeqBAIJ_2;
2630       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2631       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2632       break;
2633     case 3:
2634       B->ops->mult            = MatMult_SeqBAIJ_3;
2635       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2636       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2637       break;
2638     case 4:
2639       B->ops->mult            = MatMult_SeqBAIJ_4;
2640       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2641       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2642       break;
2643     case 5:
2644       B->ops->mult            = MatMult_SeqBAIJ_5;
2645       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2646       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2647       break;
2648     case 6:
2649       B->ops->mult            = MatMult_SeqBAIJ_6;
2650       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2651       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2652       break;
2653     case 7:
2654       B->ops->mult            = MatMult_SeqBAIJ_7;
2655       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2656       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2657       break;
2658     default:
2659       B->ops->mult            = MatMult_SeqBAIJ_N;
2660       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2661       break;
2662     }
2663   }
2664   B->rmap->bs      = bs;
2665   b->mbs     = mbs;
2666   b->nbs     = nbs;
2667   if (!skipallocation) {
2668     if (!b->imax) {
2669       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2670     }
2671     /* b->ilen will count nonzeros in each block row so far. */
2672     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2673     if (!nnz) {
2674       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2675       else if (nz <= 0)        nz = 1;
2676       for (i=0; i<mbs; i++) b->imax[i] = nz;
2677       nz = nz*mbs;
2678     } else {
2679       nz = 0;
2680       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2681     }
2682 
2683     /* allocate the matrix space */
2684     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2685     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2686     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2687     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2688     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2689     b->singlemalloc = PETSC_TRUE;
2690     b->i[0] = 0;
2691     for (i=1; i<mbs+1; i++) {
2692       b->i[i] = b->i[i-1] + b->imax[i-1];
2693     }
2694     b->free_a     = PETSC_TRUE;
2695     b->free_ij    = PETSC_TRUE;
2696   } else {
2697     b->free_a     = PETSC_FALSE;
2698     b->free_ij    = PETSC_FALSE;
2699   }
2700 
2701   B->rmap->bs          = bs;
2702   b->bs2              = bs2;
2703   b->mbs              = mbs;
2704   b->nz               = 0;
2705   b->maxnz            = nz*bs2;
2706   B->info.nz_unneeded = (PetscReal)b->maxnz;
2707   PetscFunctionReturn(0);
2708 }
2709 EXTERN_C_END
2710 
2711 EXTERN_C_BEGIN
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2714 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2715 {
2716   PetscInt       i,m,nz,nz_max=0,*nnz;
2717   PetscScalar    *values=0;
2718   PetscErrorCode ierr;
2719 
2720   PetscFunctionBegin;
2721 
2722   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2723 
2724   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2725   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2726   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2727   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2728   m = B->rmap->n/bs;
2729 
2730   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2731   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2732   for(i=0; i<m; i++) {
2733     nz = ii[i+1]- ii[i];
2734     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2735     nz_max = PetscMax(nz_max, nz);
2736     nnz[i] = nz;
2737   }
2738   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2739   ierr = PetscFree(nnz);CHKERRQ(ierr);
2740 
2741   values = (PetscScalar*)V;
2742   if (!values) {
2743     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2744     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2745   }
2746   for (i=0; i<m; i++) {
2747     PetscInt          ncols  = ii[i+1] - ii[i];
2748     const PetscInt    *icols = jj + ii[i];
2749     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2750     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2751   }
2752   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2753   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2754   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755 
2756   PetscFunctionReturn(0);
2757 }
2758 EXTERN_C_END
2759 
2760 
2761 EXTERN_C_BEGIN
2762 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2763 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2764 EXTERN_C_END
2765 
2766 /*MC
2767    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2768    block sparse compressed row format.
2769 
2770    Options Database Keys:
2771 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2772 
2773   Level: beginner
2774 
2775 .seealso: MatCreateSeqBAIJ()
2776 M*/
2777 
2778 
2779 EXTERN_C_BEGIN
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatCreate_SeqBAIJ"
2782 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2783 {
2784   PetscErrorCode ierr;
2785   PetscMPIInt    size;
2786   Mat_SeqBAIJ    *b;
2787 
2788   PetscFunctionBegin;
2789   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2790   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2791 
2792   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2793   B->data = (void*)b;
2794   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2795   B->mapping          = 0;
2796   b->row              = 0;
2797   b->col              = 0;
2798   b->icol             = 0;
2799   b->reallocs         = 0;
2800   b->saved_values     = 0;
2801 
2802   b->roworiented      = PETSC_TRUE;
2803   b->nonew            = 0;
2804   b->diag             = 0;
2805   b->solve_work       = 0;
2806   b->mult_work        = 0;
2807   B->spptr            = 0;
2808   B->info.nz_unneeded = (PetscReal)b->maxnz;
2809   b->keepzeroedrows   = PETSC_FALSE;
2810   b->xtoy              = 0;
2811   b->XtoY              = 0;
2812   b->compressedrow.use     = PETSC_FALSE;
2813   b->compressedrow.nrows   = 0;
2814   b->compressedrow.i       = PETSC_NULL;
2815   b->compressedrow.rindex  = PETSC_NULL;
2816   b->compressedrow.checked = PETSC_FALSE;
2817   B->same_nonzero          = PETSC_FALSE;
2818 
2819   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2820                                      "MatGetFactorAvailable_seqbaij_petsc",
2821                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2823                                      "MatGetFactor_seqbaij_petsc",
2824                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2826                                      "MatInvertBlockDiagonal_SeqBAIJ",
2827                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2829                                      "MatStoreValues_SeqBAIJ",
2830                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2832                                      "MatRetrieveValues_SeqBAIJ",
2833                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2835                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2836                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2838                                      "MatConvert_SeqBAIJ_SeqAIJ",
2839                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2841                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2842                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2844                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2845                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2847                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2848                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2849   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2850   PetscFunctionReturn(0);
2851 }
2852 EXTERN_C_END
2853 
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2856 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2857 {
2858   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2859   PetscErrorCode ierr;
2860   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2861 
2862   PetscFunctionBegin;
2863   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2864 
2865   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2866   for (i=0; i<mbs; i++) {
2867     c->imax[i] = a->imax[i];
2868     c->ilen[i] = a->ilen[i];
2869   }
2870 
2871   /* allocate the matrix space */
2872   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2873   c->singlemalloc = PETSC_TRUE;
2874   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2875   if (mbs > 0) {
2876     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2877     if (cpvalues == MAT_COPY_VALUES) {
2878       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2879     } else {
2880       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2881     }
2882   }
2883   c->roworiented = a->roworiented;
2884   c->nonew       = a->nonew;
2885   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2886   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2887   c->bs2         = a->bs2;
2888   c->mbs         = a->mbs;
2889   c->nbs         = a->nbs;
2890 
2891   if (a->diag) {
2892     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2893     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2894     for (i=0; i<mbs; i++) {
2895       c->diag[i] = a->diag[i];
2896     }
2897   } else c->diag        = 0;
2898   c->nz                 = a->nz;
2899   c->maxnz              = a->maxnz;
2900   c->solve_work         = 0;
2901   c->mult_work          = 0;
2902   c->free_a             = PETSC_TRUE;
2903   c->free_ij            = PETSC_TRUE;
2904   C->preallocated       = PETSC_TRUE;
2905   C->assembled          = PETSC_TRUE;
2906 
2907   c->compressedrow.use     = a->compressedrow.use;
2908   c->compressedrow.nrows   = a->compressedrow.nrows;
2909   c->compressedrow.checked = a->compressedrow.checked;
2910   if ( a->compressedrow.checked && a->compressedrow.use){
2911     i = a->compressedrow.nrows;
2912     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2913     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2914     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2915     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2916   } else {
2917     c->compressedrow.use    = PETSC_FALSE;
2918     c->compressedrow.i      = PETSC_NULL;
2919     c->compressedrow.rindex = PETSC_NULL;
2920   }
2921   C->same_nonzero = A->same_nonzero;
2922   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 #undef __FUNCT__
2927 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2928 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2929 {
2930     PetscErrorCode ierr;
2931 
2932   PetscFunctionBegin;
2933   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2934   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2935   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2936   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 #undef __FUNCT__
2941 #define __FUNCT__ "MatLoad_SeqBAIJ"
2942 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2943 {
2944   Mat_SeqBAIJ    *a;
2945   Mat            B;
2946   PetscErrorCode ierr;
2947   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2948   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2949   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2950   PetscInt       *masked,nmask,tmp,bs2,ishift;
2951   PetscMPIInt    size;
2952   int            fd;
2953   PetscScalar    *aa;
2954   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2955 
2956   PetscFunctionBegin;
2957   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2958     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2959   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2960   bs2  = bs*bs;
2961 
2962   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2963   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2964   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2965   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2966   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2967   M = header[1]; N = header[2]; nz = header[3];
2968 
2969   if (header[3] < 0) {
2970     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2971   }
2972 
2973   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2974 
2975   /*
2976      This code adds extra rows to make sure the number of rows is
2977     divisible by the blocksize
2978   */
2979   mbs        = M/bs;
2980   extra_rows = bs - M + bs*(mbs);
2981   if (extra_rows == bs) extra_rows = 0;
2982   else                  mbs++;
2983   if (extra_rows) {
2984     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2985   }
2986 
2987   /* read in row lengths */
2988   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2989   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2990   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2991 
2992   /* read in column indices */
2993   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2994   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2995   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2996 
2997   /* loop over row lengths determining block row lengths */
2998   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2999   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3000   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3001   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3002   masked   = mask + mbs;
3003   rowcount = 0; nzcount = 0;
3004   for (i=0; i<mbs; i++) {
3005     nmask = 0;
3006     for (j=0; j<bs; j++) {
3007       kmax = rowlengths[rowcount];
3008       for (k=0; k<kmax; k++) {
3009         tmp = jj[nzcount++]/bs;
3010         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3011       }
3012       rowcount++;
3013     }
3014     browlengths[i] += nmask;
3015     /* zero out the mask elements we set */
3016     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3017   }
3018 
3019   /* create our matrix */
3020   ierr = MatCreate(comm,&B);
3021   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3022   ierr = MatSetType(B,type);CHKERRQ(ierr);
3023   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3024   a = (Mat_SeqBAIJ*)B->data;
3025 
3026   /* set matrix "i" values */
3027   a->i[0] = 0;
3028   for (i=1; i<= mbs; i++) {
3029     a->i[i]      = a->i[i-1] + browlengths[i-1];
3030     a->ilen[i-1] = browlengths[i-1];
3031   }
3032   a->nz         = 0;
3033   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3034 
3035   /* read in nonzero values */
3036   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3037   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3038   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3039 
3040   /* set "a" and "j" values into matrix */
3041   nzcount = 0; jcount = 0;
3042   for (i=0; i<mbs; i++) {
3043     nzcountb = nzcount;
3044     nmask    = 0;
3045     for (j=0; j<bs; j++) {
3046       kmax = rowlengths[i*bs+j];
3047       for (k=0; k<kmax; k++) {
3048         tmp = jj[nzcount++]/bs;
3049 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3050       }
3051     }
3052     /* sort the masked values */
3053     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3054 
3055     /* set "j" values into matrix */
3056     maskcount = 1;
3057     for (j=0; j<nmask; j++) {
3058       a->j[jcount++]  = masked[j];
3059       mask[masked[j]] = maskcount++;
3060     }
3061     /* set "a" values into matrix */
3062     ishift = bs2*a->i[i];
3063     for (j=0; j<bs; j++) {
3064       kmax = rowlengths[i*bs+j];
3065       for (k=0; k<kmax; k++) {
3066         tmp       = jj[nzcountb]/bs ;
3067         block     = mask[tmp] - 1;
3068         point     = jj[nzcountb] - bs*tmp;
3069         idx       = ishift + bs2*block + j + bs*point;
3070         a->a[idx] = (MatScalar)aa[nzcountb++];
3071       }
3072     }
3073     /* zero out the mask elements we set */
3074     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3075   }
3076   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3077 
3078   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3079   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3080   ierr = PetscFree(aa);CHKERRQ(ierr);
3081   ierr = PetscFree(jj);CHKERRQ(ierr);
3082   ierr = PetscFree(mask);CHKERRQ(ierr);
3083 
3084   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3085   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3086   ierr = MatView_Private(B);CHKERRQ(ierr);
3087 
3088   *A = B;
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 #undef __FUNCT__
3093 #define __FUNCT__ "MatCreateSeqBAIJ"
3094 /*@C
3095    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3096    compressed row) format.  For good matrix assembly performance the
3097    user should preallocate the matrix storage by setting the parameter nz
3098    (or the array nnz).  By setting these parameters accurately, performance
3099    during matrix assembly can be increased by more than a factor of 50.
3100 
3101    Collective on MPI_Comm
3102 
3103    Input Parameters:
3104 +  comm - MPI communicator, set to PETSC_COMM_SELF
3105 .  bs - size of block
3106 .  m - number of rows
3107 .  n - number of columns
3108 .  nz - number of nonzero blocks  per block row (same for all rows)
3109 -  nnz - array containing the number of nonzero blocks in the various block rows
3110          (possibly different for each block row) or PETSC_NULL
3111 
3112    Output Parameter:
3113 .  A - the matrix
3114 
3115    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3116    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3117    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3118 
3119    Options Database Keys:
3120 .   -mat_no_unroll - uses code that does not unroll the loops in the
3121                      block calculations (much slower)
3122 .    -mat_block_size - size of the blocks to use
3123 
3124    Level: intermediate
3125 
3126    Notes:
3127    The number of rows and columns must be divisible by blocksize.
3128 
3129    If the nnz parameter is given then the nz parameter is ignored
3130 
3131    A nonzero block is any block that as 1 or more nonzeros in it
3132 
3133    The block AIJ format is fully compatible with standard Fortran 77
3134    storage.  That is, the stored row and column indices can begin at
3135    either one (as in Fortran) or zero.  See the users' manual for details.
3136 
3137    Specify the preallocated storage with either nz or nnz (not both).
3138    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3139    allocation.  For additional details, see the users manual chapter on
3140    matrices.
3141 
3142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3143 @*/
3144 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3145 {
3146   PetscErrorCode ierr;
3147 
3148   PetscFunctionBegin;
3149   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3150   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3151   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3152   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3153   PetscFunctionReturn(0);
3154 }
3155 
3156 #undef __FUNCT__
3157 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3158 /*@C
3159    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3160    per row in the matrix. For good matrix assembly performance the
3161    user should preallocate the matrix storage by setting the parameter nz
3162    (or the array nnz).  By setting these parameters accurately, performance
3163    during matrix assembly can be increased by more than a factor of 50.
3164 
3165    Collective on MPI_Comm
3166 
3167    Input Parameters:
3168 +  A - the matrix
3169 .  bs - size of block
3170 .  nz - number of block nonzeros per block row (same for all rows)
3171 -  nnz - array containing the number of block nonzeros in the various block rows
3172          (possibly different for each block row) or PETSC_NULL
3173 
3174    Options Database Keys:
3175 .   -mat_no_unroll - uses code that does not unroll the loops in the
3176                      block calculations (much slower)
3177 .    -mat_block_size - size of the blocks to use
3178 
3179    Level: intermediate
3180 
3181    Notes:
3182    If the nnz parameter is given then the nz parameter is ignored
3183 
3184    You can call MatGetInfo() to get information on how effective the preallocation was;
3185    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3186    You can also run with the option -info and look for messages with the string
3187    malloc in them to see if additional memory allocation was needed.
3188 
3189    The block AIJ format is fully compatible with standard Fortran 77
3190    storage.  That is, the stored row and column indices can begin at
3191    either one (as in Fortran) or zero.  See the users' manual for details.
3192 
3193    Specify the preallocated storage with either nz or nnz (not both).
3194    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3195    allocation.  For additional details, see the users manual chapter on
3196    matrices.
3197 
3198 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3199 @*/
3200 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3201 {
3202   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3203 
3204   PetscFunctionBegin;
3205   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3206   if (f) {
3207     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3208   }
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3214 /*@C
3215    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3216    (the default sequential PETSc format).
3217 
3218    Collective on MPI_Comm
3219 
3220    Input Parameters:
3221 +  A - the matrix
3222 .  i - the indices into j for the start of each local row (starts with zero)
3223 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3224 -  v - optional values in the matrix
3225 
3226    Level: developer
3227 
3228 .keywords: matrix, aij, compressed row, sparse
3229 
3230 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3231 @*/
3232 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3233 {
3234   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3235 
3236   PetscFunctionBegin;
3237   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3238   if (f) {
3239     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3240   }
3241   PetscFunctionReturn(0);
3242 }
3243 
3244 
3245 #undef __FUNCT__
3246 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3247 /*@
3248      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3249               (upper triangular entries in CSR format) provided by the user.
3250 
3251      Collective on MPI_Comm
3252 
3253    Input Parameters:
3254 +  comm - must be an MPI communicator of size 1
3255 .  bs - size of block
3256 .  m - number of rows
3257 .  n - number of columns
3258 .  i - row indices
3259 .  j - column indices
3260 -  a - matrix values
3261 
3262    Output Parameter:
3263 .  mat - the matrix
3264 
3265    Level: intermediate
3266 
3267    Notes:
3268        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3269     once the matrix is destroyed
3270 
3271        You cannot set new nonzero locations into this matrix, that will generate an error.
3272 
3273        The i and j indices are 0 based
3274 
3275 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3276 
3277 @*/
3278 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3279 {
3280   PetscErrorCode ierr;
3281   PetscInt       ii;
3282   Mat_SeqBAIJ    *baij;
3283 
3284   PetscFunctionBegin;
3285   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3286   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3287 
3288   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3289   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3290   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3291   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3292   baij = (Mat_SeqBAIJ*)(*mat)->data;
3293   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3294 
3295   baij->i = i;
3296   baij->j = j;
3297   baij->a = a;
3298   baij->singlemalloc = PETSC_FALSE;
3299   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3300   baij->free_a       = PETSC_FALSE;
3301   baij->free_ij       = PETSC_FALSE;
3302 
3303   for (ii=0; ii<m; ii++) {
3304     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3305 #if defined(PETSC_USE_DEBUG)
3306     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3307 #endif
3308   }
3309 #if defined(PETSC_USE_DEBUG)
3310   for (ii=0; ii<baij->i[m]; ii++) {
3311     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3312     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3313   }
3314 #endif
3315 
3316   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3317   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3318   PetscFunctionReturn(0);
3319 }
3320