xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 56ef4db76caab0fcefdb9bab2022010080f16de2)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr;
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31   ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr);
32   PetscFunctionReturn(0);
33 }
34 
35 EXTERN_C_BEGIN
36 #undef __FUNCT__
37 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
38 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
39 {
40   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
41   PetscErrorCode ierr;
42   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
43   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
44   PetscReal      shift = 0.0;
45 
46   PetscFunctionBegin;
47   if (a->idiagvalid) PetscFunctionReturn(0);
48   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
49   diag_offset = a->diag;
50   if (!a->idiag) {
51     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
52     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
53   }
54   diag  = a->idiag;
55   mdiag = a->idiag+bs2*mbs;
56   /* factor and invert each block */
57   switch (bs){
58     case 1:
59       for (i=0; i<mbs; i++) {
60         odiag = v + 1*diag_offset[i];
61         diag[0]  = odiag[0];
62         mdiag[0] = odiag[0];
63         diag[0]  = 1.0 / (diag[0] + shift);
64         diag    += 1;
65         mdiag   += 1;
66       }
67       break;
68     case 2:
69       for (i=0; i<mbs; i++) {
70         odiag   = v + 4*diag_offset[i];
71         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
72 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
73 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
74 	diag    += 4;
75 	mdiag   += 4;
76       }
77       break;
78     case 3:
79       for (i=0; i<mbs; i++) {
80         odiag    = v + 9*diag_offset[i];
81         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
82         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
83         diag[8]  = odiag[8];
84         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
85         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
86         mdiag[8] = odiag[8];
87 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
88 	diag    += 9;
89 	mdiag   += 9;
90       }
91       break;
92     case 4:
93       for (i=0; i<mbs; i++) {
94         odiag  = v + 16*diag_offset[i];
95         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
97 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
98 	diag  += 16;
99 	mdiag += 16;
100       }
101       break;
102     case 5:
103       for (i=0; i<mbs; i++) {
104         odiag = v + 25*diag_offset[i];
105         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
107 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
108 	diag  += 25;
109 	mdiag += 25;
110       }
111       break;
112     case 6:
113       for (i=0; i<mbs; i++) {
114         odiag = v + 36*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
117 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
118 	diag  += 36;
119 	mdiag += 36;
120       }
121       break;
122     case 7:
123       for (i=0; i<mbs; i++) {
124         odiag = v + 49*diag_offset[i];
125         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
127 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
128 	diag  += 49;
129 	mdiag += 49;
130       }
131       break;
132     default:
133       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
134       for (i=0; i<mbs; i++) {
135         odiag = v + bs2*diag_offset[i];
136         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
138         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
139 	diag  += bs2;
140 	mdiag += bs2;
141       }
142       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
143   }
144   a->idiagvalid = PETSC_TRUE;
145   PetscFunctionReturn(0);
146 }
147 EXTERN_C_END
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
151 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
152 {
153   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
154   PetscScalar        *x,x1,s1;
155   const PetscScalar  *b;
156   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
157   PetscErrorCode     ierr;
158   PetscInt           m = a->mbs,i,i2,nz,j;
159   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
160 
161   PetscFunctionBegin;
162   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
163   its = its*lits;
164   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
169 
170   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
171 
172   diag  = a->diag;
173   idiag = a->idiag;
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
179       x[0] = b[0]*idiag[0];
180       i2     = 1;
181       idiag += 1;
182       for (i=1; i<m; i++) {
183         v     = aa + ai[i];
184         vi    = aj + ai[i];
185         nz    = diag[i] - ai[i];
186         s1    = b[i2];
187         for (j=0; j<nz; j++) {
188           s1 -= v[j]*x[vi[j]];
189         }
190         x[i2]   = idiag[0]*s1;
191         idiag   += 1;
192         i2      += 1;
193       }
194       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
195       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
196     }
197     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
198         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
199       i2    = 0;
200       mdiag = a->idiag+a->mbs;
201       for (i=0; i<m; i++) {
202         x1      = x[i2];
203         x[i2]   = mdiag[0]*x1;
204         mdiag  += 1;
205         i2     += 1;
206       }
207       ierr = PetscLogFlops(m);CHKERRQ(ierr);
208     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
209       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
210     }
211     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
212       idiag   = a->idiag+a->mbs - 1;
213       i2      = m - 1;
214       x1      = x[i2];
215       x[i2]   = idiag[0]*x1;
216       idiag -= 1;
217       i2    -= 1;
218       for (i=m-2; i>=0; i--) {
219         v     = aa + (diag[i]+1);
220         vi    = aj + diag[i] + 1;
221         nz    = ai[i+1] - diag[i] - 1;
222         s1    = x[i2];
223         for (j=0; j<nz; j++) {
224           s1 -= v[j]*x[vi[j]];
225         }
226         x[i2]   = idiag[0]*s1;
227         idiag   -= 1;
228         i2      -= 1;
229       }
230       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
231     }
232   } else {
233     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
234   }
235   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
236   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
242 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
243 {
244   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
245   PetscScalar        *x,x1,x2,s1,s2;
246   const PetscScalar  *b;
247   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
248   PetscErrorCode     ierr;
249   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
250   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
251 
252   PetscFunctionBegin;
253   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
254   its = its*lits;
255   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
256   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
257   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
258   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
259   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
260 
261   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
262 
263   diag  = a->diag;
264   idiag = a->idiag;
265   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
266   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
267 
268   if (flag & SOR_ZERO_INITIAL_GUESS) {
269     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
270       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
271       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
272       i2     = 2;
273       idiag += 4;
274       for (i=1; i<m; i++) {
275 	v     = aa + 4*ai[i];
276 	vi    = aj + ai[i];
277 	nz    = diag[i] - ai[i];
278 	s1    = b[i2]; s2 = b[i2+1];
279         for (j=0; j<nz; j++) {
280 	  idx  = 2*vi[j];
281           it   = 4*j;
282 	  x1   = x[idx]; x2 = x[1+idx];
283 	  s1  -= v[it]*x1 + v[it+2]*x2;
284 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
285 	}
286 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
287 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
288         idiag   += 4;
289         i2      += 2;
290       }
291       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
292       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
293     }
294     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
295         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
296       i2    = 0;
297       mdiag = a->idiag+4*a->mbs;
298       for (i=0; i<m; i++) {
299         x1      = x[i2]; x2 = x[i2+1];
300         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
301         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
302         mdiag  += 4;
303         i2     += 2;
304       }
305       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
306     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
307       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
308     }
309     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
310       idiag   = a->idiag+4*a->mbs - 4;
311       i2      = 2*m - 2;
312       x1      = x[i2]; x2 = x[i2+1];
313       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
314       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
315       idiag -= 4;
316       i2    -= 2;
317       for (i=m-2; i>=0; i--) {
318 	v     = aa + 4*(diag[i]+1);
319 	vi    = aj + diag[i] + 1;
320 	nz    = ai[i+1] - diag[i] - 1;
321 	s1    = x[i2]; s2 = x[i2+1];
322         for (j=0; j<nz; j++) {
323  	  idx  = 2*vi[j];
324           it   = 4*j;
325 	  x1   = x[idx]; x2 = x[1+idx];
326 	  s1  -= v[it]*x1 + v[it+2]*x2;
327 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
328 	}
329 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
330 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
331         idiag   -= 4;
332         i2      -= 2;
333       }
334       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
335     }
336   } else {
337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
338   }
339   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
340   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
346 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
347 {
348   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
349   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
350   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
351   const PetscScalar  *b;
352   PetscErrorCode     ierr;
353   PetscInt           m = a->mbs,i,i2,nz,idx;
354   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
355 
356   PetscFunctionBegin;
357   its = its*lits;
358   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
359   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
360   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
361   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
362   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
363   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
364 
365   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
366 
367   diag  = a->diag;
368   idiag = a->idiag;
369   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
370   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
371 
372   if (flag & SOR_ZERO_INITIAL_GUESS) {
373     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
374       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
375       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
376       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
377       i2     = 3;
378       idiag += 9;
379       for (i=1; i<m; i++) {
380         v     = aa + 9*ai[i];
381         vi    = aj + ai[i];
382         nz    = diag[i] - ai[i];
383         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
384         while (nz--) {
385           idx  = 3*(*vi++);
386           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
387           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
388           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
389           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
390           v   += 9;
391         }
392         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
393         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
394         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
395         idiag   += 9;
396         i2      += 3;
397       }
398       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
399       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
400     }
401     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
402         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
403       i2    = 0;
404       mdiag = a->idiag+9*a->mbs;
405       for (i=0; i<m; i++) {
406         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
407         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
408         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
409         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
410         mdiag  += 9;
411         i2     += 3;
412       }
413       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
414     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
415       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
416     }
417     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
418       idiag   = a->idiag+9*a->mbs - 9;
419       i2      = 3*m - 3;
420       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
421       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
422       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
423       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
424       idiag -= 9;
425       i2    -= 3;
426       for (i=m-2; i>=0; i--) {
427         v     = aa + 9*(diag[i]+1);
428         vi    = aj + diag[i] + 1;
429         nz    = ai[i+1] - diag[i] - 1;
430         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
431         while (nz--) {
432           idx  = 3*(*vi++);
433           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
434           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
435           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
436           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
437           v   += 9;
438         }
439         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
440         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
441         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
442         idiag   -= 9;
443         i2      -= 3;
444       }
445       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
446     }
447   } else {
448     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
449   }
450   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
451   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
457 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
458 {
459   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
460   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
461   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
462   const PetscScalar  *b;
463   PetscErrorCode     ierr;
464   PetscInt           m = a->mbs,i,i2,nz,idx;
465   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
466 
467   PetscFunctionBegin;
468   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
469   its = its*lits;
470   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
471   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
472   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
473   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
474   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
475 
476   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
477 
478   diag  = a->diag;
479   idiag = a->idiag;
480   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
481   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
482 
483   if (flag & SOR_ZERO_INITIAL_GUESS) {
484     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
485       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
486       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
487       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
488       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
489       i2     = 4;
490       idiag += 16;
491       for (i=1; i<m; i++) {
492 	v     = aa + 16*ai[i];
493 	vi    = aj + ai[i];
494 	nz    = diag[i] - ai[i];
495 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
496 	while (nz--) {
497 	  idx  = 4*(*vi++);
498 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
499 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
500 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
501 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
502 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
503 	  v   += 16;
504 	}
505 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
506 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
507 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
508 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
509         idiag   += 16;
510         i2      += 4;
511       }
512       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
513       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
514     }
515     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
516         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
517       i2    = 0;
518       mdiag = a->idiag+16*a->mbs;
519       for (i=0; i<m; i++) {
520         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
521         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
522         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
523         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
524         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
525         mdiag  += 16;
526         i2     += 4;
527       }
528       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
529     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
530       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
531     }
532     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
533       idiag   = a->idiag+16*a->mbs - 16;
534       i2      = 4*m - 4;
535       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
536       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
537       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
538       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
539       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
540       idiag -= 16;
541       i2    -= 4;
542       for (i=m-2; i>=0; i--) {
543 	v     = aa + 16*(diag[i]+1);
544 	vi    = aj + diag[i] + 1;
545 	nz    = ai[i+1] - diag[i] - 1;
546 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
547 	while (nz--) {
548 	  idx  = 4*(*vi++);
549 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
550 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
551 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
552 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
553 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
554 	  v   += 16;
555 	}
556 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
557 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
558 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
559 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
560         idiag   -= 16;
561         i2      -= 4;
562       }
563       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
564     }
565   } else {
566     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
567   }
568   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
569   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
575 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
576 {
577   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
578   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
579   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
580   const PetscScalar  *b;
581   PetscErrorCode     ierr;
582   PetscInt           m = a->mbs,i,i2,nz,idx;
583   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
584 
585   PetscFunctionBegin;
586   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
587   its = its*lits;
588   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
589   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
590   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
591   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
592   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
593 
594   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
595 
596   diag  = a->diag;
597   idiag = a->idiag;
598   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
599   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
600 
601   if (flag & SOR_ZERO_INITIAL_GUESS) {
602     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
603       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
604       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
605       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
606       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
607       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
608       i2     = 5;
609       idiag += 25;
610       for (i=1; i<m; i++) {
611 	v     = aa + 25*ai[i];
612 	vi    = aj + ai[i];
613 	nz    = diag[i] - ai[i];
614 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
615 	while (nz--) {
616 	  idx  = 5*(*vi++);
617 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
618 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
619 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
620 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
621 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
622 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
623 	  v   += 25;
624 	}
625 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
626 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
627 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
628 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
629 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
630         idiag   += 25;
631         i2      += 5;
632       }
633       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
634       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
635     }
636     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
637         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
638       i2    = 0;
639       mdiag = a->idiag+25*a->mbs;
640       for (i=0; i<m; i++) {
641         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
642         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
643         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
644         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
645         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
646         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
647         mdiag  += 25;
648         i2     += 5;
649       }
650       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
651     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
652       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
653     }
654     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
655       idiag   = a->idiag+25*a->mbs - 25;
656       i2      = 5*m - 5;
657       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
658       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
659       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
660       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
661       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
662       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
663       idiag -= 25;
664       i2    -= 5;
665       for (i=m-2; i>=0; i--) {
666 	v     = aa + 25*(diag[i]+1);
667 	vi    = aj + diag[i] + 1;
668 	nz    = ai[i+1] - diag[i] - 1;
669 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
670 	while (nz--) {
671 	  idx  = 5*(*vi++);
672 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
673 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
674 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
675 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
676 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
677 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
678 	  v   += 25;
679 	}
680 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
681 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
682 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
683 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
684 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
685         idiag   -= 25;
686         i2      -= 5;
687       }
688       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
689     }
690   } else {
691     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
692   }
693   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
694   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
700 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
701 {
702   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
703   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
704   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
705   const PetscScalar  *b;
706   PetscErrorCode     ierr;
707   PetscInt           m = a->mbs,i,i2,nz,idx;
708   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
709 
710   PetscFunctionBegin;
711   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
712   its = its*lits;
713   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
714   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
715   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
716   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
717   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
718 
719   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
720 
721   diag  = a->diag;
722   idiag = a->idiag;
723   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
724   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
725 
726   if (flag & SOR_ZERO_INITIAL_GUESS) {
727     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
728       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
729       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
730       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
731       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
732       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
733       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
734       i2     = 6;
735       idiag += 36;
736       for (i=1; i<m; i++) {
737         v     = aa + 36*ai[i];
738         vi    = aj + ai[i];
739         nz    = diag[i] - ai[i];
740         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
741         while (nz--) {
742           idx  = 6*(*vi++);
743           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
744           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
745           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
746           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
747           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
748           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
749           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
750           v   += 36;
751         }
752         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
753         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
754         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
755         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
756         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
757         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
758         idiag   += 36;
759         i2      += 6;
760       }
761       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
762       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
763     }
764     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
765         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
766       i2    = 0;
767       mdiag = a->idiag+36*a->mbs;
768       for (i=0; i<m; i++) {
769         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
770         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
771         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
772         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
773         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
774         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
775         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
776         mdiag  += 36;
777         i2     += 6;
778       }
779       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
780     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
781       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
782     }
783     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
784       idiag   = a->idiag+36*a->mbs - 36;
785       i2      = 6*m - 6;
786       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
787       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
788       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
789       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
790       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
791       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
792       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
793       idiag -= 36;
794       i2    -= 6;
795       for (i=m-2; i>=0; i--) {
796         v     = aa + 36*(diag[i]+1);
797         vi    = aj + diag[i] + 1;
798         nz    = ai[i+1] - diag[i] - 1;
799         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
800         while (nz--) {
801           idx  = 6*(*vi++);
802           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
803           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
804           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
805           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
806           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
807           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
808           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
809           v   += 36;
810         }
811         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
812         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
813         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
814         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
815         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
816         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
817         idiag   -= 36;
818         i2      -= 6;
819       }
820       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
821     }
822   } else {
823     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
824   }
825   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
826   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
832 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
833 {
834   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
835   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
836   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
837   const PetscScalar  *b;
838   PetscErrorCode     ierr;
839   PetscInt           m = a->mbs,i,i2,nz,idx;
840   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
841 
842   PetscFunctionBegin;
843   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
844   its = its*lits;
845   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
846   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
847   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
848   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
849   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
850 
851   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
852 
853   diag  = a->diag;
854   idiag = a->idiag;
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
857 
858   if (flag & SOR_ZERO_INITIAL_GUESS) {
859     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
860       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
861       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
862       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
863       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
864       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
865       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
866       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
867       i2     = 7;
868       idiag += 49;
869       for (i=1; i<m; i++) {
870         v     = aa + 49*ai[i];
871         vi    = aj + ai[i];
872         nz    = diag[i] - ai[i];
873         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
874         while (nz--) {
875           idx  = 7*(*vi++);
876           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
877           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
878           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
879           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
880           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
881           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
882           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
883           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
884           v   += 49;
885         }
886         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
887         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
888         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
889         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
890         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
891         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
892         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
893         idiag   += 49;
894         i2      += 7;
895       }
896       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
897       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
898     }
899     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
900         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
901       i2    = 0;
902       mdiag = a->idiag+49*a->mbs;
903       for (i=0; i<m; i++) {
904         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
905         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
906         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
907         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
908         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
909         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
910         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
911         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
912         mdiag  += 49;
913         i2     += 7;
914       }
915       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
916     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
917       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
918     }
919     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
920       idiag   = a->idiag+49*a->mbs - 49;
921       i2      = 7*m - 7;
922       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
923       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
924       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
925       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
926       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
927       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
928       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
929       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
930       idiag -= 49;
931       i2    -= 7;
932       for (i=m-2; i>=0; i--) {
933         v     = aa + 49*(diag[i]+1);
934         vi    = aj + diag[i] + 1;
935         nz    = ai[i+1] - diag[i] - 1;
936         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
937         while (nz--) {
938           idx  = 7*(*vi++);
939           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
940           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
941           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
942           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
943           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
944           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
945           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
946           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
947           v   += 49;
948         }
949         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
950         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
951         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
952         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
953         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
954         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
955         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
956         idiag   -= 49;
957         i2      -= 7;
958       }
959       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
960     }
961   } else {
962     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
963   }
964   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
971 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
972 {
973   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
974   PetscScalar        *x,*work,*w,*workt;
975   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
976   const PetscScalar  *b;
977   PetscErrorCode     ierr;
978   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
979   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
980 
981   PetscFunctionBegin;
982   its = its*lits;
983   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
984   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
985   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
986   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
987   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
988   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
989 
990   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
991 
992   diag  = a->diag;
993   idiag = a->idiag;
994   if (!a->mult_work) {
995     k    = PetscMax(A->rmap->n,A->cmap->n);
996     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
997   }
998   work = a->mult_work;
999   if (!a->sor_work) {
1000     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1001   }
1002   w = a->sor_work;
1003 
1004   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1006 
1007   if (flag & SOR_ZERO_INITIAL_GUESS) {
1008     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1009       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1010       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1011       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1012       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1013       i2     = bs;
1014       idiag += bs2;
1015       for (i=1; i<m; i++) {
1016         v     = aa + bs2*ai[i];
1017         vi    = aj + ai[i];
1018         nz    = diag[i] - ai[i];
1019 
1020         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1021 	/* copy all rows of x that are needed into contiguous space */
1022         workt = work;
1023         for (j=0; j<nz; j++) {
1024           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1025           workt += bs;
1026         }
1027         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1028        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1029         while (nz--) {
1030           idx  = N*(*vi++);
1031           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1032           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1033           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1034           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1035           v   += N2;
1036 	  } */
1037 
1038         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1039         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1040         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1041         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1042 
1043         idiag   += bs2;
1044         i2      += bs;
1045       }
1046       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1047       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1048     }
1049     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1050         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1051       i2    = 0;
1052       mdiag = a->idiag+bs2*a->mbs;
1053       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1054       for (i=0; i<m; i++) {
1055         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1056         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1057         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1058         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1059         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1060 
1061         mdiag  += bs2;
1062         i2     += bs;
1063       }
1064       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1065     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1066       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1067     }
1068     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1069       idiag   = a->idiag+bs2*a->mbs - bs2;
1070       i2      = bs*m - bs;
1071       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1072       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1073       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1074       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1075       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1076       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1077       idiag -= bs2;
1078       i2    -= bs;
1079       for (i=m-2; i>=0; i--) {
1080         v     = aa + bs2*(diag[i]+1);
1081         vi    = aj + diag[i] + 1;
1082         nz    = ai[i+1] - diag[i] - 1;
1083 
1084         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1085 	/* copy all rows of x that are needed into contiguous space */
1086         workt = work;
1087         for (j=0; j<nz; j++) {
1088           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1089           workt += bs;
1090         }
1091         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1092         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1093         while (nz--) {
1094           idx  = N*(*vi++);
1095           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1096           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1097           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1098           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1099           v   += N2;
1100 	  } */
1101 
1102         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1103         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1104         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1105         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1106         idiag   -= bs2;
1107         i2      -= bs;
1108       }
1109       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1110     }
1111   } else {
1112     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1113   }
1114   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1115   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1121 */
1122 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1123 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1124 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1125 #define matsetvaluesblocked4_ matsetvaluesblocked4
1126 #endif
1127 
1128 EXTERN_C_BEGIN
1129 #undef __FUNCT__
1130 #define __FUNCT__ "matsetvaluesblocked4_"
1131 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1132 {
1133   Mat               A = *AA;
1134   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1135   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1136   PetscInt          *ai=a->i,*ailen=a->ilen;
1137   PetscInt          *aj=a->j,stepval,lastcol = -1;
1138   const PetscScalar *value = v;
1139   MatScalar         *ap,*aa = a->a,*bap;
1140 
1141   PetscFunctionBegin;
1142   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1143   stepval = (n-1)*4;
1144   for (k=0; k<m; k++) { /* loop over added rows */
1145     row  = im[k];
1146     rp   = aj + ai[row];
1147     ap   = aa + 16*ai[row];
1148     nrow = ailen[row];
1149     low  = 0;
1150     high = nrow;
1151     for (l=0; l<n; l++) { /* loop over added columns */
1152       col = in[l];
1153       if (col <= lastcol) low = 0; else high = nrow;
1154       lastcol = col;
1155       value = v + k*(stepval+4 + l)*4;
1156       while (high-low > 7) {
1157         t = (low+high)/2;
1158         if (rp[t] > col) high = t;
1159         else             low  = t;
1160       }
1161       for (i=low; i<high; i++) {
1162         if (rp[i] > col) break;
1163         if (rp[i] == col) {
1164           bap  = ap +  16*i;
1165           for (ii=0; ii<4; ii++,value+=stepval) {
1166             for (jj=ii; jj<16; jj+=4) {
1167               bap[jj] += *value++;
1168             }
1169           }
1170           goto noinsert2;
1171         }
1172       }
1173       N = nrow++ - 1;
1174       high++; /* added new column index thus must search to one higher than before */
1175       /* shift up all the later entries in this row */
1176       for (ii=N; ii>=i; ii--) {
1177         rp[ii+1] = rp[ii];
1178         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1179       }
1180       if (N >= i) {
1181         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1182       }
1183       rp[i] = col;
1184       bap   = ap +  16*i;
1185       for (ii=0; ii<4; ii++,value+=stepval) {
1186         for (jj=ii; jj<16; jj+=4) {
1187           bap[jj] = *value++;
1188         }
1189       }
1190       noinsert2:;
1191       low = i;
1192     }
1193     ailen[row] = nrow;
1194   }
1195   PetscFunctionReturnVoid();
1196 }
1197 EXTERN_C_END
1198 
1199 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1200 #define matsetvalues4_ MATSETVALUES4
1201 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1202 #define matsetvalues4_ matsetvalues4
1203 #endif
1204 
1205 EXTERN_C_BEGIN
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatSetValues4_"
1208 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1209 {
1210   Mat         A = *AA;
1211   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1212   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1213   PetscInt    *ai=a->i,*ailen=a->ilen;
1214   PetscInt    *aj=a->j,brow,bcol;
1215   PetscInt    ridx,cidx,lastcol = -1;
1216   MatScalar   *ap,value,*aa=a->a,*bap;
1217 
1218   PetscFunctionBegin;
1219   for (k=0; k<m; k++) { /* loop over added rows */
1220     row  = im[k]; brow = row/4;
1221     rp   = aj + ai[brow];
1222     ap   = aa + 16*ai[brow];
1223     nrow = ailen[brow];
1224     low  = 0;
1225     high = nrow;
1226     for (l=0; l<n; l++) { /* loop over added columns */
1227       col = in[l]; bcol = col/4;
1228       ridx = row % 4; cidx = col % 4;
1229       value = v[l + k*n];
1230       if (col <= lastcol) low = 0; else high = nrow;
1231       lastcol = col;
1232       while (high-low > 7) {
1233         t = (low+high)/2;
1234         if (rp[t] > bcol) high = t;
1235         else              low  = t;
1236       }
1237       for (i=low; i<high; i++) {
1238         if (rp[i] > bcol) break;
1239         if (rp[i] == bcol) {
1240           bap  = ap +  16*i + 4*cidx + ridx;
1241           *bap += value;
1242           goto noinsert1;
1243         }
1244       }
1245       N = nrow++ - 1;
1246       high++; /* added new column thus must search to one higher than before */
1247       /* shift up all the later entries in this row */
1248       for (ii=N; ii>=i; ii--) {
1249         rp[ii+1] = rp[ii];
1250         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1251       }
1252       if (N>=i) {
1253         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1254       }
1255       rp[i]                    = bcol;
1256       ap[16*i + 4*cidx + ridx] = value;
1257       noinsert1:;
1258       low = i;
1259     }
1260     ailen[brow] = nrow;
1261   }
1262   PetscFunctionReturnVoid();
1263 }
1264 EXTERN_C_END
1265 
1266 /*
1267      Checks for missing diagonals
1268 */
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1271 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1272 {
1273   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1274   PetscErrorCode ierr;
1275   PetscInt       *diag,*jj = a->j,i;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1279   *missing = PETSC_FALSE;
1280   if (A->rmap->n > 0 && !jj) {
1281     *missing  = PETSC_TRUE;
1282     if (d) *d = 0;
1283     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1284   } else {
1285     diag     = a->diag;
1286     for (i=0; i<a->mbs; i++) {
1287       if (jj[diag[i]] != i) {
1288         *missing  = PETSC_TRUE;
1289         if (d) *d = i;
1290         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1291       }
1292     }
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1299 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1300 {
1301   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1302   PetscErrorCode ierr;
1303   PetscInt       i,j,m = a->mbs;
1304 
1305   PetscFunctionBegin;
1306   if (!a->diag) {
1307     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1308     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1309     a->free_diag = PETSC_TRUE;
1310   }
1311   for (i=0; i<m; i++) {
1312     a->diag[i] = a->i[i+1];
1313     for (j=a->i[i]; j<a->i[i+1]; j++) {
1314       if (a->j[j] == i) {
1315         a->diag[i] = j;
1316         break;
1317       }
1318     }
1319   }
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 
1324 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1328 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1329 {
1330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1333   PetscInt       *tia, *tja;
1334 
1335   PetscFunctionBegin;
1336   *nn = n;
1337   if (!ia) PetscFunctionReturn(0);
1338   if (symmetric) {
1339     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1340     nz   = tia[n];
1341   } else {
1342     tia = a->i; tja = a->j;
1343   }
1344 
1345   if (!blockcompressed && bs > 1) {
1346     (*nn) *= bs;
1347     /* malloc & create the natural set of indices */
1348     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1349     if (n) {
1350       (*ia)[0] = 0;
1351       for (j=1; j<bs; j++) {
1352         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1353       }
1354     }
1355 
1356     for (i=1; i<n; i++) {
1357       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1358       for (j=1; j<bs; j++) {
1359         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1360       }
1361     }
1362     if (n) {
1363       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1364     }
1365 
1366     if (ja) {
1367       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1368       cnt = 0;
1369       for (i=0; i<n; i++) {
1370         for (j=0; j<bs; j++) {
1371           for (k=tia[i]; k<tia[i+1]; k++) {
1372             for (l=0; l<bs; l++) {
1373               (*ja)[cnt++] = bs*tja[k] + l;
1374 	    }
1375           }
1376         }
1377       }
1378     }
1379 
1380     n     *= bs;
1381     nz *= bs*bs;
1382     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1383       ierr = PetscFree(tia);CHKERRQ(ierr);
1384       ierr = PetscFree(tja);CHKERRQ(ierr);
1385     }
1386   } else if (oshift == 1) {
1387     if (symmetric) {
1388       PetscInt nz = tia[A->rmap->n/bs];
1389       /*  add 1 to i and j indices */
1390       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1391       *ia = tia;
1392       if (ja) {
1393 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1394         *ja = tja;
1395       }
1396     } else {
1397       PetscInt nz = a->i[A->rmap->n/bs];
1398       /* malloc space and  add 1 to i and j indices */
1399       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1400       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1401       if (ja) {
1402 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1403 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1404       }
1405     }
1406   } else {
1407     *ia = tia;
1408     if (ja) *ja = tja;
1409   }
1410 
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1416 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1417 {
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   if (!ia) PetscFunctionReturn(0);
1422   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1423     ierr = PetscFree(*ia);CHKERRQ(ierr);
1424     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1431 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1432 {
1433   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1434   PetscErrorCode ierr;
1435 
1436   PetscFunctionBegin;
1437 #if defined(PETSC_USE_LOG)
1438   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1439 #endif
1440   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1441   if (a->row) {
1442     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1443   }
1444   if (a->col) {
1445     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1446   }
1447   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1448   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1449   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1450   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1451   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1452   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1453   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1454   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1455   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1456   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1457 
1458   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1459   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1460   ierr = PetscFree(a);CHKERRQ(ierr);
1461 
1462   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1463   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1465   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1466   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1467   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1468   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1469   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1470   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1476 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1477 {
1478   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   switch (op) {
1483   case MAT_ROW_ORIENTED:
1484     a->roworiented    = flg;
1485     break;
1486   case MAT_KEEP_NONZERO_PATTERN:
1487     a->keepnonzeropattern = flg;
1488     break;
1489   case MAT_NEW_NONZERO_LOCATIONS:
1490     a->nonew          = (flg ? 0 : 1);
1491     break;
1492   case MAT_NEW_NONZERO_LOCATION_ERR:
1493     a->nonew          = (flg ? -1 : 0);
1494     break;
1495   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1496     a->nonew          = (flg ? -2 : 0);
1497     break;
1498   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1499     a->nounused       = (flg ? -1 : 0);
1500     break;
1501   case MAT_NEW_DIAGONALS:
1502   case MAT_IGNORE_OFF_PROC_ENTRIES:
1503   case MAT_USE_HASH_TABLE:
1504     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1505     break;
1506   case MAT_SYMMETRIC:
1507   case MAT_STRUCTURALLY_SYMMETRIC:
1508   case MAT_HERMITIAN:
1509   case MAT_SYMMETRY_ETERNAL:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   default:
1513     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521 {
1522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1523   PetscErrorCode ierr;
1524   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1525   MatScalar      *aa,*aa_i;
1526   PetscScalar    *v_i;
1527 
1528   PetscFunctionBegin;
1529   bs  = A->rmap->bs;
1530   ai  = a->i;
1531   aj  = a->j;
1532   aa  = a->a;
1533   bs2 = a->bs2;
1534 
1535   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1536 
1537   bn  = row/bs;   /* Block number */
1538   bp  = row % bs; /* Block Position */
1539   M   = ai[bn+1] - ai[bn];
1540   *nz = bs*M;
1541 
1542   if (v) {
1543     *v = 0;
1544     if (*nz) {
1545       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1546       for (i=0; i<M; i++) { /* for each block in the block row */
1547         v_i  = *v + i*bs;
1548         aa_i = aa + bs2*(ai[bn] + i);
1549         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1550       }
1551     }
1552   }
1553 
1554   if (idx) {
1555     *idx = 0;
1556     if (*nz) {
1557       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1558       for (i=0; i<M; i++) { /* for each block in the block row */
1559         idx_i = *idx + i*bs;
1560         itmp  = bs*aj[ai[bn] + i];
1561         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1562       }
1563     }
1564   }
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1571 {
1572   PetscErrorCode ierr;
1573 
1574   PetscFunctionBegin;
1575   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1576   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1585 {
1586   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1587   Mat            C;
1588   PetscErrorCode ierr;
1589   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1590   PetscInt       *rows,*cols,bs2=a->bs2;
1591   MatScalar      *array;
1592 
1593   PetscFunctionBegin;
1594   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1595   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1596     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1597     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1598 
1599     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1600     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1601     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1602     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1603     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1604     ierr = PetscFree(col);CHKERRQ(ierr);
1605   } else {
1606     C = *B;
1607   }
1608 
1609   array = a->a;
1610   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1611   for (i=0; i<mbs; i++) {
1612     cols[0] = i*bs;
1613     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1614     len = ai[i+1] - ai[i];
1615     for (j=0; j<len; j++) {
1616       rows[0] = (*aj++)*bs;
1617       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1618       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1619       array += bs2;
1620     }
1621   }
1622   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1623 
1624   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1625   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1626 
1627   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1628     *B = C;
1629   } else {
1630     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1631   }
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1637 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1638 {
1639   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1640   PetscErrorCode ierr;
1641   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1642   int            fd;
1643   PetscScalar    *aa;
1644   FILE           *file;
1645 
1646   PetscFunctionBegin;
1647   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1648   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1649   col_lens[0] = MAT_FILE_CLASSID;
1650 
1651   col_lens[1] = A->rmap->N;
1652   col_lens[2] = A->cmap->n;
1653   col_lens[3] = a->nz*bs2;
1654 
1655   /* store lengths of each row and write (including header) to file */
1656   count = 0;
1657   for (i=0; i<a->mbs; i++) {
1658     for (j=0; j<bs; j++) {
1659       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1660     }
1661   }
1662   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1663   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1664 
1665   /* store column indices (zero start index) */
1666   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1667   count = 0;
1668   for (i=0; i<a->mbs; i++) {
1669     for (j=0; j<bs; j++) {
1670       for (k=a->i[i]; k<a->i[i+1]; k++) {
1671         for (l=0; l<bs; l++) {
1672           jj[count++] = bs*a->j[k] + l;
1673         }
1674       }
1675     }
1676   }
1677   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1678   ierr = PetscFree(jj);CHKERRQ(ierr);
1679 
1680   /* store nonzero values */
1681   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1682   count = 0;
1683   for (i=0; i<a->mbs; i++) {
1684     for (j=0; j<bs; j++) {
1685       for (k=a->i[i]; k<a->i[i+1]; k++) {
1686         for (l=0; l<bs; l++) {
1687           aa[count++] = a->a[bs2*k + l*bs + j];
1688         }
1689       }
1690     }
1691   }
1692   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1693   ierr = PetscFree(aa);CHKERRQ(ierr);
1694 
1695   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1696   if (file) {
1697     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1698   }
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1704 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1705 {
1706   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1707   PetscErrorCode    ierr;
1708   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1709   PetscViewerFormat format;
1710 
1711   PetscFunctionBegin;
1712   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1713   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1714     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1715   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1716     Mat aij;
1717     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1718     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1719     ierr = MatDestroy(aij);CHKERRQ(ierr);
1720   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1721      PetscFunctionReturn(0);
1722   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1723     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1724     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1725     for (i=0; i<a->mbs; i++) {
1726       for (j=0; j<bs; j++) {
1727         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1728         for (k=a->i[i]; k<a->i[i+1]; k++) {
1729           for (l=0; l<bs; l++) {
1730 #if defined(PETSC_USE_COMPLEX)
1731             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1732               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1733                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1736                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1737             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1738               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1739             }
1740 #else
1741             if (a->a[bs2*k + l*bs + j] != 0.0) {
1742               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1743             }
1744 #endif
1745           }
1746         }
1747         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1748       }
1749     }
1750     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1751   } else {
1752     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1753     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1754     for (i=0; i<a->mbs; i++) {
1755       for (j=0; j<bs; j++) {
1756         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1757         for (k=a->i[i]; k<a->i[i+1]; k++) {
1758           for (l=0; l<bs; l++) {
1759 #if defined(PETSC_USE_COMPLEX)
1760             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1761               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1762                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1764               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1765                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1766             } else {
1767               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1768             }
1769 #else
1770             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1771 #endif
1772           }
1773         }
1774         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1775       }
1776     }
1777     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1778   }
1779   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1785 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1786 {
1787   Mat            A = (Mat) Aa;
1788   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1789   PetscErrorCode ierr;
1790   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1791   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1792   MatScalar      *aa;
1793   PetscViewer    viewer;
1794 
1795   PetscFunctionBegin;
1796 
1797   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1798   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1799 
1800   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1801 
1802   /* loop over matrix elements drawing boxes */
1803   color = PETSC_DRAW_BLUE;
1804   for (i=0,row=0; i<mbs; i++,row+=bs) {
1805     for (j=a->i[i]; j<a->i[i+1]; j++) {
1806       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1807       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1808       aa = a->a + j*bs2;
1809       for (k=0; k<bs; k++) {
1810         for (l=0; l<bs; l++) {
1811           if (PetscRealPart(*aa++) >=  0.) continue;
1812           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1813         }
1814       }
1815     }
1816   }
1817   color = PETSC_DRAW_CYAN;
1818   for (i=0,row=0; i<mbs; i++,row+=bs) {
1819     for (j=a->i[i]; j<a->i[i+1]; j++) {
1820       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1821       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1822       aa = a->a + j*bs2;
1823       for (k=0; k<bs; k++) {
1824         for (l=0; l<bs; l++) {
1825           if (PetscRealPart(*aa++) != 0.) continue;
1826           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1827         }
1828       }
1829     }
1830   }
1831 
1832   color = PETSC_DRAW_RED;
1833   for (i=0,row=0; i<mbs; i++,row+=bs) {
1834     for (j=a->i[i]; j<a->i[i+1]; j++) {
1835       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1836       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1837       aa = a->a + j*bs2;
1838       for (k=0; k<bs; k++) {
1839         for (l=0; l<bs; l++) {
1840           if (PetscRealPart(*aa++) <= 0.) continue;
1841           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1842         }
1843       }
1844     }
1845   }
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1851 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1852 {
1853   PetscErrorCode ierr;
1854   PetscReal      xl,yl,xr,yr,w,h;
1855   PetscDraw      draw;
1856   PetscBool      isnull;
1857 
1858   PetscFunctionBegin;
1859 
1860   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1861   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1862 
1863   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1864   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1865   xr += w;    yr += h;  xl = -w;     yl = -h;
1866   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1867   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1868   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatView_SeqBAIJ"
1874 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1875 {
1876   PetscErrorCode ierr;
1877   PetscBool      iascii,isbinary,isdraw;
1878 
1879   PetscFunctionBegin;
1880   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1881   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1882   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1883   if (iascii){
1884     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1885   } else if (isbinary) {
1886     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1887   } else if (isdraw) {
1888     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1889   } else {
1890     Mat B;
1891     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1892     ierr = MatView(B,viewer);CHKERRQ(ierr);
1893     ierr = MatDestroy(B);CHKERRQ(ierr);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1901 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1902 {
1903   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1904   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1905   PetscInt    *ai = a->i,*ailen = a->ilen;
1906   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1907   MatScalar   *ap,*aa = a->a;
1908 
1909   PetscFunctionBegin;
1910   for (k=0; k<m; k++) { /* loop over rows */
1911     row  = im[k]; brow = row/bs;
1912     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1913     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1914     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1915     nrow = ailen[brow];
1916     for (l=0; l<n; l++) { /* loop over columns */
1917       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1918       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1919       col  = in[l] ;
1920       bcol = col/bs;
1921       cidx = col%bs;
1922       ridx = row%bs;
1923       high = nrow;
1924       low  = 0; /* assume unsorted */
1925       while (high-low > 5) {
1926         t = (low+high)/2;
1927         if (rp[t] > bcol) high = t;
1928         else             low  = t;
1929       }
1930       for (i=low; i<high; i++) {
1931         if (rp[i] > bcol) break;
1932         if (rp[i] == bcol) {
1933           *v++ = ap[bs2*i+bs*cidx+ridx];
1934           goto finished;
1935         }
1936       }
1937       *v++ = 0.0;
1938       finished:;
1939     }
1940   }
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1946 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1947 {
1948   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1949   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1950   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1951   PetscErrorCode    ierr;
1952   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1953   PetscBool         roworiented=a->roworiented;
1954   const PetscScalar *value = v;
1955   MatScalar         *ap,*aa = a->a,*bap;
1956 
1957   PetscFunctionBegin;
1958   if (roworiented) {
1959     stepval = (n-1)*bs;
1960   } else {
1961     stepval = (m-1)*bs;
1962   }
1963   for (k=0; k<m; k++) { /* loop over added rows */
1964     row  = im[k];
1965     if (row < 0) continue;
1966 #if defined(PETSC_USE_DEBUG)
1967     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1968 #endif
1969     rp   = aj + ai[row];
1970     ap   = aa + bs2*ai[row];
1971     rmax = imax[row];
1972     nrow = ailen[row];
1973     low  = 0;
1974     high = nrow;
1975     for (l=0; l<n; l++) { /* loop over added columns */
1976       if (in[l] < 0) continue;
1977 #if defined(PETSC_USE_DEBUG)
1978       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1979 #endif
1980       col = in[l];
1981       if (roworiented) {
1982         value = v + (k*(stepval+bs) + l)*bs;
1983       } else {
1984         value = v + (l*(stepval+bs) + k)*bs;
1985       }
1986       if (col <= lastcol) low = 0; else high = nrow;
1987       lastcol = col;
1988       while (high-low > 7) {
1989         t = (low+high)/2;
1990         if (rp[t] > col) high = t;
1991         else             low  = t;
1992       }
1993       for (i=low; i<high; i++) {
1994         if (rp[i] > col) break;
1995         if (rp[i] == col) {
1996           bap  = ap +  bs2*i;
1997           if (roworiented) {
1998             if (is == ADD_VALUES) {
1999               for (ii=0; ii<bs; ii++,value+=stepval) {
2000                 for (jj=ii; jj<bs2; jj+=bs) {
2001                   bap[jj] += *value++;
2002                 }
2003               }
2004             } else {
2005               for (ii=0; ii<bs; ii++,value+=stepval) {
2006                 for (jj=ii; jj<bs2; jj+=bs) {
2007                   bap[jj] = *value++;
2008                 }
2009               }
2010             }
2011           } else {
2012             if (is == ADD_VALUES) {
2013               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2014                 for (jj=0; jj<bs; jj++) {
2015                   bap[jj] += value[jj];
2016                 }
2017                 bap += bs;
2018               }
2019             } else {
2020               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2021                 for (jj=0; jj<bs; jj++) {
2022                   bap[jj]  = value[jj];
2023                 }
2024                 bap += bs;
2025               }
2026             }
2027           }
2028           goto noinsert2;
2029         }
2030       }
2031       if (nonew == 1) goto noinsert2;
2032       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2033       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2034       N = nrow++ - 1; high++;
2035       /* shift up all the later entries in this row */
2036       for (ii=N; ii>=i; ii--) {
2037         rp[ii+1] = rp[ii];
2038         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2039       }
2040       if (N >= i) {
2041         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2042       }
2043       rp[i] = col;
2044       bap   = ap +  bs2*i;
2045       if (roworiented) {
2046         for (ii=0; ii<bs; ii++,value+=stepval) {
2047           for (jj=ii; jj<bs2; jj+=bs) {
2048             bap[jj] = *value++;
2049           }
2050         }
2051       } else {
2052         for (ii=0; ii<bs; ii++,value+=stepval) {
2053           for (jj=0; jj<bs; jj++) {
2054             *bap++  = *value++;
2055           }
2056         }
2057       }
2058       noinsert2:;
2059       low = i;
2060     }
2061     ailen[row] = nrow;
2062   }
2063   PetscFunctionReturn(0);
2064 }
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2068 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2069 {
2070   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2071   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2072   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2073   PetscErrorCode ierr;
2074   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2075   MatScalar      *aa = a->a,*ap;
2076   PetscReal      ratio=0.6;
2077 
2078   PetscFunctionBegin;
2079   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2080 
2081   if (m) rmax = ailen[0];
2082   for (i=1; i<mbs; i++) {
2083     /* move each row back by the amount of empty slots (fshift) before it*/
2084     fshift += imax[i-1] - ailen[i-1];
2085     rmax   = PetscMax(rmax,ailen[i]);
2086     if (fshift) {
2087       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2088       N = ailen[i];
2089       for (j=0; j<N; j++) {
2090         ip[j-fshift] = ip[j];
2091         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2092       }
2093     }
2094     ai[i] = ai[i-1] + ailen[i-1];
2095   }
2096   if (mbs) {
2097     fshift += imax[mbs-1] - ailen[mbs-1];
2098     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2099   }
2100   /* reset ilen and imax for each row */
2101   for (i=0; i<mbs; i++) {
2102     ailen[i] = imax[i] = ai[i+1] - ai[i];
2103   }
2104   a->nz = ai[mbs];
2105 
2106   /* diagonals may have moved, so kill the diagonal pointers */
2107   a->idiagvalid = PETSC_FALSE;
2108   if (fshift && a->diag) {
2109     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2110     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2111     a->diag = 0;
2112   }
2113   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2114   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2115   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2116   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2117   A->info.mallocs     += a->reallocs;
2118   a->reallocs          = 0;
2119   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2120 
2121   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
2122   if (a->compressedrow.use){
2123     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2124   }
2125 
2126   A->same_nonzero = PETSC_TRUE;
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 /*
2131    This function returns an array of flags which indicate the locations of contiguous
2132    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2133    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2134    Assume: sizes should be long enough to hold all the values.
2135 */
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2138 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2139 {
2140   PetscInt   i,j,k,row;
2141   PetscBool  flg;
2142 
2143   PetscFunctionBegin;
2144   for (i=0,j=0; i<n; j++) {
2145     row = idx[i];
2146     if (row%bs!=0) { /* Not the begining of a block */
2147       sizes[j] = 1;
2148       i++;
2149     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2150       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2151       i++;
2152     } else { /* Begining of the block, so check if the complete block exists */
2153       flg = PETSC_TRUE;
2154       for (k=1; k<bs; k++) {
2155         if (row+k != idx[i+k]) { /* break in the block */
2156           flg = PETSC_FALSE;
2157           break;
2158         }
2159       }
2160       if (flg) { /* No break in the bs */
2161         sizes[j] = bs;
2162         i+= bs;
2163       } else {
2164         sizes[j] = 1;
2165         i++;
2166       }
2167     }
2168   }
2169   *bs_max = j;
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2175 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2176 {
2177   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2178   PetscErrorCode    ierr;
2179   PetscInt          i,j,k,count,*rows;
2180   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2181   PetscScalar       zero = 0.0;
2182   MatScalar         *aa;
2183   const PetscScalar *xx;
2184   PetscScalar       *bb;
2185 
2186   PetscFunctionBegin;
2187   /* fix right hand side if needed */
2188   if (x && b) {
2189     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2190     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2191     for (i=0; i<is_n; i++) {
2192       bb[is_idx[i]] = diag*xx[is_idx[i]];
2193     }
2194     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2195     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2196   }
2197 
2198   /* Make a copy of the IS and  sort it */
2199   /* allocate memory for rows,sizes */
2200   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2201 
2202   /* copy IS values to rows, and sort them */
2203   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2204   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2205 
2206   if (baij->keepnonzeropattern) {
2207     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2208     bs_max = is_n;
2209     A->same_nonzero = PETSC_TRUE;
2210   } else {
2211     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2212     A->same_nonzero = PETSC_FALSE;
2213   }
2214 
2215   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2216     row   = rows[j];
2217     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2218     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2219     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2220     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2221       if (diag != 0.0) {
2222         if (baij->ilen[row/bs] > 0) {
2223           baij->ilen[row/bs]       = 1;
2224           baij->j[baij->i[row/bs]] = row/bs;
2225           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2226         }
2227         /* Now insert all the diagonal values for this bs */
2228         for (k=0; k<bs; k++) {
2229           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2230         }
2231       } else { /* (diag == 0.0) */
2232         baij->ilen[row/bs] = 0;
2233       } /* end (diag == 0.0) */
2234     } else { /* (sizes[i] != bs) */
2235 #if defined (PETSC_USE_DEBUG)
2236       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2237 #endif
2238       for (k=0; k<count; k++) {
2239         aa[0] =  zero;
2240         aa    += bs;
2241       }
2242       if (diag != 0.0) {
2243         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2244       }
2245     }
2246   }
2247 
2248   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2249   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2255 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2256 {
2257   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2258   PetscErrorCode    ierr;
2259   PetscInt          i,j,k,count;
2260   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2261   PetscScalar       zero = 0.0;
2262   MatScalar         *aa;
2263   const PetscScalar *xx;
2264   PetscScalar       *bb;
2265   PetscBool         *zeroed,vecs = PETSC_FALSE;
2266 
2267   PetscFunctionBegin;
2268   /* fix right hand side if needed */
2269   if (x && b) {
2270     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2271     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2272     vecs = PETSC_TRUE;
2273   }
2274   A->same_nonzero = PETSC_TRUE;
2275 
2276   /* zero the columns */
2277   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2278   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2279   for (i=0; i<is_n; i++) {
2280     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2281     zeroed[is_idx[i]] = PETSC_TRUE;
2282   }
2283   for (i=0; i<A->rmap->N; i++) {
2284     if (!zeroed[i]) {
2285       row = i/bs;
2286       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2287         for (k=0; k<bs; k++) {
2288           col = bs*baij->j[j] + k;
2289 	  if (zeroed[col]) {
2290 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2291             if (vecs) bb[i] -= aa[0]*xx[col];
2292             aa[0] = 0.0;
2293           }
2294         }
2295       }
2296     } else if (vecs) bb[i] = diag*xx[i];
2297   }
2298   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2299   if (vecs) {
2300     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2301     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2302   }
2303 
2304   /* zero the rows */
2305   for (i=0; i<is_n; i++) {
2306     row   = is_idx[i];
2307     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2308     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2309     for (k=0; k<count; k++) {
2310       aa[0] =  zero;
2311       aa    += bs;
2312     }
2313     if (diag != 0.0) {
2314       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2315     }
2316   }
2317   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2323 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2324 {
2325   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2326   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2327   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2328   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2329   PetscErrorCode ierr;
2330   PetscInt       ridx,cidx,bs2=a->bs2;
2331   PetscBool      roworiented=a->roworiented;
2332   MatScalar      *ap,value,*aa=a->a,*bap;
2333 
2334   PetscFunctionBegin;
2335   if (v) PetscValidScalarPointer(v,6);
2336   for (k=0; k<m; k++) { /* loop over added rows */
2337     row  = im[k];
2338     brow = row/bs;
2339     if (row < 0) continue;
2340 #if defined(PETSC_USE_DEBUG)
2341     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2342 #endif
2343     rp   = aj + ai[brow];
2344     ap   = aa + bs2*ai[brow];
2345     rmax = imax[brow];
2346     nrow = ailen[brow];
2347     low  = 0;
2348     high = nrow;
2349     for (l=0; l<n; l++) { /* loop over added columns */
2350       if (in[l] < 0) continue;
2351 #if defined(PETSC_USE_DEBUG)
2352       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2353 #endif
2354       col = in[l]; bcol = col/bs;
2355       ridx = row % bs; cidx = col % bs;
2356       if (roworiented) {
2357         value = v[l + k*n];
2358       } else {
2359         value = v[k + l*m];
2360       }
2361       if (col <= lastcol) low = 0; else high = nrow;
2362       lastcol = col;
2363       while (high-low > 7) {
2364         t = (low+high)/2;
2365         if (rp[t] > bcol) high = t;
2366         else              low  = t;
2367       }
2368       for (i=low; i<high; i++) {
2369         if (rp[i] > bcol) break;
2370         if (rp[i] == bcol) {
2371           bap  = ap +  bs2*i + bs*cidx + ridx;
2372           if (is == ADD_VALUES) *bap += value;
2373           else                  *bap  = value;
2374           goto noinsert1;
2375         }
2376       }
2377       if (nonew == 1) goto noinsert1;
2378       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2379       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2380       N = nrow++ - 1; high++;
2381       /* shift up all the later entries in this row */
2382       for (ii=N; ii>=i; ii--) {
2383         rp[ii+1] = rp[ii];
2384         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2385       }
2386       if (N>=i) {
2387         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2388       }
2389       rp[i]                      = bcol;
2390       ap[bs2*i + bs*cidx + ridx] = value;
2391       a->nz++;
2392       noinsert1:;
2393       low = i;
2394     }
2395     ailen[brow] = nrow;
2396   }
2397   A->same_nonzero = PETSC_FALSE;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2403 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2404 {
2405   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2406   Mat            outA;
2407   PetscErrorCode ierr;
2408   PetscBool      row_identity,col_identity;
2409 
2410   PetscFunctionBegin;
2411   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2412   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2413   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2414   if (!row_identity || !col_identity) {
2415     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2416   }
2417 
2418   outA            = inA;
2419   inA->factortype = MAT_FACTOR_LU;
2420 
2421   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2422 
2423   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2424   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2425   a->row = row;
2426   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2427   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2428   a->col = col;
2429 
2430   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2431   if (a->icol) {
2432     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2433   }
2434   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2435   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2436 
2437   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2438   if (!a->solve_work) {
2439     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2440     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2441   }
2442   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2443 
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 EXTERN_C_BEGIN
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2450 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2451 {
2452   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2453   PetscInt    i,nz,mbs;
2454 
2455   PetscFunctionBegin;
2456   nz  = baij->maxnz;
2457   mbs = baij->mbs;
2458   for (i=0; i<nz; i++) {
2459     baij->j[i] = indices[i];
2460   }
2461   baij->nz = nz;
2462   for (i=0; i<mbs; i++) {
2463     baij->ilen[i] = baij->imax[i];
2464   }
2465   PetscFunctionReturn(0);
2466 }
2467 EXTERN_C_END
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2471 /*@
2472     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2473        in the matrix.
2474 
2475   Input Parameters:
2476 +  mat - the SeqBAIJ matrix
2477 -  indices - the column indices
2478 
2479   Level: advanced
2480 
2481   Notes:
2482     This can be called if you have precomputed the nonzero structure of the
2483   matrix and want to provide it to the matrix object to improve the performance
2484   of the MatSetValues() operation.
2485 
2486     You MUST have set the correct numbers of nonzeros per row in the call to
2487   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2488 
2489     MUST be called before any calls to MatSetValues();
2490 
2491 @*/
2492 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2493 {
2494   PetscErrorCode ierr;
2495 
2496   PetscFunctionBegin;
2497   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2498   PetscValidPointer(indices,2);
2499   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2505 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2506 {
2507   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2508   PetscErrorCode ierr;
2509   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2510   PetscReal      atmp;
2511   PetscScalar    *x,zero = 0.0;
2512   MatScalar      *aa;
2513   PetscInt       ncols,brow,krow,kcol;
2514 
2515   PetscFunctionBegin;
2516   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2517   bs   = A->rmap->bs;
2518   aa   = a->a;
2519   ai   = a->i;
2520   aj   = a->j;
2521   mbs  = a->mbs;
2522 
2523   ierr = VecSet(v,zero);CHKERRQ(ierr);
2524   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2525   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2526   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2527   for (i=0; i<mbs; i++) {
2528     ncols = ai[1] - ai[0]; ai++;
2529     brow  = bs*i;
2530     for (j=0; j<ncols; j++){
2531       for (kcol=0; kcol<bs; kcol++){
2532         for (krow=0; krow<bs; krow++){
2533           atmp = PetscAbsScalar(*aa);aa++;
2534           row = brow + krow;    /* row index */
2535           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2536           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2537         }
2538       }
2539       aj++;
2540     }
2541   }
2542   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatCopy_SeqBAIJ"
2548 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2549 {
2550   PetscErrorCode ierr;
2551 
2552   PetscFunctionBegin;
2553   /* If the two matrices have the same copy implementation, use fast copy. */
2554   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2555     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2556     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2557     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2558 
2559     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2560     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2561     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2562   } else {
2563     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2564   }
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2570 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2571 {
2572   PetscErrorCode ierr;
2573 
2574   PetscFunctionBegin;
2575   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 #undef __FUNCT__
2580 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2581 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2582 {
2583   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2584   PetscFunctionBegin;
2585   *array = a->a;
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 #undef __FUNCT__
2590 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2591 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2592 {
2593   PetscFunctionBegin;
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2599 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2600 {
2601   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2602   PetscErrorCode ierr;
2603   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2604   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2605 
2606   PetscFunctionBegin;
2607   if (str == SAME_NONZERO_PATTERN) {
2608     PetscScalar alpha = a;
2609     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2610   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2611     if (y->xtoy && y->XtoY != X) {
2612       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2613       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2614     }
2615     if (!y->xtoy) { /* get xtoy */
2616       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2617       y->XtoY = X;
2618       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2619     }
2620     bs2 = bs*bs;
2621     for (i=0; i<x->nz; i++) {
2622       j = 0;
2623       while (j < bs2){
2624         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2625         j++;
2626       }
2627     }
2628     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2629   } else {
2630     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2631   }
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2637 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2638 {
2639   PetscInt rbs,cbs;
2640   PetscErrorCode ierr;
2641 
2642   PetscFunctionBegin;
2643   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2644   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2645   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2646   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 #undef __FUNCT__
2651 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2652 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2653 {
2654   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2655   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2656   MatScalar      *aa = a->a;
2657 
2658   PetscFunctionBegin;
2659   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 #undef __FUNCT__
2664 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2665 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2666 {
2667   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2668   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2669   MatScalar      *aa = a->a;
2670 
2671   PetscFunctionBegin;
2672   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2673   PetscFunctionReturn(0);
2674 }
2675 
2676 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2677 
2678 #undef __FUNCT__
2679 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2680 /*
2681     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2682 */
2683 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2684 {
2685   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2686   PetscErrorCode ierr;
2687   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2688   PetscInt       nz = a->i[m],row,*jj,mr,col;
2689 
2690   PetscFunctionBegin;
2691   *nn = n;
2692   if (!ia) PetscFunctionReturn(0);
2693   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2694   else {
2695     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2696     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2697     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2698     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2699     jj = a->j;
2700     for (i=0; i<nz; i++) {
2701       collengths[jj[i]]++;
2702     }
2703     cia[0] = oshift;
2704     for (i=0; i<n; i++) {
2705       cia[i+1] = cia[i] + collengths[i];
2706     }
2707     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2708     jj   = a->j;
2709     for (row=0; row<m; row++) {
2710       mr = a->i[row+1] - a->i[row];
2711       for (i=0; i<mr; i++) {
2712         col = *jj++;
2713         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2714       }
2715     }
2716     ierr = PetscFree(collengths);CHKERRQ(ierr);
2717     *ia = cia; *ja = cja;
2718   }
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2724 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2725 {
2726   PetscErrorCode ierr;
2727 
2728   PetscFunctionBegin;
2729   if (!ia) PetscFunctionReturn(0);
2730   ierr = PetscFree(*ia);CHKERRQ(ierr);
2731   ierr = PetscFree(*ja);CHKERRQ(ierr);
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 #undef __FUNCT__
2736 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2737 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2738 {
2739   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2740   PetscErrorCode ierr;
2741   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2742   PetscScalar    dx,*y,*xx,*w3_array;
2743   PetscScalar    *vscale_array;
2744   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2745   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2746   void           *fctx = coloring->fctx;
2747   PetscBool      flg = PETSC_FALSE;
2748   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2749   Vec            x1_tmp;
2750 
2751   PetscFunctionBegin;
2752   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2753   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2754   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2755   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2756 
2757   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2758   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2759   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2760   if (flg) {
2761     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2762   } else {
2763     PetscBool  assembled;
2764     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2765     if (assembled) {
2766       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2767     }
2768   }
2769 
2770   x1_tmp = x1;
2771   if (!coloring->vscale){
2772     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2773   }
2774 
2775   /*
2776     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2777     coloring->F for the coarser grids from the finest
2778   */
2779   if (coloring->F) {
2780     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2781     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2782     if (m1 != m2) {
2783       coloring->F = 0;
2784       }
2785     }
2786 
2787   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2788     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2789   }
2790   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2791 
2792   /* Set w1 = F(x1) */
2793   if (coloring->F) {
2794     w1          = coloring->F; /* use already computed value of function */
2795     coloring->F = 0;
2796   } else {
2797     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2798     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2799     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2800   }
2801 
2802   if (!coloring->w3) {
2803     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2804     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2805   }
2806   w3 = coloring->w3;
2807 
2808     CHKMEMQ;
2809     /* Compute all the local scale factors, including ghost points */
2810   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2811   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2812   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2813   if (ctype == IS_COLORING_GHOSTED){
2814     col_start = 0; col_end = N;
2815   } else if (ctype == IS_COLORING_GLOBAL){
2816     xx = xx - start;
2817     vscale_array = vscale_array - start;
2818     col_start = start; col_end = N + start;
2819   }    CHKMEMQ;
2820   for (col=col_start; col<col_end; col++){
2821     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2822     if (coloring->htype[0] == 'w') {
2823       dx = 1.0 + unorm;
2824     } else {
2825       dx  = xx[col];
2826     }
2827     if (dx == 0.0) dx = 1.0;
2828 #if !defined(PETSC_USE_COMPLEX)
2829     if (dx < umin && dx >= 0.0)      dx = umin;
2830     else if (dx < 0.0 && dx > -umin) dx = -umin;
2831 #else
2832     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2833     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2834 #endif
2835     dx               *= epsilon;
2836     vscale_array[col] = 1.0/dx;
2837   }     CHKMEMQ;
2838   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2839   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2840   if (ctype == IS_COLORING_GLOBAL){
2841     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2842     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2843   }
2844   CHKMEMQ;
2845   if (coloring->vscaleforrow) {
2846     vscaleforrow = coloring->vscaleforrow;
2847   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2848 
2849   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2850   /*
2851     Loop over each color
2852   */
2853   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2854   for (k=0; k<coloring->ncolors; k++) {
2855     coloring->currentcolor = k;
2856     for (i=0; i<bs; i++) {
2857       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2858       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2859       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2860       /*
2861 	Loop over each column associated with color
2862 	adding the perturbation to the vector w3.
2863       */
2864       for (l=0; l<coloring->ncolumns[k]; l++) {
2865 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2866 	if (coloring->htype[0] == 'w') {
2867 	  dx = 1.0 + unorm;
2868 	} else {
2869 	  dx  = xx[col];
2870 	}
2871 	if (dx == 0.0) dx = 1.0;
2872 #if !defined(PETSC_USE_COMPLEX)
2873 	if (dx < umin && dx >= 0.0)      dx = umin;
2874 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2875 #else
2876 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2877 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2878 #endif
2879 	dx            *= epsilon;
2880 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2881 	w3_array[col] += dx;
2882       }
2883       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2884       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2885 
2886       /*
2887 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2888 	w2 = F(x1 + dx) - F(x1)
2889       */
2890       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2891       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2892       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2893       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2894 
2895       /*
2896 	Loop over rows of vector, putting results into Jacobian matrix
2897       */
2898       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2899       for (l=0; l<coloring->nrows[k]; l++) {
2900 	row    = bs*coloring->rows[k][l];             /* local row index */
2901 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2902         for (j=0; j<bs; j++) {
2903   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2904           srows[j]  = row + start + j;
2905         }
2906 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2907       }
2908       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2909     }
2910   } /* endof for each color */
2911   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2912   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2913   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2914   ierr = PetscFree(srows);CHKERRQ(ierr);
2915 
2916   coloring->currentcolor = -1;
2917   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2918   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2919   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2920   PetscFunctionReturn(0);
2921 }
2922 
2923 /* -------------------------------------------------------------------*/
2924 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2925        MatGetRow_SeqBAIJ,
2926        MatRestoreRow_SeqBAIJ,
2927        MatMult_SeqBAIJ_N,
2928 /* 4*/ MatMultAdd_SeqBAIJ_N,
2929        MatMultTranspose_SeqBAIJ,
2930        MatMultTransposeAdd_SeqBAIJ,
2931        0,
2932        0,
2933        0,
2934 /*10*/ 0,
2935        MatLUFactor_SeqBAIJ,
2936        0,
2937        0,
2938        MatTranspose_SeqBAIJ,
2939 /*15*/ MatGetInfo_SeqBAIJ,
2940        MatEqual_SeqBAIJ,
2941        MatGetDiagonal_SeqBAIJ,
2942        MatDiagonalScale_SeqBAIJ,
2943        MatNorm_SeqBAIJ,
2944 /*20*/ 0,
2945        MatAssemblyEnd_SeqBAIJ,
2946        MatSetOption_SeqBAIJ,
2947        MatZeroEntries_SeqBAIJ,
2948 /*24*/ MatZeroRows_SeqBAIJ,
2949        0,
2950        0,
2951        0,
2952        0,
2953 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2954        0,
2955        0,
2956        MatGetArray_SeqBAIJ,
2957        MatRestoreArray_SeqBAIJ,
2958 /*34*/ MatDuplicate_SeqBAIJ,
2959        0,
2960        0,
2961        MatILUFactor_SeqBAIJ,
2962        0,
2963 /*39*/ MatAXPY_SeqBAIJ,
2964        MatGetSubMatrices_SeqBAIJ,
2965        MatIncreaseOverlap_SeqBAIJ,
2966        MatGetValues_SeqBAIJ,
2967        MatCopy_SeqBAIJ,
2968 /*44*/ 0,
2969        MatScale_SeqBAIJ,
2970        0,
2971        0,
2972        MatZeroRowsColumns_SeqBAIJ,
2973 /*49*/ MatSetBlockSize_SeqBAIJ,
2974        MatGetRowIJ_SeqBAIJ,
2975        MatRestoreRowIJ_SeqBAIJ,
2976        MatGetColumnIJ_SeqBAIJ,
2977        MatRestoreColumnIJ_SeqBAIJ,
2978 /*54*/ MatFDColoringCreate_SeqAIJ,
2979        0,
2980        0,
2981        0,
2982        MatSetValuesBlocked_SeqBAIJ,
2983 /*59*/ MatGetSubMatrix_SeqBAIJ,
2984        MatDestroy_SeqBAIJ,
2985        MatView_SeqBAIJ,
2986        0,
2987        0,
2988 /*64*/ 0,
2989        0,
2990        0,
2991        0,
2992        0,
2993 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2994        0,
2995        MatConvert_Basic,
2996        0,
2997        0,
2998 /*74*/ 0,
2999        MatFDColoringApply_BAIJ,
3000        0,
3001        0,
3002        0,
3003 /*79*/ 0,
3004        0,
3005        0,
3006        0,
3007        MatLoad_SeqBAIJ,
3008 /*84*/ 0,
3009        0,
3010        0,
3011        0,
3012        0,
3013 /*89*/ 0,
3014        0,
3015        0,
3016        0,
3017        0,
3018 /*94*/ 0,
3019        0,
3020        0,
3021        0,
3022        0,
3023 /*99*/0,
3024        0,
3025        0,
3026        0,
3027        0,
3028 /*104*/0,
3029        MatRealPart_SeqBAIJ,
3030        MatImaginaryPart_SeqBAIJ,
3031        0,
3032        0,
3033 /*109*/0,
3034        0,
3035        0,
3036        0,
3037        MatMissingDiagonal_SeqBAIJ,
3038 /*114*/0,
3039        0,
3040        0,
3041        0,
3042        0,
3043 /*119*/0,
3044        0,
3045        MatMultHermitianTranspose_SeqBAIJ,
3046        MatMultHermitianTransposeAdd_SeqBAIJ,
3047        0,
3048 };
3049 
3050 EXTERN_C_BEGIN
3051 #undef __FUNCT__
3052 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3053 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
3054 {
3055   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3056   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3057   PetscErrorCode ierr;
3058 
3059   PetscFunctionBegin;
3060   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3061 
3062   /* allocate space for values if not already there */
3063   if (!aij->saved_values) {
3064     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3065     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3066   }
3067 
3068   /* copy values over */
3069   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 EXTERN_C_END
3073 
3074 EXTERN_C_BEGIN
3075 #undef __FUNCT__
3076 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3077 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
3078 {
3079   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3080   PetscErrorCode ierr;
3081   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3082 
3083   PetscFunctionBegin;
3084   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3085   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3086 
3087   /* copy values over */
3088   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3089   PetscFunctionReturn(0);
3090 }
3091 EXTERN_C_END
3092 
3093 EXTERN_C_BEGIN
3094 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3095 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3096 EXTERN_C_END
3097 
3098 EXTERN_C_BEGIN
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3101 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3102 {
3103   Mat_SeqBAIJ    *b;
3104   PetscErrorCode ierr;
3105   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3106   PetscBool      flg,skipallocation = PETSC_FALSE;
3107 
3108   PetscFunctionBegin;
3109 
3110   if (nz == MAT_SKIP_ALLOCATION) {
3111     skipallocation = PETSC_TRUE;
3112     nz             = 0;
3113   }
3114 
3115   if (bs < 0) {
3116     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3117       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3118     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3119     bs   = PetscAbs(bs);
3120   }
3121   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3122   bs   = newbs;
3123 
3124   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3125   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3126   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3127   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3128 
3129   B->preallocated = PETSC_TRUE;
3130 
3131   mbs  = B->rmap->n/bs;
3132   nbs  = B->cmap->n/bs;
3133   bs2  = bs*bs;
3134 
3135   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3136 
3137   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3138   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3139   if (nnz) {
3140     for (i=0; i<mbs; i++) {
3141       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3142       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3143     }
3144   }
3145 
3146   b       = (Mat_SeqBAIJ*)B->data;
3147   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3148     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3149   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3150 
3151   if (!flg) {
3152     switch (bs) {
3153     case 1:
3154       B->ops->mult            = MatMult_SeqBAIJ_1;
3155       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3156       B->ops->sor             = MatSOR_SeqBAIJ_1;
3157       break;
3158     case 2:
3159       B->ops->mult            = MatMult_SeqBAIJ_2;
3160       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3161       B->ops->sor             = MatSOR_SeqBAIJ_2;
3162       break;
3163     case 3:
3164       B->ops->mult            = MatMult_SeqBAIJ_3;
3165       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3166       B->ops->sor             = MatSOR_SeqBAIJ_3;
3167       break;
3168     case 4:
3169       B->ops->mult            = MatMult_SeqBAIJ_4;
3170       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3171       B->ops->sor             = MatSOR_SeqBAIJ_4;
3172       break;
3173     case 5:
3174       B->ops->mult            = MatMult_SeqBAIJ_5;
3175       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3176       B->ops->sor             = MatSOR_SeqBAIJ_5;
3177       break;
3178     case 6:
3179       B->ops->mult            = MatMult_SeqBAIJ_6;
3180       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3181       B->ops->sor             = MatSOR_SeqBAIJ_6;
3182       break;
3183     case 7:
3184       B->ops->mult            = MatMult_SeqBAIJ_7;
3185       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3186       B->ops->sor             = MatSOR_SeqBAIJ_7;
3187       break;
3188     case 15:
3189       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3190       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3191       B->ops->sor             = MatSOR_SeqBAIJ_N;
3192       break;
3193     default:
3194       B->ops->mult            = MatMult_SeqBAIJ_N;
3195       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3196       B->ops->sor             = MatSOR_SeqBAIJ_N;
3197       break;
3198     }
3199   }
3200   B->rmap->bs  = bs;
3201   b->mbs       = mbs;
3202   b->nbs       = nbs;
3203   if (!skipallocation) {
3204     if (!b->imax) {
3205       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3206       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3207       b->free_imax_ilen = PETSC_TRUE;
3208     }
3209     /* b->ilen will count nonzeros in each block row so far. */
3210     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3211     if (!nnz) {
3212       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3213       else if (nz < 0) nz = 1;
3214       for (i=0; i<mbs; i++) b->imax[i] = nz;
3215       nz = nz*mbs;
3216     } else {
3217       nz = 0;
3218       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3219     }
3220 
3221     /* allocate the matrix space */
3222     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3223     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3224     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3225     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3226     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3227     b->singlemalloc = PETSC_TRUE;
3228     b->i[0] = 0;
3229     for (i=1; i<mbs+1; i++) {
3230       b->i[i] = b->i[i-1] + b->imax[i-1];
3231     }
3232     b->free_a     = PETSC_TRUE;
3233     b->free_ij    = PETSC_TRUE;
3234   } else {
3235     b->free_a     = PETSC_FALSE;
3236     b->free_ij    = PETSC_FALSE;
3237   }
3238 
3239   B->rmap->bs          = bs;
3240   b->bs2              = bs2;
3241   b->mbs              = mbs;
3242   b->nz               = 0;
3243   b->maxnz            = nz;
3244   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3245   PetscFunctionReturn(0);
3246 }
3247 EXTERN_C_END
3248 
3249 EXTERN_C_BEGIN
3250 #undef __FUNCT__
3251 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3252 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3253 {
3254   PetscInt       i,m,nz,nz_max=0,*nnz;
3255   PetscScalar    *values=0;
3256   PetscErrorCode ierr;
3257 
3258   PetscFunctionBegin;
3259   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3260   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3261   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3262   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3263   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3264   m = B->rmap->n/bs;
3265 
3266   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3267   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3268   for(i=0; i<m; i++) {
3269     nz = ii[i+1]- ii[i];
3270     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3271     nz_max = PetscMax(nz_max, nz);
3272     nnz[i] = nz;
3273   }
3274   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3275   ierr = PetscFree(nnz);CHKERRQ(ierr);
3276 
3277   values = (PetscScalar*)V;
3278   if (!values) {
3279     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3280     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3281   }
3282   for (i=0; i<m; i++) {
3283     PetscInt          ncols  = ii[i+1] - ii[i];
3284     const PetscInt    *icols = jj + ii[i];
3285     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3286     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3287   }
3288   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3289   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3290   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3291 
3292   PetscFunctionReturn(0);
3293 }
3294 EXTERN_C_END
3295 
3296 
3297 EXTERN_C_BEGIN
3298 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3299 #if defined(PETSC_HAVE_MUMPS)
3300 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3301 #endif
3302 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3303 EXTERN_C_END
3304 
3305 /*MC
3306    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3307    block sparse compressed row format.
3308 
3309    Options Database Keys:
3310 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3311 
3312   Level: beginner
3313 
3314 .seealso: MatCreateSeqBAIJ()
3315 M*/
3316 
3317 
3318 EXTERN_C_BEGIN
3319 #undef __FUNCT__
3320 #define __FUNCT__ "MatCreate_SeqBAIJ"
3321 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3322 {
3323   PetscErrorCode ierr;
3324   PetscMPIInt    size;
3325   Mat_SeqBAIJ    *b;
3326 
3327   PetscFunctionBegin;
3328   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3329   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3330 
3331   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3332   B->data = (void*)b;
3333   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3334   B->mapping               = 0;
3335   b->row                   = 0;
3336   b->col                   = 0;
3337   b->icol                  = 0;
3338   b->reallocs              = 0;
3339   b->saved_values          = 0;
3340 
3341   b->roworiented           = PETSC_TRUE;
3342   b->nonew                 = 0;
3343   b->diag                  = 0;
3344   b->solve_work            = 0;
3345   b->mult_work             = 0;
3346   B->spptr                 = 0;
3347   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3348   b->keepnonzeropattern    = PETSC_FALSE;
3349   b->xtoy                  = 0;
3350   b->XtoY                  = 0;
3351   b->compressedrow.use     = PETSC_FALSE;
3352   b->compressedrow.nrows   = 0;
3353   b->compressedrow.i       = PETSC_NULL;
3354   b->compressedrow.rindex  = PETSC_NULL;
3355   b->compressedrow.checked = PETSC_FALSE;
3356   B->same_nonzero          = PETSC_FALSE;
3357 
3358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3359                                      "MatGetFactorAvailable_seqbaij_petsc",
3360                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3362                                      "MatGetFactor_seqbaij_petsc",
3363                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3364 #if defined(PETSC_HAVE_MUMPS)
3365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3366 #endif
3367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3368                                      "MatInvertBlockDiagonal_SeqBAIJ",
3369                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3371                                      "MatStoreValues_SeqBAIJ",
3372                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3374                                      "MatRetrieveValues_SeqBAIJ",
3375                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3376   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3377                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3378                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3380                                      "MatConvert_SeqBAIJ_SeqAIJ",
3381                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3383                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3384                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3386                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3387                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3389                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3390                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3391   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3392   PetscFunctionReturn(0);
3393 }
3394 EXTERN_C_END
3395 
3396 #undef __FUNCT__
3397 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3398 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3399 {
3400   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3401   PetscErrorCode ierr;
3402   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3403 
3404   PetscFunctionBegin;
3405   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3406 
3407   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3408     c->imax = a->imax;
3409     c->ilen = a->ilen;
3410     c->free_imax_ilen = PETSC_FALSE;
3411   } else {
3412     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3413     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3414     for (i=0; i<mbs; i++) {
3415       c->imax[i] = a->imax[i];
3416       c->ilen[i] = a->ilen[i];
3417     }
3418     c->free_imax_ilen = PETSC_TRUE;
3419   }
3420 
3421   /* allocate the matrix space */
3422   if (mallocmatspace){
3423     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3424       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3425       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3426       c->singlemalloc = PETSC_FALSE;
3427       c->free_ij      = PETSC_FALSE;
3428       c->i            = a->i;
3429       c->j            = a->j;
3430       c->parent       = A;
3431       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3432       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3433       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3434     } else {
3435       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3436       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3437       c->singlemalloc = PETSC_TRUE;
3438       c->free_ij      = PETSC_TRUE;
3439       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3440       if (mbs > 0) {
3441 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3442 	if (cpvalues == MAT_COPY_VALUES) {
3443 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3444 	} else {
3445 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3446 	}
3447       }
3448     }
3449   }
3450 
3451   c->roworiented = a->roworiented;
3452   c->nonew       = a->nonew;
3453   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3454   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3455   c->bs2         = a->bs2;
3456   c->mbs         = a->mbs;
3457   c->nbs         = a->nbs;
3458 
3459   if (a->diag) {
3460     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3461       c->diag      = a->diag;
3462       c->free_diag = PETSC_FALSE;
3463     } else {
3464       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3465       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3466       for (i=0; i<mbs; i++) {
3467         c->diag[i] = a->diag[i];
3468       }
3469       c->free_diag = PETSC_TRUE;
3470     }
3471   } else c->diag        = 0;
3472   c->nz                 = a->nz;
3473   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3474   c->solve_work         = 0;
3475   c->mult_work          = 0;
3476   c->free_a             = PETSC_TRUE;
3477   c->free_ij            = PETSC_TRUE;
3478   C->preallocated       = PETSC_TRUE;
3479   C->assembled          = PETSC_TRUE;
3480 
3481   c->compressedrow.use     = a->compressedrow.use;
3482   c->compressedrow.nrows   = a->compressedrow.nrows;
3483   c->compressedrow.checked = a->compressedrow.checked;
3484   if (a->compressedrow.checked && a->compressedrow.use){
3485     i = a->compressedrow.nrows;
3486     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3487     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3488     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3489     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3490   } else {
3491     c->compressedrow.use    = PETSC_FALSE;
3492     c->compressedrow.i      = PETSC_NULL;
3493     c->compressedrow.rindex = PETSC_NULL;
3494   }
3495   C->same_nonzero = A->same_nonzero;
3496   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3497   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3503 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3504 {
3505     PetscErrorCode ierr;
3506 
3507   PetscFunctionBegin;
3508   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3509   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3510   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3511   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3512   PetscFunctionReturn(0);
3513 }
3514 
3515 #undef __FUNCT__
3516 #define __FUNCT__ "MatLoad_SeqBAIJ"
3517 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3518 {
3519   Mat_SeqBAIJ    *a;
3520   PetscErrorCode ierr;
3521   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3522   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3523   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3524   PetscInt       *masked,nmask,tmp,bs2,ishift;
3525   PetscMPIInt    size;
3526   int            fd;
3527   PetscScalar    *aa;
3528   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3529 
3530   PetscFunctionBegin;
3531   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3532   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3533   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3534   bs2  = bs*bs;
3535 
3536   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3537   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3538   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3539   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3540   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3541   M = header[1]; N = header[2]; nz = header[3];
3542 
3543   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3544   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3545 
3546   /*
3547      This code adds extra rows to make sure the number of rows is
3548     divisible by the blocksize
3549   */
3550   mbs        = M/bs;
3551   extra_rows = bs - M + bs*(mbs);
3552   if (extra_rows == bs) extra_rows = 0;
3553   else                  mbs++;
3554   if (extra_rows) {
3555     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3556   }
3557 
3558   /* Set global sizes if not already set */
3559   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3560     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3561   } else { /* Check if the matrix global sizes are correct */
3562     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3563     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3564   }
3565 
3566   /* read in row lengths */
3567   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3568   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3569   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3570 
3571   /* read in column indices */
3572   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3573   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3574   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3575 
3576   /* loop over row lengths determining block row lengths */
3577   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3578   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3579   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3580   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3581   rowcount = 0;
3582   nzcount = 0;
3583   for (i=0; i<mbs; i++) {
3584     nmask = 0;
3585     for (j=0; j<bs; j++) {
3586       kmax = rowlengths[rowcount];
3587       for (k=0; k<kmax; k++) {
3588         tmp = jj[nzcount++]/bs;
3589         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3590       }
3591       rowcount++;
3592     }
3593     browlengths[i] += nmask;
3594     /* zero out the mask elements we set */
3595     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3596   }
3597 
3598   /* Do preallocation  */
3599   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3600   a = (Mat_SeqBAIJ*)newmat->data;
3601 
3602   /* set matrix "i" values */
3603   a->i[0] = 0;
3604   for (i=1; i<= mbs; i++) {
3605     a->i[i]      = a->i[i-1] + browlengths[i-1];
3606     a->ilen[i-1] = browlengths[i-1];
3607   }
3608   a->nz         = 0;
3609   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3610 
3611   /* read in nonzero values */
3612   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3613   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3614   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3615 
3616   /* set "a" and "j" values into matrix */
3617   nzcount = 0; jcount = 0;
3618   for (i=0; i<mbs; i++) {
3619     nzcountb = nzcount;
3620     nmask    = 0;
3621     for (j=0; j<bs; j++) {
3622       kmax = rowlengths[i*bs+j];
3623       for (k=0; k<kmax; k++) {
3624         tmp = jj[nzcount++]/bs;
3625 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3626       }
3627     }
3628     /* sort the masked values */
3629     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3630 
3631     /* set "j" values into matrix */
3632     maskcount = 1;
3633     for (j=0; j<nmask; j++) {
3634       a->j[jcount++]  = masked[j];
3635       mask[masked[j]] = maskcount++;
3636     }
3637     /* set "a" values into matrix */
3638     ishift = bs2*a->i[i];
3639     for (j=0; j<bs; j++) {
3640       kmax = rowlengths[i*bs+j];
3641       for (k=0; k<kmax; k++) {
3642         tmp       = jj[nzcountb]/bs ;
3643         block     = mask[tmp] - 1;
3644         point     = jj[nzcountb] - bs*tmp;
3645         idx       = ishift + bs2*block + j + bs*point;
3646         a->a[idx] = (MatScalar)aa[nzcountb++];
3647       }
3648     }
3649     /* zero out the mask elements we set */
3650     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3651   }
3652   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3653 
3654   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3655   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3656   ierr = PetscFree(aa);CHKERRQ(ierr);
3657   ierr = PetscFree(jj);CHKERRQ(ierr);
3658   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3659 
3660   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3661   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3662   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3663   PetscFunctionReturn(0);
3664 }
3665 
3666 #undef __FUNCT__
3667 #define __FUNCT__ "MatCreateSeqBAIJ"
3668 /*@C
3669    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3670    compressed row) format.  For good matrix assembly performance the
3671    user should preallocate the matrix storage by setting the parameter nz
3672    (or the array nnz).  By setting these parameters accurately, performance
3673    during matrix assembly can be increased by more than a factor of 50.
3674 
3675    Collective on MPI_Comm
3676 
3677    Input Parameters:
3678 +  comm - MPI communicator, set to PETSC_COMM_SELF
3679 .  bs - size of block
3680 .  m - number of rows
3681 .  n - number of columns
3682 .  nz - number of nonzero blocks  per block row (same for all rows)
3683 -  nnz - array containing the number of nonzero blocks in the various block rows
3684          (possibly different for each block row) or PETSC_NULL
3685 
3686    Output Parameter:
3687 .  A - the matrix
3688 
3689    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3690    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3691    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3692 
3693    Options Database Keys:
3694 .   -mat_no_unroll - uses code that does not unroll the loops in the
3695                      block calculations (much slower)
3696 .    -mat_block_size - size of the blocks to use
3697 
3698    Level: intermediate
3699 
3700    Notes:
3701    The number of rows and columns must be divisible by blocksize.
3702 
3703    If the nnz parameter is given then the nz parameter is ignored
3704 
3705    A nonzero block is any block that as 1 or more nonzeros in it
3706 
3707    The block AIJ format is fully compatible with standard Fortran 77
3708    storage.  That is, the stored row and column indices can begin at
3709    either one (as in Fortran) or zero.  See the users' manual for details.
3710 
3711    Specify the preallocated storage with either nz or nnz (not both).
3712    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3713    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3714    matrices.
3715 
3716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3717 @*/
3718 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3719 {
3720   PetscErrorCode ierr;
3721 
3722   PetscFunctionBegin;
3723   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3724   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3725   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3726   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3727   PetscFunctionReturn(0);
3728 }
3729 
3730 #undef __FUNCT__
3731 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3732 /*@C
3733    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3734    per row in the matrix. For good matrix assembly performance the
3735    user should preallocate the matrix storage by setting the parameter nz
3736    (or the array nnz).  By setting these parameters accurately, performance
3737    during matrix assembly can be increased by more than a factor of 50.
3738 
3739    Collective on MPI_Comm
3740 
3741    Input Parameters:
3742 +  A - the matrix
3743 .  bs - size of block
3744 .  nz - number of block nonzeros per block row (same for all rows)
3745 -  nnz - array containing the number of block nonzeros in the various block rows
3746          (possibly different for each block row) or PETSC_NULL
3747 
3748    Options Database Keys:
3749 .   -mat_no_unroll - uses code that does not unroll the loops in the
3750                      block calculations (much slower)
3751 .    -mat_block_size - size of the blocks to use
3752 
3753    Level: intermediate
3754 
3755    Notes:
3756    If the nnz parameter is given then the nz parameter is ignored
3757 
3758    You can call MatGetInfo() to get information on how effective the preallocation was;
3759    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3760    You can also run with the option -info and look for messages with the string
3761    malloc in them to see if additional memory allocation was needed.
3762 
3763    The block AIJ format is fully compatible with standard Fortran 77
3764    storage.  That is, the stored row and column indices can begin at
3765    either one (as in Fortran) or zero.  See the users' manual for details.
3766 
3767    Specify the preallocated storage with either nz or nnz (not both).
3768    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3769    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3770 
3771 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3772 @*/
3773 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3774 {
3775   PetscErrorCode ierr;
3776 
3777   PetscFunctionBegin;
3778   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3779   PetscFunctionReturn(0);
3780 }
3781 
3782 #undef __FUNCT__
3783 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3784 /*@C
3785    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3786    (the default sequential PETSc format).
3787 
3788    Collective on MPI_Comm
3789 
3790    Input Parameters:
3791 +  A - the matrix
3792 .  i - the indices into j for the start of each local row (starts with zero)
3793 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3794 -  v - optional values in the matrix
3795 
3796    Level: developer
3797 
3798 .keywords: matrix, aij, compressed row, sparse
3799 
3800 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3801 @*/
3802 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3803 {
3804   PetscErrorCode ierr;
3805 
3806   PetscFunctionBegin;
3807   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3808   PetscFunctionReturn(0);
3809 }
3810 
3811 
3812 #undef __FUNCT__
3813 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3814 /*@
3815      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3816 
3817      Collective on MPI_Comm
3818 
3819    Input Parameters:
3820 +  comm - must be an MPI communicator of size 1
3821 .  bs - size of block
3822 .  m - number of rows
3823 .  n - number of columns
3824 .  i - row indices
3825 .  j - column indices
3826 -  a - matrix values
3827 
3828    Output Parameter:
3829 .  mat - the matrix
3830 
3831    Level: advanced
3832 
3833    Notes:
3834        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3835     once the matrix is destroyed
3836 
3837        You cannot set new nonzero locations into this matrix, that will generate an error.
3838 
3839        The i and j indices are 0 based
3840 
3841        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3842 
3843 
3844 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3845 
3846 @*/
3847 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3848 {
3849   PetscErrorCode ierr;
3850   PetscInt       ii;
3851   Mat_SeqBAIJ    *baij;
3852 
3853   PetscFunctionBegin;
3854   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3855   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3856 
3857   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3858   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3859   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3860   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3861   baij = (Mat_SeqBAIJ*)(*mat)->data;
3862   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3863   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3864 
3865   baij->i = i;
3866   baij->j = j;
3867   baij->a = a;
3868   baij->singlemalloc = PETSC_FALSE;
3869   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3870   baij->free_a       = PETSC_FALSE;
3871   baij->free_ij       = PETSC_FALSE;
3872 
3873   for (ii=0; ii<m; ii++) {
3874     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3875 #if defined(PETSC_USE_DEBUG)
3876     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3877 #endif
3878   }
3879 #if defined(PETSC_USE_DEBUG)
3880   for (ii=0; ii<baij->i[m]; ii++) {
3881     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3882     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3883   }
3884 #endif
3885 
3886   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3887   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3888   PetscFunctionReturn(0);
3889 }
3890