xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b2573a8ae65ee0f826ad4534e36923d41f9eb9b8)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,x1,s1;
133   const PetscScalar *b;
134   const MatScalar   *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode    ierr;
136   PetscInt          m = a->mbs,i,i2,nz,j;
137   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0]   = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v  = aa + ai[i];
162         vi = aj + ai[i];
163         nz = diag[i] - ai[i];
164         s1 = b[i2];
165         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
166         x[i2]  = idiag[0]*s1;
167         idiag += 1;
168         i2    += 1;
169       }
170       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
171       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
172     }
173     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
174         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
175       i2    = 0;
176       mdiag = a->idiag+a->mbs;
177       for (i=0; i<m; i++) {
178         x1     = x[i2];
179         x[i2]  = mdiag[0]*x1;
180         mdiag += 1;
181         i2    += 1;
182       }
183       ierr = PetscLogFlops(m);CHKERRQ(ierr);
184     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
185       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
186     }
187     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
188       idiag  = a->idiag+a->mbs - 1;
189       i2     = m - 1;
190       x1     = x[i2];
191       x[i2]  = idiag[0]*x1;
192       idiag -= 1;
193       i2    -= 1;
194       for (i=m-2; i>=0; i--) {
195         v  = aa + (diag[i]+1);
196         vi = aj + diag[i] + 1;
197         nz = ai[i+1] - diag[i] - 1;
198         s1 = x[i2];
199         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
200         x[i2]  = idiag[0]*s1;
201         idiag -= 1;
202         i2    -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215 {
216   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
217   PetscScalar       *x,x1,x2,s1,s2;
218   const PetscScalar *b;
219   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
220   PetscErrorCode    ierr;
221   PetscInt          m = a->mbs,i,i2,nz,idx,j,it;
222   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
223 
224   PetscFunctionBegin;
225   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
226   its = its*lits;
227   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
228   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
229   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
230   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
231   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
232 
233   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
234 
235   diag  = a->diag;
236   idiag = a->idiag;
237   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
238   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
239 
240   if (flag & SOR_ZERO_INITIAL_GUESS) {
241     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
242       x[0]   = b[0]*idiag[0] + b[1]*idiag[2];
243       x[1]   = b[0]*idiag[1] + b[1]*idiag[3];
244       i2     = 2;
245       idiag += 4;
246       for (i=1; i<m; i++) {
247         v  = aa + 4*ai[i];
248         vi = aj + ai[i];
249         nz = diag[i] - ai[i];
250         s1 = b[i2]; s2 = b[i2+1];
251         for (j=0; j<nz; j++) {
252           idx = 2*vi[j];
253           it  = 4*j;
254           x1  = x[idx]; x2 = x[1+idx];
255           s1 -= v[it]*x1 + v[it+2]*x2;
256           s2 -= v[it+1]*x1 + v[it+3]*x2;
257         }
258         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
259         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
260         idiag  += 4;
261         i2     += 2;
262       }
263       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
264       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
265     }
266     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
267         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
268       i2    = 0;
269       mdiag = a->idiag+4*a->mbs;
270       for (i=0; i<m; i++) {
271         x1      = x[i2]; x2 = x[i2+1];
272         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
273         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
274         mdiag  += 4;
275         i2     += 2;
276       }
277       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
278     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
279       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
280     }
281     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
282       idiag   = a->idiag+4*a->mbs - 4;
283       i2      = 2*m - 2;
284       x1      = x[i2]; x2 = x[i2+1];
285       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
286       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
287       idiag  -= 4;
288       i2     -= 2;
289       for (i=m-2; i>=0; i--) {
290         v  = aa + 4*(diag[i]+1);
291         vi = aj + diag[i] + 1;
292         nz = ai[i+1] - diag[i] - 1;
293         s1 = x[i2]; s2 = x[i2+1];
294         for (j=0; j<nz; j++) {
295           idx = 2*vi[j];
296           it  = 4*j;
297           x1  = x[idx]; x2 = x[1+idx];
298           s1 -= v[it]*x1 + v[it+2]*x2;
299           s2 -= v[it+1]*x1 + v[it+3]*x2;
300         }
301         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
302         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
303         idiag  -= 4;
304         i2     -= 2;
305       }
306       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
307     }
308   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
310   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
317 {
318   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
319   PetscScalar       *x,x1,x2,x3,s1,s2,s3;
320   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
321   const PetscScalar *b;
322   PetscErrorCode    ierr;
323   PetscInt          m = a->mbs,i,i2,nz,idx;
324   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
325 
326   PetscFunctionBegin;
327   its = its*lits;
328   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
329   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
334 
335   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
336 
337   diag  = a->diag;
338   idiag = a->idiag;
339   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
340   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
341 
342   if (flag & SOR_ZERO_INITIAL_GUESS) {
343     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
344       x[0]   = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345       x[1]   = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346       x[2]   = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347       i2     = 3;
348       idiag += 9;
349       for (i=1; i<m; i++) {
350         v  = aa + 9*ai[i];
351         vi = aj + ai[i];
352         nz = diag[i] - ai[i];
353         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354         while (nz--) {
355           idx = 3*(*vi++);
356           x1  = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360           v  += 9;
361         }
362         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365         idiag  += 9;
366         i2     += 3;
367       }
368       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
370     }
371     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373       i2    = 0;
374       mdiag = a->idiag+9*a->mbs;
375       for (i=0; i<m; i++) {
376         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380         mdiag  += 9;
381         i2     += 3;
382       }
383       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
384     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
386     }
387     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
388       idiag   = a->idiag+9*a->mbs - 9;
389       i2      = 3*m - 3;
390       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394       idiag  -= 9;
395       i2     -= 3;
396       for (i=m-2; i>=0; i--) {
397         v  = aa + 9*(diag[i]+1);
398         vi = aj + diag[i] + 1;
399         nz = ai[i+1] - diag[i] - 1;
400         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401         while (nz--) {
402           idx = 3*(*vi++);
403           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407           v  += 9;
408         }
409         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412         idiag  -= 9;
413         i2     -= 3;
414       }
415       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
416     }
417   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
418   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
419   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
426 {
427   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
428   PetscScalar       *x,x1,x2,x3,x4,s1,s2,s3,s4;
429   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
430   const PetscScalar *b;
431   PetscErrorCode    ierr;
432   PetscInt          m = a->mbs,i,i2,nz,idx;
433   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
434 
435   PetscFunctionBegin;
436   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
437   its = its*lits;
438   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
439   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
440   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
441   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
442   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
443 
444   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
445 
446   diag  = a->diag;
447   idiag = a->idiag;
448   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
449   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
450 
451   if (flag & SOR_ZERO_INITIAL_GUESS) {
452     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
453       x[0]   = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
454       x[1]   = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
455       x[2]   = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
456       x[3]   = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
457       i2     = 4;
458       idiag += 16;
459       for (i=1; i<m; i++) {
460         v  = aa + 16*ai[i];
461         vi = aj + ai[i];
462         nz = diag[i] - ai[i];
463         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
464         while (nz--) {
465           idx = 4*(*vi++);
466           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
467           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
468           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
469           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
470           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
471           v  += 16;
472         }
473         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
474         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
475         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
476         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
477         idiag  += 16;
478         i2     += 4;
479       }
480       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
481       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
482     }
483     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
484         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
485       i2    = 0;
486       mdiag = a->idiag+16*a->mbs;
487       for (i=0; i<m; i++) {
488         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
489         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
490         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
491         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
492         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
493         mdiag  += 16;
494         i2     += 4;
495       }
496       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
497     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
498       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
499     }
500     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
501       idiag   = a->idiag+16*a->mbs - 16;
502       i2      = 4*m - 4;
503       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
504       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
505       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
506       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
507       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
508       idiag  -= 16;
509       i2     -= 4;
510       for (i=m-2; i>=0; i--) {
511         v  = aa + 16*(diag[i]+1);
512         vi = aj + diag[i] + 1;
513         nz = ai[i+1] - diag[i] - 1;
514         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
515         while (nz--) {
516           idx = 4*(*vi++);
517           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
518           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
519           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
520           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
521           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
522           v  += 16;
523         }
524         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
525         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
526         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
527         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
528         idiag  -= 16;
529         i2     -= 4;
530       }
531       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
532     }
533   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
534   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
542 {
543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544   PetscScalar       *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
545   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
546   const PetscScalar *b;
547   PetscErrorCode    ierr;
548   PetscInt          m = a->mbs,i,i2,nz,idx;
549   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
550 
551   PetscFunctionBegin;
552   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
553   its = its*lits;
554   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
555   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
556   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
557   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
558   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
559 
560   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
561 
562   diag  = a->diag;
563   idiag = a->idiag;
564   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
565   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
566 
567   if (flag & SOR_ZERO_INITIAL_GUESS) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       x[0]   = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
570       x[1]   = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
571       x[2]   = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
572       x[3]   = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
573       x[4]   = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
574       i2     = 5;
575       idiag += 25;
576       for (i=1; i<m; i++) {
577         v  = aa + 25*ai[i];
578         vi = aj + ai[i];
579         nz = diag[i] - ai[i];
580         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
581         while (nz--) {
582           idx = 5*(*vi++);
583           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
584           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
585           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
586           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
587           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
588           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
589           v  += 25;
590         }
591         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
592         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
593         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
594         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
595         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
596         idiag  += 25;
597         i2     += 5;
598       }
599       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
600       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
601     }
602     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
603         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
604       i2    = 0;
605       mdiag = a->idiag+25*a->mbs;
606       for (i=0; i<m; i++) {
607         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
608         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
609         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
610         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
611         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
612         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
613         mdiag  += 25;
614         i2     += 5;
615       }
616       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
617     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
618       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
619     }
620     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
621       idiag   = a->idiag+25*a->mbs - 25;
622       i2      = 5*m - 5;
623       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
624       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
625       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
626       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
627       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
628       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
629       idiag  -= 25;
630       i2     -= 5;
631       for (i=m-2; i>=0; i--) {
632         v  = aa + 25*(diag[i]+1);
633         vi = aj + diag[i] + 1;
634         nz = ai[i+1] - diag[i] - 1;
635         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
636         while (nz--) {
637           idx = 5*(*vi++);
638           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
639           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
640           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
641           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
642           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
643           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
644           v  += 25;
645         }
646         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
647         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
648         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
649         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
650         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
651         idiag  -= 25;
652         i2     -= 5;
653       }
654       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
655     }
656   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
657   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
658   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
665 {
666   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
667   PetscScalar       *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
668   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
669   const PetscScalar *b;
670   PetscErrorCode    ierr;
671   PetscInt          m = a->mbs,i,i2,nz,idx;
672   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
673 
674   PetscFunctionBegin;
675   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
676   its = its*lits;
677   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
678   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
679   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
680   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
681   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
682 
683   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
684 
685   diag  = a->diag;
686   idiag = a->idiag;
687   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
688   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
689 
690   if (flag & SOR_ZERO_INITIAL_GUESS) {
691     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
692       x[0]   = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
693       x[1]   = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
694       x[2]   = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
695       x[3]   = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
696       x[4]   = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
697       x[5]   = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
698       i2     = 6;
699       idiag += 36;
700       for (i=1; i<m; i++) {
701         v  = aa + 36*ai[i];
702         vi = aj + ai[i];
703         nz = diag[i] - ai[i];
704         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
705         while (nz--) {
706           idx = 6*(*vi++);
707           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
708           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
709           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
710           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
711           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
712           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
713           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
714           v  += 36;
715         }
716         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
717         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
718         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
719         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
720         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
721         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
722         idiag  += 36;
723         i2     += 6;
724       }
725       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
726       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
727     }
728     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
729         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
730       i2    = 0;
731       mdiag = a->idiag+36*a->mbs;
732       for (i=0; i<m; i++) {
733         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
734         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
735         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
736         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
737         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
738         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
739         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
740         mdiag  += 36;
741         i2     += 6;
742       }
743       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
744     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
745       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
746     }
747     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748       idiag   = a->idiag+36*a->mbs - 36;
749       i2      = 6*m - 6;
750       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
751       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
752       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
753       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
754       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
755       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
756       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
757       idiag  -= 36;
758       i2     -= 6;
759       for (i=m-2; i>=0; i--) {
760         v  = aa + 36*(diag[i]+1);
761         vi = aj + diag[i] + 1;
762         nz = ai[i+1] - diag[i] - 1;
763         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
764         while (nz--) {
765           idx = 6*(*vi++);
766           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
767           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
768           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
769           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
770           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
771           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
772           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
773           v  += 36;
774         }
775         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
776         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
777         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
778         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
779         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
780         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
781         idiag  -= 36;
782         i2     -= 6;
783       }
784       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
785     }
786   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
787   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
788   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
795 {
796   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
797   PetscScalar       *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
798   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
799   const PetscScalar *b;
800   PetscErrorCode    ierr;
801   PetscInt          m = a->mbs,i,i2,nz,idx;
802   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
803 
804   PetscFunctionBegin;
805   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
806   its = its*lits;
807   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
808   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
809   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
810   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
811   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
812 
813   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
814 
815   diag  = a->diag;
816   idiag = a->idiag;
817   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
818   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
819 
820   if (flag & SOR_ZERO_INITIAL_GUESS) {
821     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
822       x[0]   = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
823       x[1]   = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
824       x[2]   = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
825       x[3]   = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
826       x[4]   = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
827       x[5]   = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
828       x[6]   = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
829       i2     = 7;
830       idiag += 49;
831       for (i=1; i<m; i++) {
832         v  = aa + 49*ai[i];
833         vi = aj + ai[i];
834         nz = diag[i] - ai[i];
835         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
836         while (nz--) {
837           idx = 7*(*vi++);
838           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
839           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
840           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
841           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
842           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
843           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
844           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
845           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
846           v  += 49;
847         }
848         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
849         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
850         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
851         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
852         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
853         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
854         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
855         idiag  += 49;
856         i2     += 7;
857       }
858       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
859       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
860     }
861     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
862         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
863       i2    = 0;
864       mdiag = a->idiag+49*a->mbs;
865       for (i=0; i<m; i++) {
866         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
867         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
868         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
869         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
870         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
871         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
872         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
873         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
874         mdiag  += 49;
875         i2     += 7;
876       }
877       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
878     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
879       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
880     }
881     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882       idiag   = a->idiag+49*a->mbs - 49;
883       i2      = 7*m - 7;
884       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
885       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
886       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
887       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
888       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
889       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
890       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
891       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
892       idiag  -= 49;
893       i2     -= 7;
894       for (i=m-2; i>=0; i--) {
895         v  = aa + 49*(diag[i]+1);
896         vi = aj + diag[i] + 1;
897         nz = ai[i+1] - diag[i] - 1;
898         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
899         while (nz--) {
900           idx = 7*(*vi++);
901           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
902           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
903           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
904           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
905           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
906           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
907           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
908           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
909           v  += 49;
910         }
911         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
912         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
913         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
914         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
915         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
916         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
917         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
918         idiag  -= 49;
919         i2     -= 7;
920       }
921       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
922     }
923   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
932 {
933   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
934   PetscScalar       *x,*work,*w,*workt;
935   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
936   const PetscScalar *b;
937   PetscErrorCode    ierr;
938   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
939   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
940 
941   PetscFunctionBegin;
942   its = its*lits;
943   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
944   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
945   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
946   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
947   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
948   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
949 
950   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
951 
952   diag  = a->diag;
953   idiag = a->idiag;
954   if (!a->mult_work) {
955     k    = PetscMax(A->rmap->n,A->cmap->n);
956     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
957   }
958   work = a->mult_work;
959   if (!a->sor_work) {
960     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
961   }
962   w = a->sor_work;
963 
964   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
966 
967   if (flag & SOR_ZERO_INITIAL_GUESS) {
968     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
969       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
970       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
971       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
972       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
973       i2     = bs;
974       idiag += bs2;
975       for (i=1; i<m; i++) {
976         v  = aa + bs2*ai[i];
977         vi = aj + ai[i];
978         nz = diag[i] - ai[i];
979 
980         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
981         /* copy all rows of x that are needed into contiguous space */
982         workt = work;
983         for (j=0; j<nz; j++) {
984           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
985           workt += bs;
986         }
987         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
988         /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
989          while (nz--) {
990            idx  = N*(*vi++);
991            x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
992            s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
993            s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
994            s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
995            v   += N2;
996            } */
997 
998         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
999         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1000         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1001         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1002 
1003         idiag += bs2;
1004         i2    += bs;
1005       }
1006       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1007       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1008     }
1009     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1010         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1011       i2    = 0;
1012       mdiag = a->idiag+bs2*a->mbs;
1013       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1014       for (i=0; i<m; i++) {
1015         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1016         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1017         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1018         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1019         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1020 
1021         mdiag += bs2;
1022         i2    += bs;
1023       }
1024       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1025     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1026       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1027     }
1028     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1029       idiag = a->idiag+bs2*a->mbs - bs2;
1030       i2    = bs*m - bs;
1031       ierr  = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1033       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1034       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1035       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1036       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1037       idiag -= bs2;
1038       i2    -= bs;
1039       for (i=m-2; i>=0; i--) {
1040         v  = aa + bs2*(diag[i]+1);
1041         vi = aj + diag[i] + 1;
1042         nz = ai[i+1] - diag[i] - 1;
1043 
1044         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1045         /* copy all rows of x that are needed into contiguous space */
1046         workt = work;
1047         for (j=0; j<nz; j++) {
1048           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1049           workt += bs;
1050         }
1051         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1052         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1053         while (nz--) {
1054           idx  = N*(*vi++);
1055           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1056           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1057           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1058           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1059           v   += N2;
1060           } */
1061 
1062         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1063         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1064         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1065         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1066         idiag -= bs2;
1067         i2    -= bs;
1068       }
1069       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1070     }
1071   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1072   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1073   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 /*
1078     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1079 */
1080 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1083 #define matsetvaluesblocked4_ matsetvaluesblocked4
1084 #endif
1085 
1086 EXTERN_C_BEGIN
1087 #undef __FUNCT__
1088 #define __FUNCT__ "matsetvaluesblocked4_"
1089 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1090 {
1091   Mat               A  = *AA;
1092   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1093   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1094   PetscInt          *ai    =a->i,*ailen=a->ilen;
1095   PetscInt          *aj    =a->j,stepval,lastcol = -1;
1096   const PetscScalar *value = v;
1097   MatScalar         *ap,*aa = a->a,*bap;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1101   stepval = (n-1)*4;
1102   for (k=0; k<m; k++) { /* loop over added rows */
1103     row  = im[k];
1104     rp   = aj + ai[row];
1105     ap   = aa + 16*ai[row];
1106     nrow = ailen[row];
1107     low  = 0;
1108     high = nrow;
1109     for (l=0; l<n; l++) { /* loop over added columns */
1110       col = in[l];
1111       if (col <= lastcol)  low = 0;
1112       else                high = nrow;
1113       lastcol = col;
1114       value   = v + k*(stepval+4 + l)*4;
1115       while (high-low > 7) {
1116         t = (low+high)/2;
1117         if (rp[t] > col) high = t;
1118         else             low  = t;
1119       }
1120       for (i=low; i<high; i++) {
1121         if (rp[i] > col) break;
1122         if (rp[i] == col) {
1123           bap = ap +  16*i;
1124           for (ii=0; ii<4; ii++,value+=stepval) {
1125             for (jj=ii; jj<16; jj+=4) {
1126               bap[jj] += *value++;
1127             }
1128           }
1129           goto noinsert2;
1130         }
1131       }
1132       N = nrow++ - 1;
1133       high++; /* added new column index thus must search to one higher than before */
1134       /* shift up all the later entries in this row */
1135       for (ii=N; ii>=i; ii--) {
1136         rp[ii+1] = rp[ii];
1137         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1138       }
1139       if (N >= i) {
1140         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1141       }
1142       rp[i] = col;
1143       bap   = ap +  16*i;
1144       for (ii=0; ii<4; ii++,value+=stepval) {
1145         for (jj=ii; jj<16; jj+=4) {
1146           bap[jj] = *value++;
1147         }
1148       }
1149       noinsert2:;
1150       low = i;
1151     }
1152     ailen[row] = nrow;
1153   }
1154   PetscFunctionReturnVoid();
1155 }
1156 EXTERN_C_END
1157 
1158 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1159 #define matsetvalues4_ MATSETVALUES4
1160 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1161 #define matsetvalues4_ matsetvalues4
1162 #endif
1163 
1164 EXTERN_C_BEGIN
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatSetValues4_"
1167 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1168 {
1169   Mat         A  = *AA;
1170   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1171   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1172   PetscInt    *ai=a->i,*ailen=a->ilen;
1173   PetscInt    *aj=a->j,brow,bcol;
1174   PetscInt    ridx,cidx,lastcol = -1;
1175   MatScalar   *ap,value,*aa=a->a,*bap;
1176 
1177   PetscFunctionBegin;
1178   for (k=0; k<m; k++) { /* loop over added rows */
1179     row  = im[k]; brow = row/4;
1180     rp   = aj + ai[brow];
1181     ap   = aa + 16*ai[brow];
1182     nrow = ailen[brow];
1183     low  = 0;
1184     high = nrow;
1185     for (l=0; l<n; l++) { /* loop over added columns */
1186       col   = in[l]; bcol = col/4;
1187       ridx  = row % 4; cidx = col % 4;
1188       value = v[l + k*n];
1189       if (col <= lastcol)  low = 0;
1190       else                high = nrow;
1191       lastcol = col;
1192       while (high-low > 7) {
1193         t = (low+high)/2;
1194         if (rp[t] > bcol) high = t;
1195         else              low  = t;
1196       }
1197       for (i=low; i<high; i++) {
1198         if (rp[i] > bcol) break;
1199         if (rp[i] == bcol) {
1200           bap   = ap +  16*i + 4*cidx + ridx;
1201           *bap += value;
1202           goto noinsert1;
1203         }
1204       }
1205       N = nrow++ - 1;
1206       high++; /* added new column thus must search to one higher than before */
1207       /* shift up all the later entries in this row */
1208       for (ii=N; ii>=i; ii--) {
1209         rp[ii+1] = rp[ii];
1210         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1211       }
1212       if (N>=i) {
1213         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1214       }
1215       rp[i]                    = bcol;
1216       ap[16*i + 4*cidx + ridx] = value;
1217 noinsert1:;
1218       low = i;
1219     }
1220     ailen[brow] = nrow;
1221   }
1222   PetscFunctionReturnVoid();
1223 }
1224 EXTERN_C_END
1225 
1226 /*
1227      Checks for missing diagonals
1228 */
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1231 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1232 {
1233   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1234   PetscErrorCode ierr;
1235   PetscInt       *diag,*jj = a->j,i;
1236 
1237   PetscFunctionBegin;
1238   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1239   *missing = PETSC_FALSE;
1240   if (A->rmap->n > 0 && !jj) {
1241     *missing = PETSC_TRUE;
1242     if (d) *d = 0;
1243     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1244   } else {
1245     diag = a->diag;
1246     for (i=0; i<a->mbs; i++) {
1247       if (jj[diag[i]] != i) {
1248         *missing = PETSC_TRUE;
1249         if (d) *d = i;
1250         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1251         break;
1252       }
1253     }
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1260 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1261 {
1262   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1263   PetscErrorCode ierr;
1264   PetscInt       i,j,m = a->mbs;
1265 
1266   PetscFunctionBegin;
1267   if (!a->diag) {
1268     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1269     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1270     a->free_diag = PETSC_TRUE;
1271   }
1272   for (i=0; i<m; i++) {
1273     a->diag[i] = a->i[i+1];
1274     for (j=a->i[i]; j<a->i[i+1]; j++) {
1275       if (a->j[j] == i) {
1276         a->diag[i] = j;
1277         break;
1278       }
1279     }
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1287 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1288 {
1289   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1290   PetscErrorCode ierr;
1291   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1292   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1293 
1294   PetscFunctionBegin;
1295   *nn = n;
1296   if (!ia) PetscFunctionReturn(0);
1297   if (symmetric) {
1298     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1299     nz   = tia[n];
1300   } else {
1301     tia = a->i; tja = a->j;
1302   }
1303 
1304   if (!blockcompressed && bs > 1) {
1305     (*nn) *= bs;
1306     /* malloc & create the natural set of indices */
1307     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1308     if (n) {
1309       (*ia)[0] = 0;
1310       for (j=1; j<bs; j++) {
1311         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1312       }
1313     }
1314 
1315     for (i=1; i<n; i++) {
1316       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1317       for (j=1; j<bs; j++) {
1318         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1319       }
1320     }
1321     if (n) {
1322       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1323     }
1324 
1325     if (inja) {
1326       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1327       cnt = 0;
1328       for (i=0; i<n; i++) {
1329         for (j=0; j<bs; j++) {
1330           for (k=tia[i]; k<tia[i+1]; k++) {
1331             for (l=0; l<bs; l++) {
1332               (*ja)[cnt++] = bs*tja[k] + l;
1333             }
1334           }
1335         }
1336       }
1337     }
1338 
1339     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1340       ierr = PetscFree(tia);CHKERRQ(ierr);
1341       ierr = PetscFree(tja);CHKERRQ(ierr);
1342     }
1343   } else if (oshift == 1) {
1344     if (symmetric) {
1345       nz = tia[A->rmap->n/bs];
1346       /*  add 1 to i and j indices */
1347       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1348       *ia = tia;
1349       if (ja) {
1350         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1351         *ja = tja;
1352       }
1353     } else {
1354       nz = a->i[A->rmap->n/bs];
1355       /* malloc space and  add 1 to i and j indices */
1356       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1357       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1358       if (ja) {
1359         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1360         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1361       }
1362     }
1363   } else {
1364     *ia = tia;
1365     if (ja) *ja = tja;
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1372 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1373 {
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   if (!ia) PetscFunctionReturn(0);
1378   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1379     ierr = PetscFree(*ia);CHKERRQ(ierr);
1380     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1387 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1388 {
1389   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1390   PetscErrorCode ierr;
1391 
1392   PetscFunctionBegin;
1393 #if defined(PETSC_USE_LOG)
1394   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1395 #endif
1396   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1397   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1398   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1399   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1400   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1401   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1402   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1403   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1404   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1405   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1406   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1407   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1408   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1409 
1410   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1411   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1412   ierr = PetscFree(A->data);CHKERRQ(ierr);
1413 
1414   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1415   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr);
1416   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1417   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr);
1424   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1430 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436   switch (op) {
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     break;
1440   case MAT_KEEP_NONZERO_PATTERN:
1441     a->keepnonzeropattern = flg;
1442     break;
1443   case MAT_NEW_NONZERO_LOCATIONS:
1444     a->nonew = (flg ? 0 : 1);
1445     break;
1446   case MAT_NEW_NONZERO_LOCATION_ERR:
1447     a->nonew = (flg ? -1 : 0);
1448     break;
1449   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1450     a->nonew = (flg ? -2 : 0);
1451     break;
1452   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1453     a->nounused = (flg ? -1 : 0);
1454     break;
1455   case MAT_CHECK_COMPRESSED_ROW:
1456     a->compressedrow.check = flg;
1457     break;
1458   case MAT_NEW_DIAGONALS:
1459   case MAT_IGNORE_OFF_PROC_ENTRIES:
1460   case MAT_USE_HASH_TABLE:
1461     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1462     break;
1463   case MAT_SPD:
1464   case MAT_SYMMETRIC:
1465   case MAT_STRUCTURALLY_SYMMETRIC:
1466   case MAT_HERMITIAN:
1467   case MAT_SYMMETRY_ETERNAL:
1468     /* These options are handled directly by MatSetOption() */
1469     break;
1470   default:
1471     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1478 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1479 {
1480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1481   PetscErrorCode ierr;
1482   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1483   MatScalar      *aa,*aa_i;
1484   PetscScalar    *v_i;
1485 
1486   PetscFunctionBegin;
1487   bs  = A->rmap->bs;
1488   ai  = a->i;
1489   aj  = a->j;
1490   aa  = a->a;
1491   bs2 = a->bs2;
1492 
1493   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1494 
1495   bn  = row/bs;   /* Block number */
1496   bp  = row % bs; /* Block Position */
1497   M   = ai[bn+1] - ai[bn];
1498   *nz = bs*M;
1499 
1500   if (v) {
1501     *v = 0;
1502     if (*nz) {
1503       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1504       for (i=0; i<M; i++) { /* for each block in the block row */
1505         v_i  = *v + i*bs;
1506         aa_i = aa + bs2*(ai[bn] + i);
1507         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1508       }
1509     }
1510   }
1511 
1512   if (idx) {
1513     *idx = 0;
1514     if (*nz) {
1515       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1516       for (i=0; i<M; i++) { /* for each block in the block row */
1517         idx_i = *idx + i*bs;
1518         itmp  = bs*aj[ai[bn] + i];
1519         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1520       }
1521     }
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1528 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1529 {
1530   PetscErrorCode ierr;
1531 
1532   PetscFunctionBegin;
1533   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1534   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1542 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1543 {
1544   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1545   Mat            C;
1546   PetscErrorCode ierr;
1547   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1548   PetscInt       *rows,*cols,bs2=a->bs2;
1549   MatScalar      *array;
1550 
1551   PetscFunctionBegin;
1552   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1553   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1554     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1555     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1556 
1557     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1558     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1559     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1560     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1561     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1562     ierr = PetscFree(col);CHKERRQ(ierr);
1563   } else {
1564     C = *B;
1565   }
1566 
1567   array = a->a;
1568   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1569   for (i=0; i<mbs; i++) {
1570     cols[0] = i*bs;
1571     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1572     len = ai[i+1] - ai[i];
1573     for (j=0; j<len; j++) {
1574       rows[0] = (*aj++)*bs;
1575       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1576       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1577       array += bs2;
1578     }
1579   }
1580   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1581 
1582   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1584 
1585   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1586     *B = C;
1587   } else {
1588     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1595 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1596 {
1597   PetscErrorCode ierr;
1598   Mat            Btrans;
1599 
1600   PetscFunctionBegin;
1601   *f   = PETSC_FALSE;
1602   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1603   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1604   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1610 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1611 {
1612   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1615   int            fd;
1616   PetscScalar    *aa;
1617   FILE           *file;
1618 
1619   PetscFunctionBegin;
1620   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1621   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1622   col_lens[0] = MAT_FILE_CLASSID;
1623 
1624   col_lens[1] = A->rmap->N;
1625   col_lens[2] = A->cmap->n;
1626   col_lens[3] = a->nz*bs2;
1627 
1628   /* store lengths of each row and write (including header) to file */
1629   count = 0;
1630   for (i=0; i<a->mbs; i++) {
1631     for (j=0; j<bs; j++) {
1632       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1633     }
1634   }
1635   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1636   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1637 
1638   /* store column indices (zero start index) */
1639   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1640   count = 0;
1641   for (i=0; i<a->mbs; i++) {
1642     for (j=0; j<bs; j++) {
1643       for (k=a->i[i]; k<a->i[i+1]; k++) {
1644         for (l=0; l<bs; l++) {
1645           jj[count++] = bs*a->j[k] + l;
1646         }
1647       }
1648     }
1649   }
1650   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1651   ierr = PetscFree(jj);CHKERRQ(ierr);
1652 
1653   /* store nonzero values */
1654   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1655   count = 0;
1656   for (i=0; i<a->mbs; i++) {
1657     for (j=0; j<bs; j++) {
1658       for (k=a->i[i]; k<a->i[i+1]; k++) {
1659         for (l=0; l<bs; l++) {
1660           aa[count++] = a->a[bs2*k + l*bs + j];
1661         }
1662       }
1663     }
1664   }
1665   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1666   ierr = PetscFree(aa);CHKERRQ(ierr);
1667 
1668   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1669   if (file) {
1670     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1671   }
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1677 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1678 {
1679   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1680   PetscErrorCode    ierr;
1681   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1682   PetscViewerFormat format;
1683 
1684   PetscFunctionBegin;
1685   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1686   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1687     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1688   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1689     Mat aij;
1690     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1691     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1692     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1693   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1694       PetscFunctionReturn(0);
1695   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1696     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1697     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1698     for (i=0; i<a->mbs; i++) {
1699       for (j=0; j<bs; j++) {
1700         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1701         for (k=a->i[i]; k<a->i[i+1]; k++) {
1702           for (l=0; l<bs; l++) {
1703 #if defined(PETSC_USE_COMPLEX)
1704             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1705               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1706                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1707             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1708               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1709                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1710             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1711               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1712             }
1713 #else
1714             if (a->a[bs2*k + l*bs + j] != 0.0) {
1715               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1716             }
1717 #endif
1718           }
1719         }
1720         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1721       }
1722     }
1723     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1724   } else {
1725     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1726     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1727     for (i=0; i<a->mbs; i++) {
1728       for (j=0; j<bs; j++) {
1729         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1730         for (k=a->i[i]; k<a->i[i+1]; k++) {
1731           for (l=0; l<bs; l++) {
1732 #if defined(PETSC_USE_COMPLEX)
1733             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1734               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1735                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1736             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1738                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1739             } else {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1741             }
1742 #else
1743             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1744 #endif
1745           }
1746         }
1747         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1748       }
1749     }
1750     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1751   }
1752   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 #include <petscdraw.h>
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1759 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1760 {
1761   Mat               A = (Mat) Aa;
1762   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1763   PetscErrorCode    ierr;
1764   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1765   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1766   MatScalar         *aa;
1767   PetscViewer       viewer;
1768   PetscViewerFormat format;
1769 
1770   PetscFunctionBegin;
1771   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1772   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1773 
1774   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1775 
1776   /* loop over matrix elements drawing boxes */
1777 
1778   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1779     color = PETSC_DRAW_BLUE;
1780     for (i=0,row=0; i<mbs; i++,row+=bs) {
1781       for (j=a->i[i]; j<a->i[i+1]; j++) {
1782         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1783         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1784         aa  = a->a + j*bs2;
1785         for (k=0; k<bs; k++) {
1786           for (l=0; l<bs; l++) {
1787             if (PetscRealPart(*aa++) >=  0.) continue;
1788             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1789           }
1790         }
1791       }
1792     }
1793     color = PETSC_DRAW_CYAN;
1794     for (i=0,row=0; i<mbs; i++,row+=bs) {
1795       for (j=a->i[i]; j<a->i[i+1]; j++) {
1796         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1797         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1798         aa  = a->a + j*bs2;
1799         for (k=0; k<bs; k++) {
1800           for (l=0; l<bs; l++) {
1801             if (PetscRealPart(*aa++) != 0.) continue;
1802             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1803           }
1804         }
1805       }
1806     }
1807     color = PETSC_DRAW_RED;
1808     for (i=0,row=0; i<mbs; i++,row+=bs) {
1809       for (j=a->i[i]; j<a->i[i+1]; j++) {
1810         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1811         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1812         aa  = a->a + j*bs2;
1813         for (k=0; k<bs; k++) {
1814           for (l=0; l<bs; l++) {
1815             if (PetscRealPart(*aa++) <= 0.) continue;
1816             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1817           }
1818         }
1819       }
1820     }
1821   } else {
1822     /* use contour shading to indicate magnitude of values */
1823     /* first determine max of all nonzero values */
1824     PetscDraw popup;
1825     PetscReal scale,maxv = 0.0;
1826 
1827     for (i=0; i<a->nz*a->bs2; i++) {
1828       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1829     }
1830     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1831     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1832     if (popup) {
1833       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1834     }
1835     for (i=0,row=0; i<mbs; i++,row+=bs) {
1836       for (j=a->i[i]; j<a->i[i+1]; j++) {
1837         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1838         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1839         aa  = a->a + j*bs2;
1840         for (k=0; k<bs; k++) {
1841           for (l=0; l<bs; l++) {
1842             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1843             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1844           }
1845         }
1846       }
1847     }
1848   }
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1854 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1855 {
1856   PetscErrorCode ierr;
1857   PetscReal      xl,yl,xr,yr,w,h;
1858   PetscDraw      draw;
1859   PetscBool      isnull;
1860 
1861   PetscFunctionBegin;
1862   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1863   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1864 
1865   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1866   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1867   xr  += w;    yr += h;  xl = -w;     yl = -h;
1868   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1869   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1870   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatView_SeqBAIJ"
1876 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1877 {
1878   PetscErrorCode ierr;
1879   PetscBool      iascii,isbinary,isdraw;
1880 
1881   PetscFunctionBegin;
1882   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1883   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1884   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1885   if (iascii) {
1886     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1887   } else if (isbinary) {
1888     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1889   } else if (isdraw) {
1890     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1891   } else {
1892     Mat B;
1893     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1894     ierr = MatView(B,viewer);CHKERRQ(ierr);
1895     ierr = MatDestroy(&B);CHKERRQ(ierr);
1896   }
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1903 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1904 {
1905   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1906   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1907   PetscInt    *ai = a->i,*ailen = a->ilen;
1908   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1909   MatScalar   *ap,*aa = a->a;
1910 
1911   PetscFunctionBegin;
1912   for (k=0; k<m; k++) { /* loop over rows */
1913     row = im[k]; brow = row/bs;
1914     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1915     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1916     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1917     nrow = ailen[brow];
1918     for (l=0; l<n; l++) { /* loop over columns */
1919       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1920       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1921       col  = in[l];
1922       bcol = col/bs;
1923       cidx = col%bs;
1924       ridx = row%bs;
1925       high = nrow;
1926       low  = 0; /* assume unsorted */
1927       while (high-low > 5) {
1928         t = (low+high)/2;
1929         if (rp[t] > bcol) high = t;
1930         else             low  = t;
1931       }
1932       for (i=low; i<high; i++) {
1933         if (rp[i] > bcol) break;
1934         if (rp[i] == bcol) {
1935           *v++ = ap[bs2*i+bs*cidx+ridx];
1936           goto finished;
1937         }
1938       }
1939       *v++ = 0.0;
1940 finished:;
1941     }
1942   }
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1948 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1949 {
1950   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1951   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1952   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1953   PetscErrorCode    ierr;
1954   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1955   PetscBool         roworiented=a->roworiented;
1956   const PetscScalar *value     = v;
1957   MatScalar         *ap,*aa = a->a,*bap;
1958 
1959   PetscFunctionBegin;
1960   if (roworiented) {
1961     stepval = (n-1)*bs;
1962   } else {
1963     stepval = (m-1)*bs;
1964   }
1965   for (k=0; k<m; k++) { /* loop over added rows */
1966     row = im[k];
1967     if (row < 0) continue;
1968 #if defined(PETSC_USE_DEBUG)
1969     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1970 #endif
1971     rp   = aj + ai[row];
1972     ap   = aa + bs2*ai[row];
1973     rmax = imax[row];
1974     nrow = ailen[row];
1975     low  = 0;
1976     high = nrow;
1977     for (l=0; l<n; l++) { /* loop over added columns */
1978       if (in[l] < 0) continue;
1979 #if defined(PETSC_USE_DEBUG)
1980       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1981 #endif
1982       col = in[l];
1983       if (roworiented) {
1984         value = v + (k*(stepval+bs) + l)*bs;
1985       } else {
1986         value = v + (l*(stepval+bs) + k)*bs;
1987       }
1988       if (col <= lastcol) low = 0;
1989       else high = nrow;
1990       lastcol = col;
1991       while (high-low > 7) {
1992         t = (low+high)/2;
1993         if (rp[t] > col) high = t;
1994         else             low  = t;
1995       }
1996       for (i=low; i<high; i++) {
1997         if (rp[i] > col) break;
1998         if (rp[i] == col) {
1999           bap = ap +  bs2*i;
2000           if (roworiented) {
2001             if (is == ADD_VALUES) {
2002               for (ii=0; ii<bs; ii++,value+=stepval) {
2003                 for (jj=ii; jj<bs2; jj+=bs) {
2004                   bap[jj] += *value++;
2005                 }
2006               }
2007             } else {
2008               for (ii=0; ii<bs; ii++,value+=stepval) {
2009                 for (jj=ii; jj<bs2; jj+=bs) {
2010                   bap[jj] = *value++;
2011                 }
2012               }
2013             }
2014           } else {
2015             if (is == ADD_VALUES) {
2016               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2017                 for (jj=0; jj<bs; jj++) {
2018                   bap[jj] += value[jj];
2019                 }
2020                 bap += bs;
2021               }
2022             } else {
2023               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2024                 for (jj=0; jj<bs; jj++) {
2025                   bap[jj]  = value[jj];
2026                 }
2027                 bap += bs;
2028               }
2029             }
2030           }
2031           goto noinsert2;
2032         }
2033       }
2034       if (nonew == 1) goto noinsert2;
2035       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2036       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2037       N = nrow++ - 1; high++;
2038       /* shift up all the later entries in this row */
2039       for (ii=N; ii>=i; ii--) {
2040         rp[ii+1] = rp[ii];
2041         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2042       }
2043       if (N >= i) {
2044         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2045       }
2046       rp[i] = col;
2047       bap   = ap +  bs2*i;
2048       if (roworiented) {
2049         for (ii=0; ii<bs; ii++,value+=stepval) {
2050           for (jj=ii; jj<bs2; jj+=bs) {
2051             bap[jj] = *value++;
2052           }
2053         }
2054       } else {
2055         for (ii=0; ii<bs; ii++,value+=stepval) {
2056           for (jj=0; jj<bs; jj++) {
2057             *bap++ = *value++;
2058           }
2059         }
2060       }
2061 noinsert2:;
2062       low = i;
2063     }
2064     ailen[row] = nrow;
2065   }
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2071 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2072 {
2073   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
2074   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2075   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
2076   PetscErrorCode ierr;
2077   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
2078   MatScalar      *aa  = a->a,*ap;
2079   PetscReal      ratio=0.6;
2080 
2081   PetscFunctionBegin;
2082   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2083 
2084   if (m) rmax = ailen[0];
2085   for (i=1; i<mbs; i++) {
2086     /* move each row back by the amount of empty slots (fshift) before it*/
2087     fshift += imax[i-1] - ailen[i-1];
2088     rmax    = PetscMax(rmax,ailen[i]);
2089     if (fshift) {
2090       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2091       N  = ailen[i];
2092       for (j=0; j<N; j++) {
2093         ip[j-fshift] = ip[j];
2094 
2095         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2096       }
2097     }
2098     ai[i] = ai[i-1] + ailen[i-1];
2099   }
2100   if (mbs) {
2101     fshift += imax[mbs-1] - ailen[mbs-1];
2102     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2103   }
2104   /* reset ilen and imax for each row */
2105   for (i=0; i<mbs; i++) {
2106     ailen[i] = imax[i] = ai[i+1] - ai[i];
2107   }
2108   a->nz = ai[mbs];
2109 
2110   /* diagonals may have moved, so kill the diagonal pointers */
2111   a->idiagvalid = PETSC_FALSE;
2112   if (fshift && a->diag) {
2113     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2114     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2115     a->diag = 0;
2116   }
2117   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2118   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2119   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2120   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2121 
2122   A->info.mallocs    += a->reallocs;
2123   a->reallocs         = 0;
2124   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2125 
2126   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2127 
2128   A->same_nonzero = PETSC_TRUE;
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 /*
2133    This function returns an array of flags which indicate the locations of contiguous
2134    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2135    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2136    Assume: sizes should be long enough to hold all the values.
2137 */
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2140 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2141 {
2142   PetscInt  i,j,k,row;
2143   PetscBool flg;
2144 
2145   PetscFunctionBegin;
2146   for (i=0,j=0; i<n; j++) {
2147     row = idx[i];
2148     if (row%bs!=0) { /* Not the begining of a block */
2149       sizes[j] = 1;
2150       i++;
2151     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2152       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2153       i++;
2154     } else { /* Begining of the block, so check if the complete block exists */
2155       flg = PETSC_TRUE;
2156       for (k=1; k<bs; k++) {
2157         if (row+k != idx[i+k]) { /* break in the block */
2158           flg = PETSC_FALSE;
2159           break;
2160         }
2161       }
2162       if (flg) { /* No break in the bs */
2163         sizes[j] = bs;
2164         i       += bs;
2165       } else {
2166         sizes[j] = 1;
2167         i++;
2168       }
2169     }
2170   }
2171   *bs_max = j;
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2177 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2178 {
2179   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2180   PetscErrorCode    ierr;
2181   PetscInt          i,j,k,count,*rows;
2182   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2183   PetscScalar       zero = 0.0;
2184   MatScalar         *aa;
2185   const PetscScalar *xx;
2186   PetscScalar       *bb;
2187 
2188   PetscFunctionBegin;
2189   /* fix right hand side if needed */
2190   if (x && b) {
2191     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2192     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2193     for (i=0; i<is_n; i++) {
2194       bb[is_idx[i]] = diag*xx[is_idx[i]];
2195     }
2196     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2197     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2198   }
2199 
2200   /* Make a copy of the IS and  sort it */
2201   /* allocate memory for rows,sizes */
2202   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2203 
2204   /* copy IS values to rows, and sort them */
2205   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2206   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2207 
2208   if (baij->keepnonzeropattern) {
2209     for (i=0; i<is_n; i++) sizes[i] = 1;
2210     bs_max          = is_n;
2211     A->same_nonzero = PETSC_TRUE;
2212   } else {
2213     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2214 
2215     A->same_nonzero = PETSC_FALSE;
2216   }
2217 
2218   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2219     row = rows[j];
2220     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2221     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2222     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2223     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2224       if (diag != (PetscScalar)0.0) {
2225         if (baij->ilen[row/bs] > 0) {
2226           baij->ilen[row/bs]       = 1;
2227           baij->j[baij->i[row/bs]] = row/bs;
2228 
2229           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2230         }
2231         /* Now insert all the diagonal values for this bs */
2232         for (k=0; k<bs; k++) {
2233           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2234         }
2235       } else { /* (diag == 0.0) */
2236         baij->ilen[row/bs] = 0;
2237       } /* end (diag == 0.0) */
2238     } else { /* (sizes[i] != bs) */
2239 #if defined(PETSC_USE_DEBUG)
2240       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2241 #endif
2242       for (k=0; k<count; k++) {
2243         aa[0] =  zero;
2244         aa   += bs;
2245       }
2246       if (diag != (PetscScalar)0.0) {
2247         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2248       }
2249     }
2250   }
2251 
2252   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2253   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2259 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2260 {
2261   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2262   PetscErrorCode    ierr;
2263   PetscInt          i,j,k,count;
2264   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2265   PetscScalar       zero = 0.0;
2266   MatScalar         *aa;
2267   const PetscScalar *xx;
2268   PetscScalar       *bb;
2269   PetscBool         *zeroed,vecs = PETSC_FALSE;
2270 
2271   PetscFunctionBegin;
2272   /* fix right hand side if needed */
2273   if (x && b) {
2274     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2275     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2276     vecs = PETSC_TRUE;
2277   }
2278   A->same_nonzero = PETSC_TRUE;
2279 
2280   /* zero the columns */
2281   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2282   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2283   for (i=0; i<is_n; i++) {
2284     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2285     zeroed[is_idx[i]] = PETSC_TRUE;
2286   }
2287   for (i=0; i<A->rmap->N; i++) {
2288     if (!zeroed[i]) {
2289       row = i/bs;
2290       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2291         for (k=0; k<bs; k++) {
2292           col = bs*baij->j[j] + k;
2293           if (zeroed[col]) {
2294             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2295             if (vecs) bb[i] -= aa[0]*xx[col];
2296             aa[0] = 0.0;
2297           }
2298         }
2299       }
2300     } else if (vecs) bb[i] = diag*xx[i];
2301   }
2302   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2303   if (vecs) {
2304     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2305     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2306   }
2307 
2308   /* zero the rows */
2309   for (i=0; i<is_n; i++) {
2310     row   = is_idx[i];
2311     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2312     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2313     for (k=0; k<count; k++) {
2314       aa[0] =  zero;
2315       aa   += bs;
2316     }
2317     if (diag != (PetscScalar)0.0) {
2318       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2319     }
2320   }
2321   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2327 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2328 {
2329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2330   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2331   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2332   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2333   PetscErrorCode ierr;
2334   PetscInt       ridx,cidx,bs2=a->bs2;
2335   PetscBool      roworiented=a->roworiented;
2336   MatScalar      *ap,value,*aa=a->a,*bap;
2337 
2338   PetscFunctionBegin;
2339   if (v) PetscValidScalarPointer(v,6);
2340   for (k=0; k<m; k++) { /* loop over added rows */
2341     row  = im[k];
2342     brow = row/bs;
2343     if (row < 0) continue;
2344 #if defined(PETSC_USE_DEBUG)
2345     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2346 #endif
2347     rp   = aj + ai[brow];
2348     ap   = aa + bs2*ai[brow];
2349     rmax = imax[brow];
2350     nrow = ailen[brow];
2351     low  = 0;
2352     high = nrow;
2353     for (l=0; l<n; l++) { /* loop over added columns */
2354       if (in[l] < 0) continue;
2355 #if defined(PETSC_USE_DEBUG)
2356       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2357 #endif
2358       col  = in[l]; bcol = col/bs;
2359       ridx = row % bs; cidx = col % bs;
2360       if (roworiented) {
2361         value = v[l + k*n];
2362       } else {
2363         value = v[k + l*m];
2364       }
2365       if (col <= lastcol) low = 0; else high = nrow;
2366       lastcol = col;
2367       while (high-low > 7) {
2368         t = (low+high)/2;
2369         if (rp[t] > bcol) high = t;
2370         else              low  = t;
2371       }
2372       for (i=low; i<high; i++) {
2373         if (rp[i] > bcol) break;
2374         if (rp[i] == bcol) {
2375           bap = ap +  bs2*i + bs*cidx + ridx;
2376           if (is == ADD_VALUES) *bap += value;
2377           else                  *bap  = value;
2378           goto noinsert1;
2379         }
2380       }
2381       if (nonew == 1) goto noinsert1;
2382       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2383       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2384       N = nrow++ - 1; high++;
2385       /* shift up all the later entries in this row */
2386       for (ii=N; ii>=i; ii--) {
2387         rp[ii+1] = rp[ii];
2388         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2389       }
2390       if (N>=i) {
2391         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2392       }
2393       rp[i]                      = bcol;
2394       ap[bs2*i + bs*cidx + ridx] = value;
2395       a->nz++;
2396 noinsert1:;
2397       low = i;
2398     }
2399     ailen[brow] = nrow;
2400   }
2401   A->same_nonzero = PETSC_FALSE;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2407 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2408 {
2409   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2410   Mat            outA;
2411   PetscErrorCode ierr;
2412   PetscBool      row_identity,col_identity;
2413 
2414   PetscFunctionBegin;
2415   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2416   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2417   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2418   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2419 
2420   outA            = inA;
2421   inA->factortype = MAT_FACTOR_LU;
2422 
2423   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2424 
2425   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2426   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2427   a->row = row;
2428   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2429   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2430   a->col = col;
2431 
2432   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2433   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2434   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2435   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2436 
2437   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2438   if (!a->solve_work) {
2439     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2440     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2441   }
2442   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2448 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2449 {
2450   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2451   PetscInt    i,nz,mbs;
2452 
2453   PetscFunctionBegin;
2454   nz  = baij->maxnz;
2455   mbs = baij->mbs;
2456   for (i=0; i<nz; i++) {
2457     baij->j[i] = indices[i];
2458   }
2459   baij->nz = nz;
2460   for (i=0; i<mbs; i++) {
2461     baij->ilen[i] = baij->imax[i];
2462   }
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2468 /*@
2469     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2470        in the matrix.
2471 
2472   Input Parameters:
2473 +  mat - the SeqBAIJ matrix
2474 -  indices - the column indices
2475 
2476   Level: advanced
2477 
2478   Notes:
2479     This can be called if you have precomputed the nonzero structure of the
2480   matrix and want to provide it to the matrix object to improve the performance
2481   of the MatSetValues() operation.
2482 
2483     You MUST have set the correct numbers of nonzeros per row in the call to
2484   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2485 
2486     MUST be called before any calls to MatSetValues();
2487 
2488 @*/
2489 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2490 {
2491   PetscErrorCode ierr;
2492 
2493   PetscFunctionBegin;
2494   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2495   PetscValidPointer(indices,2);
2496   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2502 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2503 {
2504   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2505   PetscErrorCode ierr;
2506   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2507   PetscReal      atmp;
2508   PetscScalar    *x,zero = 0.0;
2509   MatScalar      *aa;
2510   PetscInt       ncols,brow,krow,kcol;
2511 
2512   PetscFunctionBegin;
2513   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2514   bs  = A->rmap->bs;
2515   aa  = a->a;
2516   ai  = a->i;
2517   aj  = a->j;
2518   mbs = a->mbs;
2519 
2520   ierr = VecSet(v,zero);CHKERRQ(ierr);
2521   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2522   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2523   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2524   for (i=0; i<mbs; i++) {
2525     ncols = ai[1] - ai[0]; ai++;
2526     brow  = bs*i;
2527     for (j=0; j<ncols; j++) {
2528       for (kcol=0; kcol<bs; kcol++) {
2529         for (krow=0; krow<bs; krow++) {
2530           atmp = PetscAbsScalar(*aa);aa++;
2531           row  = brow + krow;   /* row index */
2532           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2533           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2534         }
2535       }
2536       aj++;
2537     }
2538   }
2539   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatCopy_SeqBAIJ"
2545 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2546 {
2547   PetscErrorCode ierr;
2548 
2549   PetscFunctionBegin;
2550   /* If the two matrices have the same copy implementation, use fast copy. */
2551   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2552     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2553     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2554     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2555 
2556     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2557     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2558     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2559   } else {
2560     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2561   }
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2567 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2568 {
2569   PetscErrorCode ierr;
2570 
2571   PetscFunctionBegin;
2572   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 #undef __FUNCT__
2577 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2578 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2579 {
2580   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2581 
2582   PetscFunctionBegin;
2583   *array = a->a;
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2589 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2590 {
2591   PetscFunctionBegin;
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2597 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2598 {
2599   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2600   PetscErrorCode ierr;
2601   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2602   PetscBLASInt   one=1;
2603 
2604   PetscFunctionBegin;
2605   if (str == SAME_NONZERO_PATTERN) {
2606     PetscScalar  alpha = a;
2607     PetscBLASInt bnz;
2608     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2609     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2610   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2611     if (y->xtoy && y->XtoY != X) {
2612       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2613       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2614     }
2615     if (!y->xtoy) { /* get xtoy */
2616       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2617       y->XtoY = X;
2618       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2619     }
2620     for (i=0; i<x->nz; i++) {
2621       j = 0;
2622       while (j < bs2) {
2623         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2624         j++;
2625       }
2626     }
2627     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2628   } else {
2629     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2630   }
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 #undef __FUNCT__
2635 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2636 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2637 {
2638   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2639   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2640   MatScalar   *aa = a->a;
2641 
2642   PetscFunctionBegin;
2643   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2649 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2650 {
2651   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2652   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2653   MatScalar   *aa = a->a;
2654 
2655   PetscFunctionBegin;
2656   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2664 /*
2665     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2666 */
2667 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2668 {
2669   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2670   PetscErrorCode ierr;
2671   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2672   PetscInt       nz = a->i[m],row,*jj,mr,col;
2673 
2674   PetscFunctionBegin;
2675   *nn = n;
2676   if (!ia) PetscFunctionReturn(0);
2677   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2678   else {
2679     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2680     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2681     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2682     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2683     jj   = a->j;
2684     for (i=0; i<nz; i++) {
2685       collengths[jj[i]]++;
2686     }
2687     cia[0] = oshift;
2688     for (i=0; i<n; i++) {
2689       cia[i+1] = cia[i] + collengths[i];
2690     }
2691     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2692     jj   = a->j;
2693     for (row=0; row<m; row++) {
2694       mr = a->i[row+1] - a->i[row];
2695       for (i=0; i<mr; i++) {
2696         col = *jj++;
2697 
2698         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2699       }
2700     }
2701     ierr = PetscFree(collengths);CHKERRQ(ierr);
2702     *ia  = cia; *ja = cja;
2703   }
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 #undef __FUNCT__
2708 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2709 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2710 {
2711   PetscErrorCode ierr;
2712 
2713   PetscFunctionBegin;
2714   if (!ia) PetscFunctionReturn(0);
2715   ierr = PetscFree(*ia);CHKERRQ(ierr);
2716   ierr = PetscFree(*ja);CHKERRQ(ierr);
2717   PetscFunctionReturn(0);
2718 }
2719 
2720 #undef __FUNCT__
2721 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2722 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2723 {
2724   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2725   PetscErrorCode ierr;
2726   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2727   PetscScalar    dx,*y,*xx,*w3_array;
2728   PetscScalar    *vscale_array;
2729   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2730   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2731   void           *fctx   = coloring->fctx;
2732   PetscBool      flg     = PETSC_FALSE;
2733   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2734   Vec            x1_tmp;
2735 
2736   PetscFunctionBegin;
2737   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2738   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2739   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2740   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2741 
2742   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2743   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2744   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2745   if (flg) {
2746     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2747   } else {
2748     PetscBool assembled;
2749     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2750     if (assembled) {
2751       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2752     }
2753   }
2754 
2755   x1_tmp = x1;
2756   if (!coloring->vscale) {
2757     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2758   }
2759 
2760   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2761     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2762   }
2763   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2764 
2765   /* Set w1 = F(x1) */
2766   if (!coloring->fset) {
2767     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2768     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2769     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2770   } else {
2771     coloring->fset = PETSC_FALSE;
2772   }
2773 
2774   if (!coloring->w3) {
2775     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2776     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2777   }
2778   w3 = coloring->w3;
2779 
2780   CHKMEMQ;
2781   /* Compute all the local scale factors, including ghost points */
2782   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2783   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2784   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2785   if (ctype == IS_COLORING_GHOSTED) {
2786     col_start = 0; col_end = N;
2787   } else if (ctype == IS_COLORING_GLOBAL) {
2788     xx           = xx - start;
2789     vscale_array = vscale_array - start;
2790     col_start    = start; col_end = N + start;
2791   }
2792   CHKMEMQ;
2793   for (col=col_start; col<col_end; col++) {
2794     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2795     if (coloring->htype[0] == 'w') {
2796       dx = 1.0 + unorm;
2797     } else {
2798       dx = xx[col];
2799     }
2800     if (dx == (PetscScalar)0.0) dx = 1.0;
2801     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2802     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2803     dx               *= epsilon;
2804     vscale_array[col] = (PetscScalar)1.0/dx;
2805   }     CHKMEMQ;
2806   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2807   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2808   if (ctype == IS_COLORING_GLOBAL) {
2809     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2810     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2811   }
2812   CHKMEMQ;
2813   if (coloring->vscaleforrow) {
2814     vscaleforrow = coloring->vscaleforrow;
2815   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2816 
2817   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2818   /*
2819     Loop over each color
2820   */
2821   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2822   for (k=0; k<coloring->ncolors; k++) {
2823     coloring->currentcolor = k;
2824     for (i=0; i<bs; i++) {
2825       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2826       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2827       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2828       /*
2829         Loop over each column associated with color
2830         adding the perturbation to the vector w3.
2831       */
2832       for (l=0; l<coloring->ncolumns[k]; l++) {
2833         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2834         if (coloring->htype[0] == 'w') {
2835           dx = 1.0 + unorm;
2836         } else {
2837           dx = xx[col];
2838         }
2839         if (dx == (PetscScalar)0.0) dx = 1.0;
2840         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2841         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2842         dx *= epsilon;
2843         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2844         w3_array[col] += dx;
2845       }
2846       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2847       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2848 
2849       /*
2850         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2851         w2 = F(x1 + dx) - F(x1)
2852       */
2853       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2854       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2855       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2856       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2857 
2858       /*
2859         Loop over rows of vector, putting results into Jacobian matrix
2860       */
2861       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2862       for (l=0; l<coloring->nrows[k]; l++) {
2863         row = bs*coloring->rows[k][l];                /* local row index */
2864         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2865         for (j=0; j<bs; j++) {
2866           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2867           srows[j]  = row + start + j;
2868         }
2869         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2870       }
2871       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2872     }
2873   } /* endof for each color */
2874   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2875   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2876   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2877   ierr = PetscFree(srows);CHKERRQ(ierr);
2878 
2879   coloring->currentcolor = -1;
2880 
2881   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2882   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2883   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 /* -------------------------------------------------------------------*/
2888 static struct _MatOps MatOps_Values = {
2889         MatSetValues_SeqBAIJ,
2890         MatGetRow_SeqBAIJ,
2891         MatRestoreRow_SeqBAIJ,
2892         MatMult_SeqBAIJ_N,
2893 /* 4*/  MatMultAdd_SeqBAIJ_N,
2894         MatMultTranspose_SeqBAIJ,
2895         MatMultTransposeAdd_SeqBAIJ,
2896         0,
2897         0,
2898         0,
2899 /* 10*/ 0,
2900         MatLUFactor_SeqBAIJ,
2901         0,
2902         0,
2903         MatTranspose_SeqBAIJ,
2904 /* 15*/ MatGetInfo_SeqBAIJ,
2905         MatEqual_SeqBAIJ,
2906         MatGetDiagonal_SeqBAIJ,
2907         MatDiagonalScale_SeqBAIJ,
2908         MatNorm_SeqBAIJ,
2909 /* 20*/ 0,
2910         MatAssemblyEnd_SeqBAIJ,
2911         MatSetOption_SeqBAIJ,
2912         MatZeroEntries_SeqBAIJ,
2913 /* 24*/ MatZeroRows_SeqBAIJ,
2914         0,
2915         0,
2916         0,
2917         0,
2918 /* 29*/ MatSetUp_SeqBAIJ,
2919         0,
2920         0,
2921         0,
2922         0,
2923 /* 34*/ MatDuplicate_SeqBAIJ,
2924         0,
2925         0,
2926         MatILUFactor_SeqBAIJ,
2927         0,
2928 /* 39*/ MatAXPY_SeqBAIJ,
2929         MatGetSubMatrices_SeqBAIJ,
2930         MatIncreaseOverlap_SeqBAIJ,
2931         MatGetValues_SeqBAIJ,
2932         MatCopy_SeqBAIJ,
2933 /* 44*/ 0,
2934         MatScale_SeqBAIJ,
2935         0,
2936         0,
2937         MatZeroRowsColumns_SeqBAIJ,
2938 /* 49*/ 0,
2939         MatGetRowIJ_SeqBAIJ,
2940         MatRestoreRowIJ_SeqBAIJ,
2941         MatGetColumnIJ_SeqBAIJ,
2942         MatRestoreColumnIJ_SeqBAIJ,
2943 /* 54*/ MatFDColoringCreate_SeqAIJ,
2944         0,
2945         0,
2946         0,
2947         MatSetValuesBlocked_SeqBAIJ,
2948 /* 59*/ MatGetSubMatrix_SeqBAIJ,
2949         MatDestroy_SeqBAIJ,
2950         MatView_SeqBAIJ,
2951         0,
2952         0,
2953 /* 64*/ 0,
2954         0,
2955         0,
2956         0,
2957         0,
2958 /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2959         0,
2960         MatConvert_Basic,
2961         0,
2962         0,
2963 /* 74*/ 0,
2964         MatFDColoringApply_BAIJ,
2965         0,
2966         0,
2967         0,
2968 /* 79*/ 0,
2969         0,
2970         0,
2971         0,
2972         MatLoad_SeqBAIJ,
2973 /* 84*/ 0,
2974         0,
2975         0,
2976         0,
2977         0,
2978 /* 89*/ 0,
2979         0,
2980         0,
2981         0,
2982         0,
2983 /* 94*/ 0,
2984         0,
2985         0,
2986         0,
2987         0,
2988 /* 99*/ 0,
2989         0,
2990         0,
2991         0,
2992         0,
2993 /*104*/ 0,
2994         MatRealPart_SeqBAIJ,
2995         MatImaginaryPart_SeqBAIJ,
2996         0,
2997         0,
2998 /*109*/ 0,
2999         0,
3000         0,
3001         0,
3002         MatMissingDiagonal_SeqBAIJ,
3003 /*114*/ 0,
3004         0,
3005         0,
3006         0,
3007         0,
3008 /*119*/ 0,
3009         0,
3010         MatMultHermitianTranspose_SeqBAIJ,
3011         MatMultHermitianTransposeAdd_SeqBAIJ,
3012         0,
3013 /*124*/ 0,
3014         0,
3015         MatInvertBlockDiagonal_SeqBAIJ
3016 };
3017 
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3020 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3021 {
3022   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3023   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
3024   PetscErrorCode ierr;
3025 
3026   PetscFunctionBegin;
3027   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3028 
3029   /* allocate space for values if not already there */
3030   if (!aij->saved_values) {
3031     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3032     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3033   }
3034 
3035   /* copy values over */
3036   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 #undef __FUNCT__
3041 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3042 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3043 {
3044   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3045   PetscErrorCode ierr;
3046   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3047 
3048   PetscFunctionBegin;
3049   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3050   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3051 
3052   /* copy values over */
3053   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 PETSC_EXTERN_C PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3058 PETSC_EXTERN_C PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3059 
3060 #undef __FUNCT__
3061 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3062 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3063 {
3064   Mat_SeqBAIJ    *b;
3065   PetscErrorCode ierr;
3066   PetscInt       i,mbs,nbs,bs2;
3067   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3068 
3069   PetscFunctionBegin;
3070   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3071   if (nz == MAT_SKIP_ALLOCATION) {
3072     skipallocation = PETSC_TRUE;
3073     nz             = 0;
3074   }
3075 
3076   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3077   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3078   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3079   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3080   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3081 
3082   B->preallocated = PETSC_TRUE;
3083 
3084   mbs = B->rmap->n/bs;
3085   nbs = B->cmap->n/bs;
3086   bs2 = bs*bs;
3087 
3088   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3089 
3090   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3091   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3092   if (nnz) {
3093     for (i=0; i<mbs; i++) {
3094       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3095       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3096     }
3097   }
3098 
3099   b    = (Mat_SeqBAIJ*)B->data;
3100   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3101   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3102   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3103 
3104   if (!flg) {
3105     switch (bs) {
3106     case 1:
3107       B->ops->mult    = MatMult_SeqBAIJ_1;
3108       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3109       B->ops->sor     = MatSOR_SeqBAIJ_1;
3110       break;
3111     case 2:
3112       B->ops->mult    = MatMult_SeqBAIJ_2;
3113       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3114       B->ops->sor     = MatSOR_SeqBAIJ_2;
3115       break;
3116     case 3:
3117       B->ops->mult    = MatMult_SeqBAIJ_3;
3118       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3119       B->ops->sor     = MatSOR_SeqBAIJ_3;
3120       break;
3121     case 4:
3122       B->ops->mult    = MatMult_SeqBAIJ_4;
3123       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3124       B->ops->sor     = MatSOR_SeqBAIJ_4;
3125       break;
3126     case 5:
3127       B->ops->mult    = MatMult_SeqBAIJ_5;
3128       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3129       B->ops->sor     = MatSOR_SeqBAIJ_5;
3130       break;
3131     case 6:
3132       B->ops->mult    = MatMult_SeqBAIJ_6;
3133       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3134       B->ops->sor     = MatSOR_SeqBAIJ_6;
3135       break;
3136     case 7:
3137       B->ops->mult    = MatMult_SeqBAIJ_7;
3138       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3139       B->ops->sor     = MatSOR_SeqBAIJ_7;
3140       break;
3141     case 15:
3142       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
3143       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3144       B->ops->sor     = MatSOR_SeqBAIJ_N;
3145       break;
3146     default:
3147       B->ops->mult    = MatMult_SeqBAIJ_N;
3148       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3149       B->ops->sor     = MatSOR_SeqBAIJ_N;
3150       break;
3151     }
3152   }
3153   b->mbs = mbs;
3154   b->nbs = nbs;
3155   if (!skipallocation) {
3156     if (!b->imax) {
3157       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3158       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3159 
3160       b->free_imax_ilen = PETSC_TRUE;
3161     }
3162     /* b->ilen will count nonzeros in each block row so far. */
3163     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3164     if (!nnz) {
3165       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3166       else if (nz < 0) nz = 1;
3167       for (i=0; i<mbs; i++) b->imax[i] = nz;
3168       nz = nz*mbs;
3169     } else {
3170       nz = 0;
3171       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3172     }
3173 
3174     /* allocate the matrix space */
3175     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3176     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3177     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3178     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3179     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3180 
3181     b->singlemalloc = PETSC_TRUE;
3182     b->i[0]         = 0;
3183     for (i=1; i<mbs+1; i++) {
3184       b->i[i] = b->i[i-1] + b->imax[i-1];
3185     }
3186     b->free_a  = PETSC_TRUE;
3187     b->free_ij = PETSC_TRUE;
3188   } else {
3189     b->free_a  = PETSC_FALSE;
3190     b->free_ij = PETSC_FALSE;
3191   }
3192 
3193   b->bs2              = bs2;
3194   b->mbs              = mbs;
3195   b->nz               = 0;
3196   b->maxnz            = nz;
3197   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3198   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3199   PetscFunctionReturn(0);
3200 }
3201 
3202 #undef __FUNCT__
3203 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3204 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3205 {
3206   PetscInt       i,m,nz,nz_max=0,*nnz;
3207   PetscScalar    *values=0;
3208   PetscErrorCode ierr;
3209 
3210   PetscFunctionBegin;
3211   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3212   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3213   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3214   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3215   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3216   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3217   m    = B->rmap->n/bs;
3218 
3219   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3220   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3221   for (i=0; i<m; i++) {
3222     nz = ii[i+1]- ii[i];
3223     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3224     nz_max = PetscMax(nz_max, nz);
3225     nnz[i] = nz;
3226   }
3227   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3228   ierr = PetscFree(nnz);CHKERRQ(ierr);
3229 
3230   values = (PetscScalar*)V;
3231   if (!values) {
3232     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3233     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3234   }
3235   for (i=0; i<m; i++) {
3236     PetscInt          ncols  = ii[i+1] - ii[i];
3237     const PetscInt    *icols = jj + ii[i];
3238     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3239     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3240   }
3241   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3242   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3243   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3244   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 PETSC_EXTERN_C PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3249 PETSC_EXTERN_C PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3250 #if defined(PETSC_HAVE_MUMPS)
3251 PETSC_EXTERN_C PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3252 #endif
3253 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3254 
3255 /*MC
3256    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3257    block sparse compressed row format.
3258 
3259    Options Database Keys:
3260 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3261 
3262   Level: beginner
3263 
3264 .seealso: MatCreateSeqBAIJ()
3265 M*/
3266 
3267 PETSC_EXTERN_C PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3268 
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatCreate_SeqBAIJ"
3271 PETSC_EXTERN_C PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3272 {
3273   PetscErrorCode ierr;
3274   PetscMPIInt    size;
3275   Mat_SeqBAIJ    *b;
3276 
3277   PetscFunctionBegin;
3278   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3279   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3280 
3281   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3282   B->data = (void*)b;
3283   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3284 
3285   b->row          = 0;
3286   b->col          = 0;
3287   b->icol         = 0;
3288   b->reallocs     = 0;
3289   b->saved_values = 0;
3290 
3291   b->roworiented        = PETSC_TRUE;
3292   b->nonew              = 0;
3293   b->diag               = 0;
3294   b->solve_work         = 0;
3295   b->mult_work          = 0;
3296   B->spptr              = 0;
3297   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3298   b->keepnonzeropattern = PETSC_FALSE;
3299   b->xtoy               = 0;
3300   b->XtoY               = 0;
3301   B->same_nonzero       = PETSC_FALSE;
3302 
3303   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3304   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3305   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3306 #if defined(PETSC_HAVE_MUMPS)
3307   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3308 #endif
3309   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3310   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3311   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3312   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3313   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3314   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3315   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3316   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3317   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3318   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3319   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3320   PetscFunctionReturn(0);
3321 }
3322 
3323 #undef __FUNCT__
3324 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3325 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3326 {
3327   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3328   PetscErrorCode ierr;
3329   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3330 
3331   PetscFunctionBegin;
3332   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3333 
3334   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3335     c->imax           = a->imax;
3336     c->ilen           = a->ilen;
3337     c->free_imax_ilen = PETSC_FALSE;
3338   } else {
3339     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3340     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3341     for (i=0; i<mbs; i++) {
3342       c->imax[i] = a->imax[i];
3343       c->ilen[i] = a->ilen[i];
3344     }
3345     c->free_imax_ilen = PETSC_TRUE;
3346   }
3347 
3348   /* allocate the matrix space */
3349   if (mallocmatspace) {
3350     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3351       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3352       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3353       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3354 
3355       c->i            = a->i;
3356       c->j            = a->j;
3357       c->singlemalloc = PETSC_FALSE;
3358       c->free_a       = PETSC_TRUE;
3359       c->free_ij      = PETSC_FALSE;
3360       c->parent       = A;
3361       C->preallocated = PETSC_TRUE;
3362       C->assembled    = PETSC_TRUE;
3363 
3364       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3365       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3366       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3367     } else {
3368       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3369       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3370 
3371       c->singlemalloc = PETSC_TRUE;
3372       c->free_a       = PETSC_TRUE;
3373       c->free_ij      = PETSC_TRUE;
3374 
3375       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3376       if (mbs > 0) {
3377         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3378         if (cpvalues == MAT_COPY_VALUES) {
3379           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3380         } else {
3381           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3382         }
3383       }
3384       C->preallocated = PETSC_TRUE;
3385       C->assembled    = PETSC_TRUE;
3386     }
3387   }
3388 
3389   c->roworiented = a->roworiented;
3390   c->nonew       = a->nonew;
3391 
3392   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3393   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3394 
3395   c->bs2         = a->bs2;
3396   c->mbs         = a->mbs;
3397   c->nbs         = a->nbs;
3398 
3399   if (a->diag) {
3400     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3401       c->diag      = a->diag;
3402       c->free_diag = PETSC_FALSE;
3403     } else {
3404       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3405       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3406       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3407       c->free_diag = PETSC_TRUE;
3408     }
3409   } else c->diag = 0;
3410 
3411   c->nz         = a->nz;
3412   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3413   c->solve_work = 0;
3414   c->mult_work  = 0;
3415 
3416   c->compressedrow.use   = a->compressedrow.use;
3417   c->compressedrow.nrows = a->compressedrow.nrows;
3418   c->compressedrow.check = a->compressedrow.check;
3419   if (a->compressedrow.use) {
3420     i    = a->compressedrow.nrows;
3421     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3422     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3423     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3424     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3425   } else {
3426     c->compressedrow.use    = PETSC_FALSE;
3427     c->compressedrow.i      = NULL;
3428     c->compressedrow.rindex = NULL;
3429   }
3430   C->same_nonzero = A->same_nonzero;
3431 
3432   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3433   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3434   PetscFunctionReturn(0);
3435 }
3436 
3437 #undef __FUNCT__
3438 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3439 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3440 {
3441   PetscErrorCode ierr;
3442 
3443   PetscFunctionBegin;
3444   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3445   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3446   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3447   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3448   PetscFunctionReturn(0);
3449 }
3450 
3451 #undef __FUNCT__
3452 #define __FUNCT__ "MatLoad_SeqBAIJ"
3453 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3454 {
3455   Mat_SeqBAIJ    *a;
3456   PetscErrorCode ierr;
3457   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3458   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3459   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3460   PetscInt       *masked,nmask,tmp,bs2,ishift;
3461   PetscMPIInt    size;
3462   int            fd;
3463   PetscScalar    *aa;
3464   MPI_Comm       comm;
3465 
3466   PetscFunctionBegin;
3467   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3468   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3469   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3470   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3471   bs2  = bs*bs;
3472 
3473   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3474   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3475   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3476   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3477   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3478   M = header[1]; N = header[2]; nz = header[3];
3479 
3480   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3481   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3482 
3483   /*
3484      This code adds extra rows to make sure the number of rows is
3485     divisible by the blocksize
3486   */
3487   mbs        = M/bs;
3488   extra_rows = bs - M + bs*(mbs);
3489   if (extra_rows == bs) extra_rows = 0;
3490   else mbs++;
3491   if (extra_rows) {
3492     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3493   }
3494 
3495   /* Set global sizes if not already set */
3496   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3497     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3498   } else { /* Check if the matrix global sizes are correct */
3499     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3500     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3501       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3502     }
3503     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3504   }
3505 
3506   /* read in row lengths */
3507   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3508   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3509   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3510 
3511   /* read in column indices */
3512   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3513   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3514   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3515 
3516   /* loop over row lengths determining block row lengths */
3517   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3518   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3519   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3520   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3521   rowcount = 0;
3522   nzcount  = 0;
3523   for (i=0; i<mbs; i++) {
3524     nmask = 0;
3525     for (j=0; j<bs; j++) {
3526       kmax = rowlengths[rowcount];
3527       for (k=0; k<kmax; k++) {
3528         tmp = jj[nzcount++]/bs;
3529         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3530       }
3531       rowcount++;
3532     }
3533     browlengths[i] += nmask;
3534     /* zero out the mask elements we set */
3535     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3536   }
3537 
3538   /* Do preallocation  */
3539   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3540   a    = (Mat_SeqBAIJ*)newmat->data;
3541 
3542   /* set matrix "i" values */
3543   a->i[0] = 0;
3544   for (i=1; i<= mbs; i++) {
3545     a->i[i]      = a->i[i-1] + browlengths[i-1];
3546     a->ilen[i-1] = browlengths[i-1];
3547   }
3548   a->nz = 0;
3549   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3550 
3551   /* read in nonzero values */
3552   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3553   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3554   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3555 
3556   /* set "a" and "j" values into matrix */
3557   nzcount = 0; jcount = 0;
3558   for (i=0; i<mbs; i++) {
3559     nzcountb = nzcount;
3560     nmask    = 0;
3561     for (j=0; j<bs; j++) {
3562       kmax = rowlengths[i*bs+j];
3563       for (k=0; k<kmax; k++) {
3564         tmp = jj[nzcount++]/bs;
3565         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3566       }
3567     }
3568     /* sort the masked values */
3569     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3570 
3571     /* set "j" values into matrix */
3572     maskcount = 1;
3573     for (j=0; j<nmask; j++) {
3574       a->j[jcount++]  = masked[j];
3575       mask[masked[j]] = maskcount++;
3576     }
3577     /* set "a" values into matrix */
3578     ishift = bs2*a->i[i];
3579     for (j=0; j<bs; j++) {
3580       kmax = rowlengths[i*bs+j];
3581       for (k=0; k<kmax; k++) {
3582         tmp       = jj[nzcountb]/bs;
3583         block     = mask[tmp] - 1;
3584         point     = jj[nzcountb] - bs*tmp;
3585         idx       = ishift + bs2*block + j + bs*point;
3586         a->a[idx] = (MatScalar)aa[nzcountb++];
3587       }
3588     }
3589     /* zero out the mask elements we set */
3590     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3591   }
3592   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3593 
3594   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3595   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3596   ierr = PetscFree(aa);CHKERRQ(ierr);
3597   ierr = PetscFree(jj);CHKERRQ(ierr);
3598   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3599 
3600   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3601   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3602   PetscFunctionReturn(0);
3603 }
3604 
3605 #undef __FUNCT__
3606 #define __FUNCT__ "MatCreateSeqBAIJ"
3607 /*@C
3608    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3609    compressed row) format.  For good matrix assembly performance the
3610    user should preallocate the matrix storage by setting the parameter nz
3611    (or the array nnz).  By setting these parameters accurately, performance
3612    during matrix assembly can be increased by more than a factor of 50.
3613 
3614    Collective on MPI_Comm
3615 
3616    Input Parameters:
3617 +  comm - MPI communicator, set to PETSC_COMM_SELF
3618 .  bs - size of block
3619 .  m - number of rows
3620 .  n - number of columns
3621 .  nz - number of nonzero blocks  per block row (same for all rows)
3622 -  nnz - array containing the number of nonzero blocks in the various block rows
3623          (possibly different for each block row) or NULL
3624 
3625    Output Parameter:
3626 .  A - the matrix
3627 
3628    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3629    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3630    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3631 
3632    Options Database Keys:
3633 .   -mat_no_unroll - uses code that does not unroll the loops in the
3634                      block calculations (much slower)
3635 .    -mat_block_size - size of the blocks to use
3636 
3637    Level: intermediate
3638 
3639    Notes:
3640    The number of rows and columns must be divisible by blocksize.
3641 
3642    If the nnz parameter is given then the nz parameter is ignored
3643 
3644    A nonzero block is any block that as 1 or more nonzeros in it
3645 
3646    The block AIJ format is fully compatible with standard Fortran 77
3647    storage.  That is, the stored row and column indices can begin at
3648    either one (as in Fortran) or zero.  See the users' manual for details.
3649 
3650    Specify the preallocated storage with either nz or nnz (not both).
3651    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3652    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3653    matrices.
3654 
3655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3656 @*/
3657 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3658 {
3659   PetscErrorCode ierr;
3660 
3661   PetscFunctionBegin;
3662   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3663   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3664   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3665   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3666   PetscFunctionReturn(0);
3667 }
3668 
3669 #undef __FUNCT__
3670 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3671 /*@C
3672    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3673    per row in the matrix. For good matrix assembly performance the
3674    user should preallocate the matrix storage by setting the parameter nz
3675    (or the array nnz).  By setting these parameters accurately, performance
3676    during matrix assembly can be increased by more than a factor of 50.
3677 
3678    Collective on MPI_Comm
3679 
3680    Input Parameters:
3681 +  A - the matrix
3682 .  bs - size of block
3683 .  nz - number of block nonzeros per block row (same for all rows)
3684 -  nnz - array containing the number of block nonzeros in the various block rows
3685          (possibly different for each block row) or NULL
3686 
3687    Options Database Keys:
3688 .   -mat_no_unroll - uses code that does not unroll the loops in the
3689                      block calculations (much slower)
3690 .    -mat_block_size - size of the blocks to use
3691 
3692    Level: intermediate
3693 
3694    Notes:
3695    If the nnz parameter is given then the nz parameter is ignored
3696 
3697    You can call MatGetInfo() to get information on how effective the preallocation was;
3698    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3699    You can also run with the option -info and look for messages with the string
3700    malloc in them to see if additional memory allocation was needed.
3701 
3702    The block AIJ format is fully compatible with standard Fortran 77
3703    storage.  That is, the stored row and column indices can begin at
3704    either one (as in Fortran) or zero.  See the users' manual for details.
3705 
3706    Specify the preallocated storage with either nz or nnz (not both).
3707    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3708    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3709 
3710 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3711 @*/
3712 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3713 {
3714   PetscErrorCode ierr;
3715 
3716   PetscFunctionBegin;
3717   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3718   PetscValidType(B,1);
3719   PetscValidLogicalCollectiveInt(B,bs,2);
3720   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3721   PetscFunctionReturn(0);
3722 }
3723 
3724 #undef __FUNCT__
3725 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3726 /*@C
3727    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3728    (the default sequential PETSc format).
3729 
3730    Collective on MPI_Comm
3731 
3732    Input Parameters:
3733 +  A - the matrix
3734 .  i - the indices into j for the start of each local row (starts with zero)
3735 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3736 -  v - optional values in the matrix
3737 
3738    Level: developer
3739 
3740 .keywords: matrix, aij, compressed row, sparse
3741 
3742 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3743 @*/
3744 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3745 {
3746   PetscErrorCode ierr;
3747 
3748   PetscFunctionBegin;
3749   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3750   PetscValidType(B,1);
3751   PetscValidLogicalCollectiveInt(B,bs,2);
3752   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 
3757 #undef __FUNCT__
3758 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3759 /*@
3760      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3761 
3762      Collective on MPI_Comm
3763 
3764    Input Parameters:
3765 +  comm - must be an MPI communicator of size 1
3766 .  bs - size of block
3767 .  m - number of rows
3768 .  n - number of columns
3769 .  i - row indices
3770 .  j - column indices
3771 -  a - matrix values
3772 
3773    Output Parameter:
3774 .  mat - the matrix
3775 
3776    Level: advanced
3777 
3778    Notes:
3779        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3780     once the matrix is destroyed
3781 
3782        You cannot set new nonzero locations into this matrix, that will generate an error.
3783 
3784        The i and j indices are 0 based
3785 
3786        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3787 
3788 
3789 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3790 
3791 @*/
3792 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3793 {
3794   PetscErrorCode ierr;
3795   PetscInt       ii;
3796   Mat_SeqBAIJ    *baij;
3797 
3798   PetscFunctionBegin;
3799   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3800   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3801 
3802   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3803   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3804   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3805   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3806   baij = (Mat_SeqBAIJ*)(*mat)->data;
3807   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3808   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3809 
3810   baij->i = i;
3811   baij->j = j;
3812   baij->a = a;
3813 
3814   baij->singlemalloc = PETSC_FALSE;
3815   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3816   baij->free_a       = PETSC_FALSE;
3817   baij->free_ij      = PETSC_FALSE;
3818 
3819   for (ii=0; ii<m; ii++) {
3820     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3821 #if defined(PETSC_USE_DEBUG)
3822     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3823 #endif
3824   }
3825 #if defined(PETSC_USE_DEBUG)
3826   for (ii=0; ii<baij->i[m]; ii++) {
3827     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3828     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3829   }
3830 #endif
3831 
3832   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3834   PetscFunctionReturn(0);
3835 }
3836