xref: /petsc/src/mat/impls/baij/seq/baij.c (revision b2ec919abbf0a8f8b84d3929169bda8d69c39203)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,x1,s1;
133   const PetscScalar *b;
134   const MatScalar   *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode    ierr;
136   PetscInt          m = a->mbs,i,i2,nz,j;
137   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
154   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
155 
156   if (flag & SOR_ZERO_INITIAL_GUESS) {
157     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
158       x[0]   = b[0]*idiag[0];
159       i2     = 1;
160       idiag += 1;
161       for (i=1; i<m; i++) {
162         v  = aa + ai[i];
163         vi = aj + ai[i];
164         nz = diag[i] - ai[i];
165         s1 = b[i2];
166         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
167         x[i2]  = idiag[0]*s1;
168         idiag += 1;
169         i2    += 1;
170       }
171       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
172       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
173     }
174     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
175         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
176       i2    = 0;
177       mdiag = a->idiag+a->mbs;
178       for (i=0; i<m; i++) {
179         x1     = x[i2];
180         x[i2]  = mdiag[0]*x1;
181         mdiag += 1;
182         i2    += 1;
183       }
184       ierr = PetscLogFlops(m);CHKERRQ(ierr);
185     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
186       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
187     }
188     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
189       idiag  = a->idiag+a->mbs - 1;
190       i2     = m - 1;
191       x1     = x[i2];
192       x[i2]  = idiag[0]*x1;
193       idiag -= 1;
194       i2    -= 1;
195       for (i=m-2; i>=0; i--) {
196         v  = aa + (diag[i]+1);
197         vi = aj + diag[i] + 1;
198         nz = ai[i+1] - diag[i] - 1;
199         s1 = x[i2];
200         for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]];
201         x[i2]  = idiag[0]*s1;
202         idiag -= 1;
203         i2    -= 1;
204       }
205       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
206     }
207   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
209   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
215 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
216 {
217   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
218   PetscScalar       *x,x1,x2,s1,s2;
219   const PetscScalar *b;
220   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
221   PetscErrorCode    ierr;
222   PetscInt          m = a->mbs,i,i2,nz,idx,j,it;
223   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
224 
225   PetscFunctionBegin;
226   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
227   its = its*lits;
228   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
229   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
230   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
231   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
232   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
233 
234   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
235 
236   if (!m) PetscFunctionReturn(0);
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
244       x[0]   = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1]   = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249         v  = aa + 4*ai[i];
250         vi = aj + ai[i];
251         nz = diag[i] - ai[i];
252         s1 = b[i2]; s2 = b[i2+1];
253         for (j=0; j<nz; j++) {
254           idx = 2*vi[j];
255           it  = 4*j;
256           x1  = x[idx]; x2 = x[1+idx];
257           s1 -= v[it]*x1 + v[it+2]*x2;
258           s2 -= v[it+1]*x1 + v[it+3]*x2;
259         }
260         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag  += 4;
263         i2     += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag  -= 4;
290       i2     -= 2;
291       for (i=m-2; i>=0; i--) {
292         v  = aa + 4*(diag[i]+1);
293         vi = aj + diag[i] + 1;
294         nz = ai[i+1] - diag[i] - 1;
295         s1 = x[i2]; s2 = x[i2+1];
296         for (j=0; j<nz; j++) {
297           idx = 2*vi[j];
298           it  = 4*j;
299           x1  = x[idx]; x2 = x[1+idx];
300           s1 -= v[it]*x1 + v[it+2]*x2;
301           s2 -= v[it+1]*x1 + v[it+3]*x2;
302         }
303         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag  -= 4;
306         i2     -= 2;
307       }
308       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
309     }
310   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
311   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
312   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 #undef __FUNCT__
317 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
318 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
319 {
320   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
321   PetscScalar       *x,x1,x2,x3,s1,s2,s3;
322   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
323   const PetscScalar *b;
324   PetscErrorCode    ierr;
325   PetscInt          m = a->mbs,i,i2,nz,idx;
326   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
327 
328   PetscFunctionBegin;
329   its = its*lits;
330   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
331   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
332   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
333   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
334   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
335   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
336 
337   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
338 
339   if (!m) PetscFunctionReturn(0);
340   diag  = a->diag;
341   idiag = a->idiag;
342   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
343   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
344 
345   if (flag & SOR_ZERO_INITIAL_GUESS) {
346     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
347       x[0]   = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
348       x[1]   = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
349       x[2]   = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
350       i2     = 3;
351       idiag += 9;
352       for (i=1; i<m; i++) {
353         v  = aa + 9*ai[i];
354         vi = aj + ai[i];
355         nz = diag[i] - ai[i];
356         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
357         while (nz--) {
358           idx = 3*(*vi++);
359           x1  = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
360           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
361           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
362           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
363           v  += 9;
364         }
365         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
366         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
367         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
368         idiag  += 9;
369         i2     += 3;
370       }
371       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
373     }
374     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
375         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
376       i2    = 0;
377       mdiag = a->idiag+9*a->mbs;
378       for (i=0; i<m; i++) {
379         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
380         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
381         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
382         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
383         mdiag  += 9;
384         i2     += 3;
385       }
386       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
387     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
388       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
389     }
390     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
391       idiag   = a->idiag+9*a->mbs - 9;
392       i2      = 3*m - 3;
393       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
394       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
395       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
396       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
397       idiag  -= 9;
398       i2     -= 3;
399       for (i=m-2; i>=0; i--) {
400         v  = aa + 9*(diag[i]+1);
401         vi = aj + diag[i] + 1;
402         nz = ai[i+1] - diag[i] - 1;
403         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
404         while (nz--) {
405           idx = 3*(*vi++);
406           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
407           s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
408           s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
409           s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
410           v  += 9;
411         }
412         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
413         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
414         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
415         idiag  -= 9;
416         i2     -= 3;
417       }
418       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
419     }
420   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
421   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
422   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
428 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
429 {
430   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
431   PetscScalar       *x,x1,x2,x3,x4,s1,s2,s3,s4;
432   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
433   const PetscScalar *b;
434   PetscErrorCode    ierr;
435   PetscInt          m = a->mbs,i,i2,nz,idx;
436   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
437 
438   PetscFunctionBegin;
439   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
440   its = its*lits;
441   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
442   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
443   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
444   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
445   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
446 
447   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
448 
449   if (!m) PetscFunctionReturn(0);
450   diag  = a->diag;
451   idiag = a->idiag;
452   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
453   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
454 
455   if (flag & SOR_ZERO_INITIAL_GUESS) {
456     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
457       x[0]   = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
458       x[1]   = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
459       x[2]   = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
460       x[3]   = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
461       i2     = 4;
462       idiag += 16;
463       for (i=1; i<m; i++) {
464         v  = aa + 16*ai[i];
465         vi = aj + ai[i];
466         nz = diag[i] - ai[i];
467         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
468         while (nz--) {
469           idx = 4*(*vi++);
470           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
471           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
472           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
473           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
474           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
475           v  += 16;
476         }
477         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
478         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
479         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
480         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
481         idiag  += 16;
482         i2     += 4;
483       }
484       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
485       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
486     }
487     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
488         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
489       i2    = 0;
490       mdiag = a->idiag+16*a->mbs;
491       for (i=0; i<m; i++) {
492         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
493         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
494         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
495         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
496         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
497         mdiag  += 16;
498         i2     += 4;
499       }
500       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
501     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
502       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
503     }
504     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
505       idiag   = a->idiag+16*a->mbs - 16;
506       i2      = 4*m - 4;
507       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
508       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
509       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
510       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
511       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
512       idiag  -= 16;
513       i2     -= 4;
514       for (i=m-2; i>=0; i--) {
515         v  = aa + 16*(diag[i]+1);
516         vi = aj + diag[i] + 1;
517         nz = ai[i+1] - diag[i] - 1;
518         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
519         while (nz--) {
520           idx = 4*(*vi++);
521           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
522           s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
523           s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
524           s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
525           s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
526           v  += 16;
527         }
528         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
529         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
530         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
531         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
532         idiag  -= 16;
533         i2     -= 4;
534       }
535       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
536     }
537   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
538   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
539   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
540   PetscFunctionReturn(0);
541 }
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
545 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
546 {
547   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
548   PetscScalar       *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
549   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
550   const PetscScalar *b;
551   PetscErrorCode    ierr;
552   PetscInt          m = a->mbs,i,i2,nz,idx;
553   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
554 
555   PetscFunctionBegin;
556   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
557   its = its*lits;
558   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
559   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
560   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
561   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
562   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
563 
564   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
565 
566   if (!m) PetscFunctionReturn(0);
567   diag  = a->diag;
568   idiag = a->idiag;
569   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
570   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
571 
572   if (flag & SOR_ZERO_INITIAL_GUESS) {
573     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
574       x[0]   = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
575       x[1]   = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
576       x[2]   = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
577       x[3]   = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
578       x[4]   = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
579       i2     = 5;
580       idiag += 25;
581       for (i=1; i<m; i++) {
582         v  = aa + 25*ai[i];
583         vi = aj + ai[i];
584         nz = diag[i] - ai[i];
585         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
586         while (nz--) {
587           idx = 5*(*vi++);
588           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
589           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
590           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
591           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
592           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
593           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
594           v  += 25;
595         }
596         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
597         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
598         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
599         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
600         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
601         idiag  += 25;
602         i2     += 5;
603       }
604       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
605       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
606     }
607     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
608         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
609       i2    = 0;
610       mdiag = a->idiag+25*a->mbs;
611       for (i=0; i<m; i++) {
612         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
613         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
614         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
615         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
616         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
617         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
618         mdiag  += 25;
619         i2     += 5;
620       }
621       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
622     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
623       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
624     }
625     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       idiag   = a->idiag+25*a->mbs - 25;
627       i2      = 5*m - 5;
628       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
629       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
630       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
631       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
632       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
633       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
634       idiag  -= 25;
635       i2     -= 5;
636       for (i=m-2; i>=0; i--) {
637         v  = aa + 25*(diag[i]+1);
638         vi = aj + diag[i] + 1;
639         nz = ai[i+1] - diag[i] - 1;
640         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
641         while (nz--) {
642           idx = 5*(*vi++);
643           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
644           s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
645           s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
646           s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
647           s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
648           s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
649           v  += 25;
650         }
651         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
652         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
653         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
654         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
655         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
656         idiag  -= 25;
657         i2     -= 5;
658       }
659       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
660     }
661   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
662   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
663   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
669 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
670 {
671   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
672   PetscScalar       *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
673   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
674   const PetscScalar *b;
675   PetscErrorCode    ierr;
676   PetscInt          m = a->mbs,i,i2,nz,idx;
677   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
678 
679   PetscFunctionBegin;
680   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
681   its = its*lits;
682   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
683   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
684   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
685   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
686   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
687 
688   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
689 
690   if (!m) PetscFunctionReturn(0);
691   diag  = a->diag;
692   idiag = a->idiag;
693   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
694   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
695 
696   if (flag & SOR_ZERO_INITIAL_GUESS) {
697     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
698       x[0]   = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
699       x[1]   = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
700       x[2]   = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
701       x[3]   = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
702       x[4]   = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
703       x[5]   = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
704       i2     = 6;
705       idiag += 36;
706       for (i=1; i<m; i++) {
707         v  = aa + 36*ai[i];
708         vi = aj + ai[i];
709         nz = diag[i] - ai[i];
710         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
711         while (nz--) {
712           idx = 6*(*vi++);
713           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
714           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
715           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
716           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
717           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
718           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
719           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
720           v  += 36;
721         }
722         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
723         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
724         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
725         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
726         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
727         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
728         idiag  += 36;
729         i2     += 6;
730       }
731       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
732       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
733     }
734     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
735         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
736       i2    = 0;
737       mdiag = a->idiag+36*a->mbs;
738       for (i=0; i<m; i++) {
739         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
740         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
741         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
742         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
743         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
744         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
745         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
746         mdiag  += 36;
747         i2     += 6;
748       }
749       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
750     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
751       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
752     }
753     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
754       idiag   = a->idiag+36*a->mbs - 36;
755       i2      = 6*m - 6;
756       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
757       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
758       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
759       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
760       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
761       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
762       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
763       idiag  -= 36;
764       i2     -= 6;
765       for (i=m-2; i>=0; i--) {
766         v  = aa + 36*(diag[i]+1);
767         vi = aj + diag[i] + 1;
768         nz = ai[i+1] - diag[i] - 1;
769         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
770         while (nz--) {
771           idx = 6*(*vi++);
772           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
773           s1 -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
774           s2 -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
775           s3 -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
776           s4 -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
777           s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
778           s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
779           v  += 36;
780         }
781         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
782         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
783         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
784         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
785         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
786         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
787         idiag  -= 36;
788         i2     -= 6;
789       }
790       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
791     }
792   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
793   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
794   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
800 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
801 {
802   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
803   PetscScalar       *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
804   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
805   const PetscScalar *b;
806   PetscErrorCode    ierr;
807   PetscInt          m = a->mbs,i,i2,nz,idx;
808   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
809 
810   PetscFunctionBegin;
811   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
812   its = its*lits;
813   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
814   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
815   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
816   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
817   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
818 
819   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
820 
821   if (!m) PetscFunctionReturn(0);
822   diag  = a->diag;
823   idiag = a->idiag;
824   ierr  = VecGetArray(xx,&x);CHKERRQ(ierr);
825   ierr  = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
826 
827   if (flag & SOR_ZERO_INITIAL_GUESS) {
828     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
829       x[0]   = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
830       x[1]   = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
831       x[2]   = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
832       x[3]   = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
833       x[4]   = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
834       x[5]   = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
835       x[6]   = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
836       i2     = 7;
837       idiag += 49;
838       for (i=1; i<m; i++) {
839         v  = aa + 49*ai[i];
840         vi = aj + ai[i];
841         nz = diag[i] - ai[i];
842         s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
843         while (nz--) {
844           idx = 7*(*vi++);
845           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
846           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
847           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
848           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
849           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
850           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
851           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
852           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
853           v  += 49;
854         }
855         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
856         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
857         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
858         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
859         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
860         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
861         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
862         idiag  += 49;
863         i2     += 7;
864       }
865       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
866       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
867     }
868     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
869         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
870       i2    = 0;
871       mdiag = a->idiag+49*a->mbs;
872       for (i=0; i<m; i++) {
873         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
874         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
875         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
876         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
877         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
878         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
879         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
880         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
881         mdiag  += 49;
882         i2     += 7;
883       }
884       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
885     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
886       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
887     }
888     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
889       idiag   = a->idiag+49*a->mbs - 49;
890       i2      = 7*m - 7;
891       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
892       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
893       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
894       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
895       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
896       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
897       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
898       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
899       idiag  -= 49;
900       i2     -= 7;
901       for (i=m-2; i>=0; i--) {
902         v  = aa + 49*(diag[i]+1);
903         vi = aj + diag[i] + 1;
904         nz = ai[i+1] - diag[i] - 1;
905         s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
906         while (nz--) {
907           idx = 7*(*vi++);
908           x1  = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
909           s1 -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
910           s2 -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
911           s3 -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
912           s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
913           s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
914           s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
915           s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
916           v  += 49;
917         }
918         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
919         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
920         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
921         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
922         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
923         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
924         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
925         idiag  -= 49;
926         i2     -= 7;
927       }
928       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
929     }
930   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
931   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
932   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
938 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
939 {
940   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
941   PetscScalar       *x,*work,*w,*workt;
942   const MatScalar   *v,*aa = a->a, *idiag,*mdiag;
943   const PetscScalar *b;
944   PetscErrorCode    ierr;
945   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
946   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
947 
948   PetscFunctionBegin;
949   its = its*lits;
950   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
951   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
952   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
953   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
954   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
955   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
956 
957   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
958 
959   if (!m) PetscFunctionReturn(0);
960   diag  = a->diag;
961   idiag = a->idiag;
962   if (!a->mult_work) {
963     k    = PetscMax(A->rmap->n,A->cmap->n);
964     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
965   }
966   work = a->mult_work;
967   if (!a->sor_work) {
968     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
969   }
970   w = a->sor_work;
971 
972   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
973   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
974 
975   if (flag & SOR_ZERO_INITIAL_GUESS) {
976     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
977       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
978       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
979       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
980       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
981       i2     = bs;
982       idiag += bs2;
983       for (i=1; i<m; i++) {
984         v  = aa + bs2*ai[i];
985         vi = aj + ai[i];
986         nz = diag[i] - ai[i];
987 
988         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
989         /* copy all rows of x that are needed into contiguous space */
990         workt = work;
991         for (j=0; j<nz; j++) {
992           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
993           workt += bs;
994         }
995         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
996         /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
997          while (nz--) {
998            idx  = N*(*vi++);
999            x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1000            s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1001            s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1002            s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1003            v   += N2;
1004            } */
1005 
1006         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1007         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1008         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1009         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1010 
1011         idiag += bs2;
1012         i2    += bs;
1013       }
1014       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1015       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1016     }
1017     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1018         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1019       i2    = 0;
1020       mdiag = a->idiag+bs2*a->mbs;
1021       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1022       for (i=0; i<m; i++) {
1023         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1024         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1025         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1026         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1027         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1028 
1029         mdiag += bs2;
1030         i2    += bs;
1031       }
1032       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1033     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1034       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1035     }
1036     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1037       idiag = a->idiag+bs2*a->mbs - bs2;
1038       i2    = bs*m - bs;
1039       ierr  = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1040       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1041       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1042       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1043       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1044       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1045       idiag -= bs2;
1046       i2    -= bs;
1047       for (i=m-2; i>=0; i--) {
1048         v  = aa + bs2*(diag[i]+1);
1049         vi = aj + diag[i] + 1;
1050         nz = ai[i+1] - diag[i] - 1;
1051 
1052         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1053         /* copy all rows of x that are needed into contiguous space */
1054         workt = work;
1055         for (j=0; j<nz; j++) {
1056           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1057           workt += bs;
1058         }
1059         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1060         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1061         while (nz--) {
1062           idx  = N*(*vi++);
1063           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1064           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1065           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1066           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1067           v   += N2;
1068           } */
1069 
1070         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1071         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1072         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1073         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1074         idiag -= bs2;
1075         i2    -= bs;
1076       }
1077       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1078     }
1079   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1080   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1081   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 /*
1086     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1087 */
1088 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1089 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1090 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1091 #define matsetvaluesblocked4_ matsetvaluesblocked4
1092 #endif
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "matsetvaluesblocked4_"
1096 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1097 {
1098   Mat               A  = *AA;
1099   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1100   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1101   PetscInt          *ai    =a->i,*ailen=a->ilen;
1102   PetscInt          *aj    =a->j,stepval,lastcol = -1;
1103   const PetscScalar *value = v;
1104   MatScalar         *ap,*aa = a->a,*bap;
1105 
1106   PetscFunctionBegin;
1107   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1108   stepval = (n-1)*4;
1109   for (k=0; k<m; k++) { /* loop over added rows */
1110     row  = im[k];
1111     rp   = aj + ai[row];
1112     ap   = aa + 16*ai[row];
1113     nrow = ailen[row];
1114     low  = 0;
1115     high = nrow;
1116     for (l=0; l<n; l++) { /* loop over added columns */
1117       col = in[l];
1118       if (col <= lastcol)  low = 0;
1119       else                high = nrow;
1120       lastcol = col;
1121       value   = v + k*(stepval+4 + l)*4;
1122       while (high-low > 7) {
1123         t = (low+high)/2;
1124         if (rp[t] > col) high = t;
1125         else             low  = t;
1126       }
1127       for (i=low; i<high; i++) {
1128         if (rp[i] > col) break;
1129         if (rp[i] == col) {
1130           bap = ap +  16*i;
1131           for (ii=0; ii<4; ii++,value+=stepval) {
1132             for (jj=ii; jj<16; jj+=4) {
1133               bap[jj] += *value++;
1134             }
1135           }
1136           goto noinsert2;
1137         }
1138       }
1139       N = nrow++ - 1;
1140       high++; /* added new column index thus must search to one higher than before */
1141       /* shift up all the later entries in this row */
1142       for (ii=N; ii>=i; ii--) {
1143         rp[ii+1] = rp[ii];
1144         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1145       }
1146       if (N >= i) {
1147         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1148       }
1149       rp[i] = col;
1150       bap   = ap +  16*i;
1151       for (ii=0; ii<4; ii++,value+=stepval) {
1152         for (jj=ii; jj<16; jj+=4) {
1153           bap[jj] = *value++;
1154         }
1155       }
1156       noinsert2:;
1157       low = i;
1158     }
1159     ailen[row] = nrow;
1160   }
1161   PetscFunctionReturnVoid();
1162 }
1163 
1164 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1165 #define matsetvalues4_ MATSETVALUES4
1166 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1167 #define matsetvalues4_ matsetvalues4
1168 #endif
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatSetValues4_"
1172 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1173 {
1174   Mat         A  = *AA;
1175   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1176   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1177   PetscInt    *ai=a->i,*ailen=a->ilen;
1178   PetscInt    *aj=a->j,brow,bcol;
1179   PetscInt    ridx,cidx,lastcol = -1;
1180   MatScalar   *ap,value,*aa=a->a,*bap;
1181 
1182   PetscFunctionBegin;
1183   for (k=0; k<m; k++) { /* loop over added rows */
1184     row  = im[k]; brow = row/4;
1185     rp   = aj + ai[brow];
1186     ap   = aa + 16*ai[brow];
1187     nrow = ailen[brow];
1188     low  = 0;
1189     high = nrow;
1190     for (l=0; l<n; l++) { /* loop over added columns */
1191       col   = in[l]; bcol = col/4;
1192       ridx  = row % 4; cidx = col % 4;
1193       value = v[l + k*n];
1194       if (col <= lastcol)  low = 0;
1195       else                high = nrow;
1196       lastcol = col;
1197       while (high-low > 7) {
1198         t = (low+high)/2;
1199         if (rp[t] > bcol) high = t;
1200         else              low  = t;
1201       }
1202       for (i=low; i<high; i++) {
1203         if (rp[i] > bcol) break;
1204         if (rp[i] == bcol) {
1205           bap   = ap +  16*i + 4*cidx + ridx;
1206           *bap += value;
1207           goto noinsert1;
1208         }
1209       }
1210       N = nrow++ - 1;
1211       high++; /* added new column thus must search to one higher than before */
1212       /* shift up all the later entries in this row */
1213       for (ii=N; ii>=i; ii--) {
1214         rp[ii+1] = rp[ii];
1215         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1216       }
1217       if (N>=i) {
1218         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1219       }
1220       rp[i]                    = bcol;
1221       ap[16*i + 4*cidx + ridx] = value;
1222 noinsert1:;
1223       low = i;
1224     }
1225     ailen[brow] = nrow;
1226   }
1227   PetscFunctionReturnVoid();
1228 }
1229 
1230 /*
1231      Checks for missing diagonals
1232 */
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1235 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1236 {
1237   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1238   PetscErrorCode ierr;
1239   PetscInt       *diag,*jj = a->j,i;
1240 
1241   PetscFunctionBegin;
1242   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1243   *missing = PETSC_FALSE;
1244   if (A->rmap->n > 0 && !jj) {
1245     *missing = PETSC_TRUE;
1246     if (d) *d = 0;
1247     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1248   } else {
1249     diag = a->diag;
1250     for (i=0; i<a->mbs; i++) {
1251       if (jj[diag[i]] != i) {
1252         *missing = PETSC_TRUE;
1253         if (d) *d = i;
1254         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1255         break;
1256       }
1257     }
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1264 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1265 {
1266   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1267   PetscErrorCode ierr;
1268   PetscInt       i,j,m = a->mbs;
1269 
1270   PetscFunctionBegin;
1271   if (!a->diag) {
1272     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1273     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1274     a->free_diag = PETSC_TRUE;
1275   }
1276   for (i=0; i<m; i++) {
1277     a->diag[i] = a->i[i+1];
1278     for (j=a->i[i]; j<a->i[i+1]; j++) {
1279       if (a->j[j] == i) {
1280         a->diag[i] = j;
1281         break;
1282       }
1283     }
1284   }
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1291 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1292 {
1293   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1294   PetscErrorCode ierr;
1295   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1296   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1297 
1298   PetscFunctionBegin;
1299   *nn = n;
1300   if (!ia) PetscFunctionReturn(0);
1301   if (symmetric) {
1302     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1303     nz   = tia[n];
1304   } else {
1305     tia = a->i; tja = a->j;
1306   }
1307 
1308   if (!blockcompressed && bs > 1) {
1309     (*nn) *= bs;
1310     /* malloc & create the natural set of indices */
1311     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1312     if (n) {
1313       (*ia)[0] = 0;
1314       for (j=1; j<bs; j++) {
1315         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1316       }
1317     }
1318 
1319     for (i=1; i<n; i++) {
1320       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1321       for (j=1; j<bs; j++) {
1322         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1323       }
1324     }
1325     if (n) {
1326       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1327     }
1328 
1329     if (inja) {
1330       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1331       cnt = 0;
1332       for (i=0; i<n; i++) {
1333         for (j=0; j<bs; j++) {
1334           for (k=tia[i]; k<tia[i+1]; k++) {
1335             for (l=0; l<bs; l++) {
1336               (*ja)[cnt++] = bs*tja[k] + l;
1337             }
1338           }
1339         }
1340       }
1341     }
1342 
1343     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1344       ierr = PetscFree(tia);CHKERRQ(ierr);
1345       ierr = PetscFree(tja);CHKERRQ(ierr);
1346     }
1347   } else if (oshift == 1) {
1348     if (symmetric) {
1349       nz = tia[A->rmap->n/bs];
1350       /*  add 1 to i and j indices */
1351       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1352       *ia = tia;
1353       if (ja) {
1354         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1355         *ja = tja;
1356       }
1357     } else {
1358       nz = a->i[A->rmap->n/bs];
1359       /* malloc space and  add 1 to i and j indices */
1360       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1361       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1362       if (ja) {
1363         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1364         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1365       }
1366     }
1367   } else {
1368     *ia = tia;
1369     if (ja) *ja = tja;
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1376 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1377 {
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   if (!ia) PetscFunctionReturn(0);
1382   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1383     ierr = PetscFree(*ia);CHKERRQ(ierr);
1384     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1391 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1392 {
1393   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397 #if defined(PETSC_USE_LOG)
1398   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1399 #endif
1400   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1401   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1402   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1403   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1404   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1405   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1406   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1407   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1408   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1409   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1410   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1411   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1412   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1413 
1414   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1415   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1416   ierr = PetscFree(A->data);CHKERRQ(ierr);
1417 
1418   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr);
1424   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr);
1425   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1426   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1427   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr);
1428   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1434 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1435 {
1436   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1437   PetscErrorCode ierr;
1438 
1439   PetscFunctionBegin;
1440   switch (op) {
1441   case MAT_ROW_ORIENTED:
1442     a->roworiented = flg;
1443     break;
1444   case MAT_KEEP_NONZERO_PATTERN:
1445     a->keepnonzeropattern = flg;
1446     break;
1447   case MAT_NEW_NONZERO_LOCATIONS:
1448     a->nonew = (flg ? 0 : 1);
1449     break;
1450   case MAT_NEW_NONZERO_LOCATION_ERR:
1451     a->nonew = (flg ? -1 : 0);
1452     break;
1453   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1454     a->nonew = (flg ? -2 : 0);
1455     break;
1456   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1457     a->nounused = (flg ? -1 : 0);
1458     break;
1459   case MAT_CHECK_COMPRESSED_ROW:
1460     a->compressedrow.check = flg;
1461     break;
1462   case MAT_NEW_DIAGONALS:
1463   case MAT_IGNORE_OFF_PROC_ENTRIES:
1464   case MAT_USE_HASH_TABLE:
1465     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1466     break;
1467   case MAT_SPD:
1468   case MAT_SYMMETRIC:
1469   case MAT_STRUCTURALLY_SYMMETRIC:
1470   case MAT_HERMITIAN:
1471   case MAT_SYMMETRY_ETERNAL:
1472     /* These options are handled directly by MatSetOption() */
1473     break;
1474   default:
1475     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1482 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1483 {
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1487   MatScalar      *aa,*aa_i;
1488   PetscScalar    *v_i;
1489 
1490   PetscFunctionBegin;
1491   bs  = A->rmap->bs;
1492   ai  = a->i;
1493   aj  = a->j;
1494   aa  = a->a;
1495   bs2 = a->bs2;
1496 
1497   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1498 
1499   bn  = row/bs;   /* Block number */
1500   bp  = row % bs; /* Block Position */
1501   M   = ai[bn+1] - ai[bn];
1502   *nz = bs*M;
1503 
1504   if (v) {
1505     *v = 0;
1506     if (*nz) {
1507       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1508       for (i=0; i<M; i++) { /* for each block in the block row */
1509         v_i  = *v + i*bs;
1510         aa_i = aa + bs2*(ai[bn] + i);
1511         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1512       }
1513     }
1514   }
1515 
1516   if (idx) {
1517     *idx = 0;
1518     if (*nz) {
1519       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1520       for (i=0; i<M; i++) { /* for each block in the block row */
1521         idx_i = *idx + i*bs;
1522         itmp  = bs*aj[ai[bn] + i];
1523         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1524       }
1525     }
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1532 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1533 {
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1538   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1546 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1547 {
1548   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1549   Mat            C;
1550   PetscErrorCode ierr;
1551   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1552   PetscInt       *rows,*cols,bs2=a->bs2;
1553   MatScalar      *array;
1554 
1555   PetscFunctionBegin;
1556   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1557   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1558     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1559     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1560 
1561     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1562     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1563     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1564     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1565     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1566     ierr = PetscFree(col);CHKERRQ(ierr);
1567   } else {
1568     C = *B;
1569   }
1570 
1571   array = a->a;
1572   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1573   for (i=0; i<mbs; i++) {
1574     cols[0] = i*bs;
1575     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1576     len = ai[i+1] - ai[i];
1577     for (j=0; j<len; j++) {
1578       rows[0] = (*aj++)*bs;
1579       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1580       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1581       array += bs2;
1582     }
1583   }
1584   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1585 
1586   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1587   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1588 
1589   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1590     *B = C;
1591   } else {
1592     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1593   }
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1599 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1600 {
1601   PetscErrorCode ierr;
1602   Mat            Btrans;
1603 
1604   PetscFunctionBegin;
1605   *f   = PETSC_FALSE;
1606   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1607   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1608   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1609   PetscFunctionReturn(0);
1610 }
1611 
1612 #undef __FUNCT__
1613 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1614 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1615 {
1616   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1617   PetscErrorCode ierr;
1618   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1619   int            fd;
1620   PetscScalar    *aa;
1621   FILE           *file;
1622 
1623   PetscFunctionBegin;
1624   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1625   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1626   col_lens[0] = MAT_FILE_CLASSID;
1627 
1628   col_lens[1] = A->rmap->N;
1629   col_lens[2] = A->cmap->n;
1630   col_lens[3] = a->nz*bs2;
1631 
1632   /* store lengths of each row and write (including header) to file */
1633   count = 0;
1634   for (i=0; i<a->mbs; i++) {
1635     for (j=0; j<bs; j++) {
1636       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1637     }
1638   }
1639   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1640   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1641 
1642   /* store column indices (zero start index) */
1643   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1644   count = 0;
1645   for (i=0; i<a->mbs; i++) {
1646     for (j=0; j<bs; j++) {
1647       for (k=a->i[i]; k<a->i[i+1]; k++) {
1648         for (l=0; l<bs; l++) {
1649           jj[count++] = bs*a->j[k] + l;
1650         }
1651       }
1652     }
1653   }
1654   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1655   ierr = PetscFree(jj);CHKERRQ(ierr);
1656 
1657   /* store nonzero values */
1658   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1659   count = 0;
1660   for (i=0; i<a->mbs; i++) {
1661     for (j=0; j<bs; j++) {
1662       for (k=a->i[i]; k<a->i[i+1]; k++) {
1663         for (l=0; l<bs; l++) {
1664           aa[count++] = a->a[bs2*k + l*bs + j];
1665         }
1666       }
1667     }
1668   }
1669   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1670   ierr = PetscFree(aa);CHKERRQ(ierr);
1671 
1672   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1673   if (file) {
1674     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1681 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1682 {
1683   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1684   PetscErrorCode    ierr;
1685   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1686   PetscViewerFormat format;
1687 
1688   PetscFunctionBegin;
1689   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1690   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1691     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1692   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1693     Mat aij;
1694     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1695     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1696     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1697   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1698       PetscFunctionReturn(0);
1699   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1700     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1701     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1702     for (i=0; i<a->mbs; i++) {
1703       for (j=0; j<bs; j++) {
1704         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1705         for (k=a->i[i]; k<a->i[i+1]; k++) {
1706           for (l=0; l<bs; l++) {
1707 #if defined(PETSC_USE_COMPLEX)
1708             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1709               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1710                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1711             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1712               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1713                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1714             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1715               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1716             }
1717 #else
1718             if (a->a[bs2*k + l*bs + j] != 0.0) {
1719               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1720             }
1721 #endif
1722           }
1723         }
1724         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1725       }
1726     }
1727     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1728   } else {
1729     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1730     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1731     for (i=0; i<a->mbs; i++) {
1732       for (j=0; j<bs; j++) {
1733         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1734         for (k=a->i[i]; k<a->i[i+1]; k++) {
1735           for (l=0; l<bs; l++) {
1736 #if defined(PETSC_USE_COMPLEX)
1737             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1738               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1739                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1740             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1741               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1742                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1743             } else {
1744               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1745             }
1746 #else
1747             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1748 #endif
1749           }
1750         }
1751         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1752       }
1753     }
1754     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1755   }
1756   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #include <petscdraw.h>
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1763 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1764 {
1765   Mat               A = (Mat) Aa;
1766   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1767   PetscErrorCode    ierr;
1768   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1769   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1770   MatScalar         *aa;
1771   PetscViewer       viewer;
1772   PetscViewerFormat format;
1773 
1774   PetscFunctionBegin;
1775   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1776   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1777 
1778   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1779 
1780   /* loop over matrix elements drawing boxes */
1781 
1782   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1783     color = PETSC_DRAW_BLUE;
1784     for (i=0,row=0; i<mbs; i++,row+=bs) {
1785       for (j=a->i[i]; j<a->i[i+1]; j++) {
1786         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1787         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1788         aa  = a->a + j*bs2;
1789         for (k=0; k<bs; k++) {
1790           for (l=0; l<bs; l++) {
1791             if (PetscRealPart(*aa++) >=  0.) continue;
1792             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1793           }
1794         }
1795       }
1796     }
1797     color = PETSC_DRAW_CYAN;
1798     for (i=0,row=0; i<mbs; i++,row+=bs) {
1799       for (j=a->i[i]; j<a->i[i+1]; j++) {
1800         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1801         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1802         aa  = a->a + j*bs2;
1803         for (k=0; k<bs; k++) {
1804           for (l=0; l<bs; l++) {
1805             if (PetscRealPart(*aa++) != 0.) continue;
1806             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1807           }
1808         }
1809       }
1810     }
1811     color = PETSC_DRAW_RED;
1812     for (i=0,row=0; i<mbs; i++,row+=bs) {
1813       for (j=a->i[i]; j<a->i[i+1]; j++) {
1814         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1815         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1816         aa  = a->a + j*bs2;
1817         for (k=0; k<bs; k++) {
1818           for (l=0; l<bs; l++) {
1819             if (PetscRealPart(*aa++) <= 0.) continue;
1820             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1821           }
1822         }
1823       }
1824     }
1825   } else {
1826     /* use contour shading to indicate magnitude of values */
1827     /* first determine max of all nonzero values */
1828     PetscDraw popup;
1829     PetscReal scale,maxv = 0.0;
1830 
1831     for (i=0; i<a->nz*a->bs2; i++) {
1832       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1833     }
1834     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1835     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1836     if (popup) {
1837       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1838     }
1839     for (i=0,row=0; i<mbs; i++,row+=bs) {
1840       for (j=a->i[i]; j<a->i[i+1]; j++) {
1841         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1842         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1843         aa  = a->a + j*bs2;
1844         for (k=0; k<bs; k++) {
1845           for (l=0; l<bs; l++) {
1846             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1847             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1848           }
1849         }
1850       }
1851     }
1852   }
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1858 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1859 {
1860   PetscErrorCode ierr;
1861   PetscReal      xl,yl,xr,yr,w,h;
1862   PetscDraw      draw;
1863   PetscBool      isnull;
1864 
1865   PetscFunctionBegin;
1866   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1867   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1868 
1869   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1870   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1871   xr  += w;    yr += h;  xl = -w;     yl = -h;
1872   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1873   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1874   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatView_SeqBAIJ"
1880 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1881 {
1882   PetscErrorCode ierr;
1883   PetscBool      iascii,isbinary,isdraw;
1884 
1885   PetscFunctionBegin;
1886   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1887   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1888   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1889   if (iascii) {
1890     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1891   } else if (isbinary) {
1892     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1893   } else if (isdraw) {
1894     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1895   } else {
1896     Mat B;
1897     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1898     ierr = MatView(B,viewer);CHKERRQ(ierr);
1899     ierr = MatDestroy(&B);CHKERRQ(ierr);
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 
1905 #undef __FUNCT__
1906 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1907 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1908 {
1909   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1910   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1911   PetscInt    *ai = a->i,*ailen = a->ilen;
1912   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1913   MatScalar   *ap,*aa = a->a;
1914 
1915   PetscFunctionBegin;
1916   for (k=0; k<m; k++) { /* loop over rows */
1917     row = im[k]; brow = row/bs;
1918     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1919     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1920     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1921     nrow = ailen[brow];
1922     for (l=0; l<n; l++) { /* loop over columns */
1923       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1924       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1925       col  = in[l];
1926       bcol = col/bs;
1927       cidx = col%bs;
1928       ridx = row%bs;
1929       high = nrow;
1930       low  = 0; /* assume unsorted */
1931       while (high-low > 5) {
1932         t = (low+high)/2;
1933         if (rp[t] > bcol) high = t;
1934         else             low  = t;
1935       }
1936       for (i=low; i<high; i++) {
1937         if (rp[i] > bcol) break;
1938         if (rp[i] == bcol) {
1939           *v++ = ap[bs2*i+bs*cidx+ridx];
1940           goto finished;
1941         }
1942       }
1943       *v++ = 0.0;
1944 finished:;
1945     }
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1952 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1953 {
1954   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1955   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1956   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1957   PetscErrorCode    ierr;
1958   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1959   PetscBool         roworiented=a->roworiented;
1960   const PetscScalar *value     = v;
1961   MatScalar         *ap,*aa = a->a,*bap;
1962 
1963   PetscFunctionBegin;
1964   if (roworiented) {
1965     stepval = (n-1)*bs;
1966   } else {
1967     stepval = (m-1)*bs;
1968   }
1969   for (k=0; k<m; k++) { /* loop over added rows */
1970     row = im[k];
1971     if (row < 0) continue;
1972 #if defined(PETSC_USE_DEBUG)
1973     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1974 #endif
1975     rp   = aj + ai[row];
1976     ap   = aa + bs2*ai[row];
1977     rmax = imax[row];
1978     nrow = ailen[row];
1979     low  = 0;
1980     high = nrow;
1981     for (l=0; l<n; l++) { /* loop over added columns */
1982       if (in[l] < 0) continue;
1983 #if defined(PETSC_USE_DEBUG)
1984       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1985 #endif
1986       col = in[l];
1987       if (roworiented) {
1988         value = v + (k*(stepval+bs) + l)*bs;
1989       } else {
1990         value = v + (l*(stepval+bs) + k)*bs;
1991       }
1992       if (col <= lastcol) low = 0;
1993       else high = nrow;
1994       lastcol = col;
1995       while (high-low > 7) {
1996         t = (low+high)/2;
1997         if (rp[t] > col) high = t;
1998         else             low  = t;
1999       }
2000       for (i=low; i<high; i++) {
2001         if (rp[i] > col) break;
2002         if (rp[i] == col) {
2003           bap = ap +  bs2*i;
2004           if (roworiented) {
2005             if (is == ADD_VALUES) {
2006               for (ii=0; ii<bs; ii++,value+=stepval) {
2007                 for (jj=ii; jj<bs2; jj+=bs) {
2008                   bap[jj] += *value++;
2009                 }
2010               }
2011             } else {
2012               for (ii=0; ii<bs; ii++,value+=stepval) {
2013                 for (jj=ii; jj<bs2; jj+=bs) {
2014                   bap[jj] = *value++;
2015                 }
2016               }
2017             }
2018           } else {
2019             if (is == ADD_VALUES) {
2020               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2021                 for (jj=0; jj<bs; jj++) {
2022                   bap[jj] += value[jj];
2023                 }
2024                 bap += bs;
2025               }
2026             } else {
2027               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2028                 for (jj=0; jj<bs; jj++) {
2029                   bap[jj]  = value[jj];
2030                 }
2031                 bap += bs;
2032               }
2033             }
2034           }
2035           goto noinsert2;
2036         }
2037       }
2038       if (nonew == 1) goto noinsert2;
2039       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2040       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2041       N = nrow++ - 1; high++;
2042       /* shift up all the later entries in this row */
2043       for (ii=N; ii>=i; ii--) {
2044         rp[ii+1] = rp[ii];
2045         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2046       }
2047       if (N >= i) {
2048         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2049       }
2050       rp[i] = col;
2051       bap   = ap +  bs2*i;
2052       if (roworiented) {
2053         for (ii=0; ii<bs; ii++,value+=stepval) {
2054           for (jj=ii; jj<bs2; jj+=bs) {
2055             bap[jj] = *value++;
2056           }
2057         }
2058       } else {
2059         for (ii=0; ii<bs; ii++,value+=stepval) {
2060           for (jj=0; jj<bs; jj++) {
2061             *bap++ = *value++;
2062           }
2063         }
2064       }
2065 noinsert2:;
2066       low = i;
2067     }
2068     ailen[row] = nrow;
2069   }
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2075 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2076 {
2077   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
2078   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2079   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
2080   PetscErrorCode ierr;
2081   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
2082   MatScalar      *aa  = a->a,*ap;
2083   PetscReal      ratio=0.6;
2084 
2085   PetscFunctionBegin;
2086   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2087 
2088   if (m) rmax = ailen[0];
2089   for (i=1; i<mbs; i++) {
2090     /* move each row back by the amount of empty slots (fshift) before it*/
2091     fshift += imax[i-1] - ailen[i-1];
2092     rmax    = PetscMax(rmax,ailen[i]);
2093     if (fshift) {
2094       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2095       N  = ailen[i];
2096       for (j=0; j<N; j++) {
2097         ip[j-fshift] = ip[j];
2098 
2099         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2100       }
2101     }
2102     ai[i] = ai[i-1] + ailen[i-1];
2103   }
2104   if (mbs) {
2105     fshift += imax[mbs-1] - ailen[mbs-1];
2106     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2107   }
2108   /* reset ilen and imax for each row */
2109   for (i=0; i<mbs; i++) {
2110     ailen[i] = imax[i] = ai[i+1] - ai[i];
2111   }
2112   a->nz = ai[mbs];
2113 
2114   /* diagonals may have moved, so kill the diagonal pointers */
2115   a->idiagvalid = PETSC_FALSE;
2116   if (fshift && a->diag) {
2117     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2118     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2119     a->diag = 0;
2120   }
2121   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2122   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2123   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2124   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2125 
2126   A->info.mallocs    += a->reallocs;
2127   a->reallocs         = 0;
2128   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2129 
2130   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2131 
2132   A->same_nonzero = PETSC_TRUE;
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 /*
2137    This function returns an array of flags which indicate the locations of contiguous
2138    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2139    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2140    Assume: sizes should be long enough to hold all the values.
2141 */
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2144 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2145 {
2146   PetscInt  i,j,k,row;
2147   PetscBool flg;
2148 
2149   PetscFunctionBegin;
2150   for (i=0,j=0; i<n; j++) {
2151     row = idx[i];
2152     if (row%bs!=0) { /* Not the begining of a block */
2153       sizes[j] = 1;
2154       i++;
2155     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2156       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2157       i++;
2158     } else { /* Begining of the block, so check if the complete block exists */
2159       flg = PETSC_TRUE;
2160       for (k=1; k<bs; k++) {
2161         if (row+k != idx[i+k]) { /* break in the block */
2162           flg = PETSC_FALSE;
2163           break;
2164         }
2165       }
2166       if (flg) { /* No break in the bs */
2167         sizes[j] = bs;
2168         i       += bs;
2169       } else {
2170         sizes[j] = 1;
2171         i++;
2172       }
2173     }
2174   }
2175   *bs_max = j;
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2181 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2182 {
2183   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2184   PetscErrorCode    ierr;
2185   PetscInt          i,j,k,count,*rows;
2186   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2187   PetscScalar       zero = 0.0;
2188   MatScalar         *aa;
2189   const PetscScalar *xx;
2190   PetscScalar       *bb;
2191 
2192   PetscFunctionBegin;
2193   /* fix right hand side if needed */
2194   if (x && b) {
2195     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2196     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2197     for (i=0; i<is_n; i++) {
2198       bb[is_idx[i]] = diag*xx[is_idx[i]];
2199     }
2200     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2201     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2202   }
2203 
2204   /* Make a copy of the IS and  sort it */
2205   /* allocate memory for rows,sizes */
2206   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2207 
2208   /* copy IS values to rows, and sort them */
2209   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2210   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2211 
2212   if (baij->keepnonzeropattern) {
2213     for (i=0; i<is_n; i++) sizes[i] = 1;
2214     bs_max          = is_n;
2215     A->same_nonzero = PETSC_TRUE;
2216   } else {
2217     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2218 
2219     A->same_nonzero = PETSC_FALSE;
2220   }
2221 
2222   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2223     row = rows[j];
2224     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2225     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2226     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2227     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2228       if (diag != (PetscScalar)0.0) {
2229         if (baij->ilen[row/bs] > 0) {
2230           baij->ilen[row/bs]       = 1;
2231           baij->j[baij->i[row/bs]] = row/bs;
2232 
2233           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2234         }
2235         /* Now insert all the diagonal values for this bs */
2236         for (k=0; k<bs; k++) {
2237           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2238         }
2239       } else { /* (diag == 0.0) */
2240         baij->ilen[row/bs] = 0;
2241       } /* end (diag == 0.0) */
2242     } else { /* (sizes[i] != bs) */
2243 #if defined(PETSC_USE_DEBUG)
2244       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2245 #endif
2246       for (k=0; k<count; k++) {
2247         aa[0] =  zero;
2248         aa   += bs;
2249       }
2250       if (diag != (PetscScalar)0.0) {
2251         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2252       }
2253     }
2254   }
2255 
2256   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2257   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2263 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2264 {
2265   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2266   PetscErrorCode    ierr;
2267   PetscInt          i,j,k,count;
2268   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2269   PetscScalar       zero = 0.0;
2270   MatScalar         *aa;
2271   const PetscScalar *xx;
2272   PetscScalar       *bb;
2273   PetscBool         *zeroed,vecs = PETSC_FALSE;
2274 
2275   PetscFunctionBegin;
2276   /* fix right hand side if needed */
2277   if (x && b) {
2278     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2279     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2280     vecs = PETSC_TRUE;
2281   }
2282   A->same_nonzero = PETSC_TRUE;
2283 
2284   /* zero the columns */
2285   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2286   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2287   for (i=0; i<is_n; i++) {
2288     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2289     zeroed[is_idx[i]] = PETSC_TRUE;
2290   }
2291   for (i=0; i<A->rmap->N; i++) {
2292     if (!zeroed[i]) {
2293       row = i/bs;
2294       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2295         for (k=0; k<bs; k++) {
2296           col = bs*baij->j[j] + k;
2297           if (zeroed[col]) {
2298             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2299             if (vecs) bb[i] -= aa[0]*xx[col];
2300             aa[0] = 0.0;
2301           }
2302         }
2303       }
2304     } else if (vecs) bb[i] = diag*xx[i];
2305   }
2306   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2307   if (vecs) {
2308     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2309     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2310   }
2311 
2312   /* zero the rows */
2313   for (i=0; i<is_n; i++) {
2314     row   = is_idx[i];
2315     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2316     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2317     for (k=0; k<count; k++) {
2318       aa[0] =  zero;
2319       aa   += bs;
2320     }
2321     if (diag != (PetscScalar)0.0) {
2322       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2323     }
2324   }
2325   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2331 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2332 {
2333   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2334   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2335   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2336   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2337   PetscErrorCode ierr;
2338   PetscInt       ridx,cidx,bs2=a->bs2;
2339   PetscBool      roworiented=a->roworiented;
2340   MatScalar      *ap,value,*aa=a->a,*bap;
2341 
2342   PetscFunctionBegin;
2343   if (v) PetscValidScalarPointer(v,6);
2344   for (k=0; k<m; k++) { /* loop over added rows */
2345     row  = im[k];
2346     brow = row/bs;
2347     if (row < 0) continue;
2348 #if defined(PETSC_USE_DEBUG)
2349     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2350 #endif
2351     rp   = aj + ai[brow];
2352     ap   = aa + bs2*ai[brow];
2353     rmax = imax[brow];
2354     nrow = ailen[brow];
2355     low  = 0;
2356     high = nrow;
2357     for (l=0; l<n; l++) { /* loop over added columns */
2358       if (in[l] < 0) continue;
2359 #if defined(PETSC_USE_DEBUG)
2360       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2361 #endif
2362       col  = in[l]; bcol = col/bs;
2363       ridx = row % bs; cidx = col % bs;
2364       if (roworiented) {
2365         value = v[l + k*n];
2366       } else {
2367         value = v[k + l*m];
2368       }
2369       if (col <= lastcol) low = 0; else high = nrow;
2370       lastcol = col;
2371       while (high-low > 7) {
2372         t = (low+high)/2;
2373         if (rp[t] > bcol) high = t;
2374         else              low  = t;
2375       }
2376       for (i=low; i<high; i++) {
2377         if (rp[i] > bcol) break;
2378         if (rp[i] == bcol) {
2379           bap = ap +  bs2*i + bs*cidx + ridx;
2380           if (is == ADD_VALUES) *bap += value;
2381           else                  *bap  = value;
2382           goto noinsert1;
2383         }
2384       }
2385       if (nonew == 1) goto noinsert1;
2386       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2387       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2388       N = nrow++ - 1; high++;
2389       /* shift up all the later entries in this row */
2390       for (ii=N; ii>=i; ii--) {
2391         rp[ii+1] = rp[ii];
2392         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2393       }
2394       if (N>=i) {
2395         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2396       }
2397       rp[i]                      = bcol;
2398       ap[bs2*i + bs*cidx + ridx] = value;
2399       a->nz++;
2400 noinsert1:;
2401       low = i;
2402     }
2403     ailen[brow] = nrow;
2404   }
2405   A->same_nonzero = PETSC_FALSE;
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2411 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2412 {
2413   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2414   Mat            outA;
2415   PetscErrorCode ierr;
2416   PetscBool      row_identity,col_identity;
2417 
2418   PetscFunctionBegin;
2419   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2420   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2421   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2422   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2423 
2424   outA            = inA;
2425   inA->factortype = MAT_FACTOR_LU;
2426 
2427   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2428 
2429   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2430   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2431   a->row = row;
2432   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2433   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2434   a->col = col;
2435 
2436   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2437   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2438   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2439   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2440 
2441   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2442   if (!a->solve_work) {
2443     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2444     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2445   }
2446   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 #undef __FUNCT__
2451 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2452 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2453 {
2454   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2455   PetscInt    i,nz,mbs;
2456 
2457   PetscFunctionBegin;
2458   nz  = baij->maxnz;
2459   mbs = baij->mbs;
2460   for (i=0; i<nz; i++) {
2461     baij->j[i] = indices[i];
2462   }
2463   baij->nz = nz;
2464   for (i=0; i<mbs; i++) {
2465     baij->ilen[i] = baij->imax[i];
2466   }
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2472 /*@
2473     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2474        in the matrix.
2475 
2476   Input Parameters:
2477 +  mat - the SeqBAIJ matrix
2478 -  indices - the column indices
2479 
2480   Level: advanced
2481 
2482   Notes:
2483     This can be called if you have precomputed the nonzero structure of the
2484   matrix and want to provide it to the matrix object to improve the performance
2485   of the MatSetValues() operation.
2486 
2487     You MUST have set the correct numbers of nonzeros per row in the call to
2488   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2489 
2490     MUST be called before any calls to MatSetValues();
2491 
2492 @*/
2493 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2494 {
2495   PetscErrorCode ierr;
2496 
2497   PetscFunctionBegin;
2498   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2499   PetscValidPointer(indices,2);
2500   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2506 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2507 {
2508   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2509   PetscErrorCode ierr;
2510   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2511   PetscReal      atmp;
2512   PetscScalar    *x,zero = 0.0;
2513   MatScalar      *aa;
2514   PetscInt       ncols,brow,krow,kcol;
2515 
2516   PetscFunctionBegin;
2517   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2518   bs  = A->rmap->bs;
2519   aa  = a->a;
2520   ai  = a->i;
2521   aj  = a->j;
2522   mbs = a->mbs;
2523 
2524   ierr = VecSet(v,zero);CHKERRQ(ierr);
2525   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2526   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2527   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2528   for (i=0; i<mbs; i++) {
2529     ncols = ai[1] - ai[0]; ai++;
2530     brow  = bs*i;
2531     for (j=0; j<ncols; j++) {
2532       for (kcol=0; kcol<bs; kcol++) {
2533         for (krow=0; krow<bs; krow++) {
2534           atmp = PetscAbsScalar(*aa);aa++;
2535           row  = brow + krow;   /* row index */
2536           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2537           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2538         }
2539       }
2540       aj++;
2541     }
2542   }
2543   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatCopy_SeqBAIJ"
2549 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2550 {
2551   PetscErrorCode ierr;
2552 
2553   PetscFunctionBegin;
2554   /* If the two matrices have the same copy implementation, use fast copy. */
2555   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2556     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2557     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2558     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2559 
2560     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2561     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2562     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2563   } else {
2564     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2565   }
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 #undef __FUNCT__
2570 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2571 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2572 {
2573   PetscErrorCode ierr;
2574 
2575   PetscFunctionBegin;
2576   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 #undef __FUNCT__
2581 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2582 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2583 {
2584   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2585 
2586   PetscFunctionBegin;
2587   *array = a->a;
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #undef __FUNCT__
2592 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2593 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2594 {
2595   PetscFunctionBegin;
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 #undef __FUNCT__
2600 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2601 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2602 {
2603   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2604   PetscErrorCode ierr;
2605   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2606   PetscBLASInt   one=1;
2607 
2608   PetscFunctionBegin;
2609   if (str == SAME_NONZERO_PATTERN) {
2610     PetscScalar  alpha = a;
2611     PetscBLASInt bnz;
2612     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2613     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2614   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2615     if (y->xtoy && y->XtoY != X) {
2616       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2617       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2618     }
2619     if (!y->xtoy) { /* get xtoy */
2620       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2621       y->XtoY = X;
2622       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2623     }
2624     for (i=0; i<x->nz; i++) {
2625       j = 0;
2626       while (j < bs2) {
2627         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2628         j++;
2629       }
2630     }
2631     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2632   } else {
2633     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2634   }
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 #undef __FUNCT__
2639 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2640 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2641 {
2642   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2643   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2644   MatScalar   *aa = a->a;
2645 
2646   PetscFunctionBegin;
2647   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2653 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2654 {
2655   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2656   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2657   MatScalar   *aa = a->a;
2658 
2659   PetscFunctionBegin;
2660   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2665 
2666 #undef __FUNCT__
2667 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2668 /*
2669     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2670 */
2671 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2672 {
2673   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2674   PetscErrorCode ierr;
2675   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2676   PetscInt       nz = a->i[m],row,*jj,mr,col;
2677 
2678   PetscFunctionBegin;
2679   *nn = n;
2680   if (!ia) PetscFunctionReturn(0);
2681   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2682   else {
2683     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2684     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2685     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2686     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2687     jj   = a->j;
2688     for (i=0; i<nz; i++) {
2689       collengths[jj[i]]++;
2690     }
2691     cia[0] = oshift;
2692     for (i=0; i<n; i++) {
2693       cia[i+1] = cia[i] + collengths[i];
2694     }
2695     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2696     jj   = a->j;
2697     for (row=0; row<m; row++) {
2698       mr = a->i[row+1] - a->i[row];
2699       for (i=0; i<mr; i++) {
2700         col = *jj++;
2701 
2702         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2703       }
2704     }
2705     ierr = PetscFree(collengths);CHKERRQ(ierr);
2706     *ia  = cia; *ja = cja;
2707   }
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2713 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2714 {
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   if (!ia) PetscFunctionReturn(0);
2719   ierr = PetscFree(*ia);CHKERRQ(ierr);
2720   ierr = PetscFree(*ja);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2726 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2727 {
2728   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2729   PetscErrorCode ierr;
2730   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2731   PetscScalar    dx,*y,*xx,*w3_array;
2732   PetscScalar    *vscale_array;
2733   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2734   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2735   void           *fctx   = coloring->fctx;
2736   PetscBool      flg     = PETSC_FALSE;
2737   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2738   Vec            x1_tmp;
2739 
2740   PetscFunctionBegin;
2741   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2742   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2743   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2744   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2745 
2746   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2747   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2748   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2749   if (flg) {
2750     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2751   } else {
2752     PetscBool assembled;
2753     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2754     if (assembled) {
2755       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2756     }
2757   }
2758 
2759   x1_tmp = x1;
2760   if (!coloring->vscale) {
2761     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2762   }
2763 
2764   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2765     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2766   }
2767   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2768 
2769   /* Set w1 = F(x1) */
2770   if (!coloring->fset) {
2771     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2772     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2773     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2774   } else {
2775     coloring->fset = PETSC_FALSE;
2776   }
2777 
2778   if (!coloring->w3) {
2779     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2780     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2781   }
2782   w3 = coloring->w3;
2783 
2784   /* Compute all the local scale factors, including ghost points */
2785   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2786   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2787   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2788   if (ctype == IS_COLORING_GHOSTED) {
2789     col_start = 0; col_end = N;
2790   } else if (ctype == IS_COLORING_GLOBAL) {
2791     xx           = xx - start;
2792     vscale_array = vscale_array - start;
2793     col_start    = start; col_end = N + start;
2794   }
2795   for (col=col_start; col<col_end; col++) {
2796     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2797     if (coloring->htype[0] == 'w') {
2798       dx = 1.0 + unorm;
2799     } else {
2800       dx = xx[col];
2801     }
2802     if (dx == (PetscScalar)0.0) dx = 1.0;
2803     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2804     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2805     dx               *= epsilon;
2806     vscale_array[col] = (PetscScalar)1.0/dx;
2807   }
2808   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2809   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2810   if (ctype == IS_COLORING_GLOBAL) {
2811     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2812     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2813   }
2814   if (coloring->vscaleforrow) {
2815     vscaleforrow = coloring->vscaleforrow;
2816   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2817 
2818   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2819   /*
2820     Loop over each color
2821   */
2822   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2823   for (k=0; k<coloring->ncolors; k++) {
2824     coloring->currentcolor = k;
2825     for (i=0; i<bs; i++) {
2826       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2827       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2828       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2829       /*
2830         Loop over each column associated with color
2831         adding the perturbation to the vector w3.
2832       */
2833       for (l=0; l<coloring->ncolumns[k]; l++) {
2834         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2835         if (coloring->htype[0] == 'w') {
2836           dx = 1.0 + unorm;
2837         } else {
2838           dx = xx[col];
2839         }
2840         if (dx == (PetscScalar)0.0) dx = 1.0;
2841         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2842         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2843         dx *= epsilon;
2844         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2845         w3_array[col] += dx;
2846       }
2847       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2848       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2849 
2850       /*
2851         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2852         w2 = F(x1 + dx) - F(x1)
2853       */
2854       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2855       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2856       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2857       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2858 
2859       /*
2860         Loop over rows of vector, putting results into Jacobian matrix
2861       */
2862       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2863       for (l=0; l<coloring->nrows[k]; l++) {
2864         row = bs*coloring->rows[k][l];                /* local row index */
2865         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2866         for (j=0; j<bs; j++) {
2867           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2868           srows[j]  = row + start + j;
2869         }
2870         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2871       }
2872       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2873     }
2874   } /* endof for each color */
2875   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2876   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2877   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2878   ierr = PetscFree(srows);CHKERRQ(ierr);
2879 
2880   coloring->currentcolor = -1;
2881 
2882   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2883   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2884   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2885   PetscFunctionReturn(0);
2886 }
2887 
2888 /* -------------------------------------------------------------------*/
2889 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2890                                        MatGetRow_SeqBAIJ,
2891                                        MatRestoreRow_SeqBAIJ,
2892                                        MatMult_SeqBAIJ_N,
2893                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2894                                        MatMultTranspose_SeqBAIJ,
2895                                        MatMultTransposeAdd_SeqBAIJ,
2896                                        0,
2897                                        0,
2898                                        0,
2899                                /* 10*/ 0,
2900                                        MatLUFactor_SeqBAIJ,
2901                                        0,
2902                                        0,
2903                                        MatTranspose_SeqBAIJ,
2904                                /* 15*/ MatGetInfo_SeqBAIJ,
2905                                        MatEqual_SeqBAIJ,
2906                                        MatGetDiagonal_SeqBAIJ,
2907                                        MatDiagonalScale_SeqBAIJ,
2908                                        MatNorm_SeqBAIJ,
2909                                /* 20*/ 0,
2910                                        MatAssemblyEnd_SeqBAIJ,
2911                                        MatSetOption_SeqBAIJ,
2912                                        MatZeroEntries_SeqBAIJ,
2913                                /* 24*/ MatZeroRows_SeqBAIJ,
2914                                        0,
2915                                        0,
2916                                        0,
2917                                        0,
2918                                /* 29*/ MatSetUp_SeqBAIJ,
2919                                        0,
2920                                        0,
2921                                        0,
2922                                        0,
2923                                /* 34*/ MatDuplicate_SeqBAIJ,
2924                                        0,
2925                                        0,
2926                                        MatILUFactor_SeqBAIJ,
2927                                        0,
2928                                /* 39*/ MatAXPY_SeqBAIJ,
2929                                        MatGetSubMatrices_SeqBAIJ,
2930                                        MatIncreaseOverlap_SeqBAIJ,
2931                                        MatGetValues_SeqBAIJ,
2932                                        MatCopy_SeqBAIJ,
2933                                /* 44*/ 0,
2934                                        MatScale_SeqBAIJ,
2935                                        0,
2936                                        0,
2937                                        MatZeroRowsColumns_SeqBAIJ,
2938                                /* 49*/ 0,
2939                                        MatGetRowIJ_SeqBAIJ,
2940                                        MatRestoreRowIJ_SeqBAIJ,
2941                                        MatGetColumnIJ_SeqBAIJ,
2942                                        MatRestoreColumnIJ_SeqBAIJ,
2943                                /* 54*/ MatFDColoringCreate_SeqAIJ,
2944                                        0,
2945                                        0,
2946                                        0,
2947                                        MatSetValuesBlocked_SeqBAIJ,
2948                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2949                                        MatDestroy_SeqBAIJ,
2950                                        MatView_SeqBAIJ,
2951                                        0,
2952                                        0,
2953                                /* 64*/ 0,
2954                                        0,
2955                                        0,
2956                                        0,
2957                                        0,
2958                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2959                                        0,
2960                                        MatConvert_Basic,
2961                                        0,
2962                                        0,
2963                                /* 74*/ 0,
2964                                        MatFDColoringApply_BAIJ,
2965                                        0,
2966                                        0,
2967                                        0,
2968                                /* 79*/ 0,
2969                                        0,
2970                                        0,
2971                                        0,
2972                                        MatLoad_SeqBAIJ,
2973                                /* 84*/ 0,
2974                                        0,
2975                                        0,
2976                                        0,
2977                                        0,
2978                                /* 89*/ 0,
2979                                        0,
2980                                        0,
2981                                        0,
2982                                        0,
2983                                /* 94*/ 0,
2984                                        0,
2985                                        0,
2986                                        0,
2987                                        0,
2988                                /* 99*/ 0,
2989                                        0,
2990                                        0,
2991                                        0,
2992                                        0,
2993                                /*104*/ 0,
2994                                        MatRealPart_SeqBAIJ,
2995                                        MatImaginaryPart_SeqBAIJ,
2996                                        0,
2997                                        0,
2998                                /*109*/ 0,
2999                                        0,
3000                                        0,
3001                                        0,
3002                                        MatMissingDiagonal_SeqBAIJ,
3003                                /*114*/ 0,
3004                                        0,
3005                                        0,
3006                                        0,
3007                                        0,
3008                                /*119*/ 0,
3009                                        0,
3010                                        MatMultHermitianTranspose_SeqBAIJ,
3011                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3012                                        0,
3013                                /*124*/ 0,
3014                                        0,
3015                                        MatInvertBlockDiagonal_SeqBAIJ,
3016                                        0,
3017                                        0,
3018                                /*129*/ 0,
3019                                        0,
3020                                        0,
3021                                        0,
3022                                        0,
3023                                /*134*/ 0,
3024                                        0,
3025                                        0,
3026                                        0,
3027                                        0,
3028                                /*139*/ 0,
3029                                        0
3030 };
3031 
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3034 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3035 {
3036   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3037   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
3038   PetscErrorCode ierr;
3039 
3040   PetscFunctionBegin;
3041   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3042 
3043   /* allocate space for values if not already there */
3044   if (!aij->saved_values) {
3045     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3046     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3047   }
3048 
3049   /* copy values over */
3050   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3056 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3057 {
3058   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
3059   PetscErrorCode ierr;
3060   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3061 
3062   PetscFunctionBegin;
3063   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3064   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3065 
3066   /* copy values over */
3067   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3072 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3073 
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3076 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3077 {
3078   Mat_SeqBAIJ    *b;
3079   PetscErrorCode ierr;
3080   PetscInt       i,mbs,nbs,bs2;
3081   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3082 
3083   PetscFunctionBegin;
3084   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3085   if (nz == MAT_SKIP_ALLOCATION) {
3086     skipallocation = PETSC_TRUE;
3087     nz             = 0;
3088   }
3089 
3090   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3091   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3092   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3093   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3094   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3095 
3096   B->preallocated = PETSC_TRUE;
3097 
3098   mbs = B->rmap->n/bs;
3099   nbs = B->cmap->n/bs;
3100   bs2 = bs*bs;
3101 
3102   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3103 
3104   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3105   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3106   if (nnz) {
3107     for (i=0; i<mbs; i++) {
3108       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3109       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3110     }
3111   }
3112 
3113   b    = (Mat_SeqBAIJ*)B->data;
3114   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3115   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3116   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3117 
3118   if (!flg) {
3119     switch (bs) {
3120     case 1:
3121       B->ops->mult    = MatMult_SeqBAIJ_1;
3122       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3123       B->ops->sor     = MatSOR_SeqBAIJ_1;
3124       break;
3125     case 2:
3126       B->ops->mult    = MatMult_SeqBAIJ_2;
3127       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3128       B->ops->sor     = MatSOR_SeqBAIJ_2;
3129       break;
3130     case 3:
3131       B->ops->mult    = MatMult_SeqBAIJ_3;
3132       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3133       B->ops->sor     = MatSOR_SeqBAIJ_3;
3134       break;
3135     case 4:
3136       B->ops->mult    = MatMult_SeqBAIJ_4;
3137       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3138       B->ops->sor     = MatSOR_SeqBAIJ_4;
3139       break;
3140     case 5:
3141       B->ops->mult    = MatMult_SeqBAIJ_5;
3142       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3143       B->ops->sor     = MatSOR_SeqBAIJ_5;
3144       break;
3145     case 6:
3146       B->ops->mult    = MatMult_SeqBAIJ_6;
3147       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3148       B->ops->sor     = MatSOR_SeqBAIJ_6;
3149       break;
3150     case 7:
3151       B->ops->mult    = MatMult_SeqBAIJ_7;
3152       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3153       B->ops->sor     = MatSOR_SeqBAIJ_7;
3154       break;
3155     case 15:
3156       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
3157       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3158       B->ops->sor     = MatSOR_SeqBAIJ_N;
3159       break;
3160     default:
3161       B->ops->mult    = MatMult_SeqBAIJ_N;
3162       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3163       B->ops->sor     = MatSOR_SeqBAIJ_N;
3164       break;
3165     }
3166   }
3167   b->mbs = mbs;
3168   b->nbs = nbs;
3169   if (!skipallocation) {
3170     if (!b->imax) {
3171       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3172       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3173 
3174       b->free_imax_ilen = PETSC_TRUE;
3175     }
3176     /* b->ilen will count nonzeros in each block row so far. */
3177     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3178     if (!nnz) {
3179       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3180       else if (nz < 0) nz = 1;
3181       for (i=0; i<mbs; i++) b->imax[i] = nz;
3182       nz = nz*mbs;
3183     } else {
3184       nz = 0;
3185       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3186     }
3187 
3188     /* allocate the matrix space */
3189     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3190     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3191     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3192     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3193     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3194 
3195     b->singlemalloc = PETSC_TRUE;
3196     b->i[0]         = 0;
3197     for (i=1; i<mbs+1; i++) {
3198       b->i[i] = b->i[i-1] + b->imax[i-1];
3199     }
3200     b->free_a  = PETSC_TRUE;
3201     b->free_ij = PETSC_TRUE;
3202   } else {
3203     b->free_a  = PETSC_FALSE;
3204     b->free_ij = PETSC_FALSE;
3205   }
3206 
3207   b->bs2              = bs2;
3208   b->mbs              = mbs;
3209   b->nz               = 0;
3210   b->maxnz            = nz;
3211   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3212   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3213   PetscFunctionReturn(0);
3214 }
3215 
3216 #undef __FUNCT__
3217 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3218 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3219 {
3220   PetscInt       i,m,nz,nz_max=0,*nnz;
3221   PetscScalar    *values=0;
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
3225   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3226   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3227   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3228   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3229   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3230   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3231   m    = B->rmap->n/bs;
3232 
3233   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3234   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3235   for (i=0; i<m; i++) {
3236     nz = ii[i+1]- ii[i];
3237     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3238     nz_max = PetscMax(nz_max, nz);
3239     nnz[i] = nz;
3240   }
3241   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3242   ierr = PetscFree(nnz);CHKERRQ(ierr);
3243 
3244   values = (PetscScalar*)V;
3245   if (!values) {
3246     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3247     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3248   }
3249   for (i=0; i<m; i++) {
3250     PetscInt          ncols  = ii[i+1] - ii[i];
3251     const PetscInt    *icols = jj + ii[i];
3252     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3253     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3254   }
3255   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3256   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3257   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3258   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3259   PetscFunctionReturn(0);
3260 }
3261 
3262 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3263 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3264 #if defined(PETSC_HAVE_MUMPS)
3265 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3266 #endif
3267 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3268 
3269 /*MC
3270    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3271    block sparse compressed row format.
3272 
3273    Options Database Keys:
3274 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3275 
3276   Level: beginner
3277 
3278 .seealso: MatCreateSeqBAIJ()
3279 M*/
3280 
3281 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3282 
3283 #undef __FUNCT__
3284 #define __FUNCT__ "MatCreate_SeqBAIJ"
3285 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3286 {
3287   PetscErrorCode ierr;
3288   PetscMPIInt    size;
3289   Mat_SeqBAIJ    *b;
3290 
3291   PetscFunctionBegin;
3292   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3293   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3294 
3295   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3296   B->data = (void*)b;
3297   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3298 
3299   b->row          = 0;
3300   b->col          = 0;
3301   b->icol         = 0;
3302   b->reallocs     = 0;
3303   b->saved_values = 0;
3304 
3305   b->roworiented        = PETSC_TRUE;
3306   b->nonew              = 0;
3307   b->diag               = 0;
3308   b->solve_work         = 0;
3309   b->mult_work          = 0;
3310   B->spptr              = 0;
3311   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3312   b->keepnonzeropattern = PETSC_FALSE;
3313   b->xtoy               = 0;
3314   b->XtoY               = 0;
3315   B->same_nonzero       = PETSC_FALSE;
3316 
3317   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3318   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3319   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3320 #if defined(PETSC_HAVE_MUMPS)
3321   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3322 #endif
3323   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3324   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3325   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3326   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3327   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3328   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3329   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3330   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3331   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3332   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3333   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3334   PetscFunctionReturn(0);
3335 }
3336 
3337 #undef __FUNCT__
3338 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3339 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3340 {
3341   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3342   PetscErrorCode ierr;
3343   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3344 
3345   PetscFunctionBegin;
3346   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3347 
3348   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3349     c->imax           = a->imax;
3350     c->ilen           = a->ilen;
3351     c->free_imax_ilen = PETSC_FALSE;
3352   } else {
3353     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3354     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3355     for (i=0; i<mbs; i++) {
3356       c->imax[i] = a->imax[i];
3357       c->ilen[i] = a->ilen[i];
3358     }
3359     c->free_imax_ilen = PETSC_TRUE;
3360   }
3361 
3362   /* allocate the matrix space */
3363   if (mallocmatspace) {
3364     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3365       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3366       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3367       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3368 
3369       c->i            = a->i;
3370       c->j            = a->j;
3371       c->singlemalloc = PETSC_FALSE;
3372       c->free_a       = PETSC_TRUE;
3373       c->free_ij      = PETSC_FALSE;
3374       c->parent       = A;
3375       C->preallocated = PETSC_TRUE;
3376       C->assembled    = PETSC_TRUE;
3377 
3378       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3379       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3380       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3381     } else {
3382       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3383       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3384 
3385       c->singlemalloc = PETSC_TRUE;
3386       c->free_a       = PETSC_TRUE;
3387       c->free_ij      = PETSC_TRUE;
3388 
3389       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3390       if (mbs > 0) {
3391         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3392         if (cpvalues == MAT_COPY_VALUES) {
3393           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3394         } else {
3395           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3396         }
3397       }
3398       C->preallocated = PETSC_TRUE;
3399       C->assembled    = PETSC_TRUE;
3400     }
3401   }
3402 
3403   c->roworiented = a->roworiented;
3404   c->nonew       = a->nonew;
3405 
3406   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3407   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3408 
3409   c->bs2         = a->bs2;
3410   c->mbs         = a->mbs;
3411   c->nbs         = a->nbs;
3412 
3413   if (a->diag) {
3414     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3415       c->diag      = a->diag;
3416       c->free_diag = PETSC_FALSE;
3417     } else {
3418       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3419       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3420       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3421       c->free_diag = PETSC_TRUE;
3422     }
3423   } else c->diag = 0;
3424 
3425   c->nz         = a->nz;
3426   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3427   c->solve_work = 0;
3428   c->mult_work  = 0;
3429 
3430   c->compressedrow.use   = a->compressedrow.use;
3431   c->compressedrow.nrows = a->compressedrow.nrows;
3432   c->compressedrow.check = a->compressedrow.check;
3433   if (a->compressedrow.use) {
3434     i    = a->compressedrow.nrows;
3435     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3436     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3437     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3438     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3439   } else {
3440     c->compressedrow.use    = PETSC_FALSE;
3441     c->compressedrow.i      = NULL;
3442     c->compressedrow.rindex = NULL;
3443   }
3444   C->same_nonzero = A->same_nonzero;
3445 
3446   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3447   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3448   PetscFunctionReturn(0);
3449 }
3450 
3451 #undef __FUNCT__
3452 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3453 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3454 {
3455   PetscErrorCode ierr;
3456 
3457   PetscFunctionBegin;
3458   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3459   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3460   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3461   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3462   PetscFunctionReturn(0);
3463 }
3464 
3465 #undef __FUNCT__
3466 #define __FUNCT__ "MatLoad_SeqBAIJ"
3467 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3468 {
3469   Mat_SeqBAIJ    *a;
3470   PetscErrorCode ierr;
3471   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3472   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3473   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3474   PetscInt       *masked,nmask,tmp,bs2,ishift;
3475   PetscMPIInt    size;
3476   int            fd;
3477   PetscScalar    *aa;
3478   MPI_Comm       comm;
3479 
3480   PetscFunctionBegin;
3481   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3482   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3483   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3484   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3485   bs2  = bs*bs;
3486 
3487   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3488   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3489   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3490   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3491   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3492   M = header[1]; N = header[2]; nz = header[3];
3493 
3494   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3495   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3496 
3497   /*
3498      This code adds extra rows to make sure the number of rows is
3499     divisible by the blocksize
3500   */
3501   mbs        = M/bs;
3502   extra_rows = bs - M + bs*(mbs);
3503   if (extra_rows == bs) extra_rows = 0;
3504   else mbs++;
3505   if (extra_rows) {
3506     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3507   }
3508 
3509   /* Set global sizes if not already set */
3510   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3511     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3512   } else { /* Check if the matrix global sizes are correct */
3513     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3514     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3515       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3516     }
3517     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3518   }
3519 
3520   /* read in row lengths */
3521   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3522   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3523   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3524 
3525   /* read in column indices */
3526   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3527   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3528   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3529 
3530   /* loop over row lengths determining block row lengths */
3531   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3532   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3533   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3534   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3535   rowcount = 0;
3536   nzcount  = 0;
3537   for (i=0; i<mbs; i++) {
3538     nmask = 0;
3539     for (j=0; j<bs; j++) {
3540       kmax = rowlengths[rowcount];
3541       for (k=0; k<kmax; k++) {
3542         tmp = jj[nzcount++]/bs;
3543         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3544       }
3545       rowcount++;
3546     }
3547     browlengths[i] += nmask;
3548     /* zero out the mask elements we set */
3549     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3550   }
3551 
3552   /* Do preallocation  */
3553   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3554   a    = (Mat_SeqBAIJ*)newmat->data;
3555 
3556   /* set matrix "i" values */
3557   a->i[0] = 0;
3558   for (i=1; i<= mbs; i++) {
3559     a->i[i]      = a->i[i-1] + browlengths[i-1];
3560     a->ilen[i-1] = browlengths[i-1];
3561   }
3562   a->nz = 0;
3563   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3564 
3565   /* read in nonzero values */
3566   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3567   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3568   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3569 
3570   /* set "a" and "j" values into matrix */
3571   nzcount = 0; jcount = 0;
3572   for (i=0; i<mbs; i++) {
3573     nzcountb = nzcount;
3574     nmask    = 0;
3575     for (j=0; j<bs; j++) {
3576       kmax = rowlengths[i*bs+j];
3577       for (k=0; k<kmax; k++) {
3578         tmp = jj[nzcount++]/bs;
3579         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3580       }
3581     }
3582     /* sort the masked values */
3583     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3584 
3585     /* set "j" values into matrix */
3586     maskcount = 1;
3587     for (j=0; j<nmask; j++) {
3588       a->j[jcount++]  = masked[j];
3589       mask[masked[j]] = maskcount++;
3590     }
3591     /* set "a" values into matrix */
3592     ishift = bs2*a->i[i];
3593     for (j=0; j<bs; j++) {
3594       kmax = rowlengths[i*bs+j];
3595       for (k=0; k<kmax; k++) {
3596         tmp       = jj[nzcountb]/bs;
3597         block     = mask[tmp] - 1;
3598         point     = jj[nzcountb] - bs*tmp;
3599         idx       = ishift + bs2*block + j + bs*point;
3600         a->a[idx] = (MatScalar)aa[nzcountb++];
3601       }
3602     }
3603     /* zero out the mask elements we set */
3604     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3605   }
3606   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3607 
3608   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3609   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3610   ierr = PetscFree(aa);CHKERRQ(ierr);
3611   ierr = PetscFree(jj);CHKERRQ(ierr);
3612   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3613 
3614   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3615   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3616   PetscFunctionReturn(0);
3617 }
3618 
3619 #undef __FUNCT__
3620 #define __FUNCT__ "MatCreateSeqBAIJ"
3621 /*@C
3622    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3623    compressed row) format.  For good matrix assembly performance the
3624    user should preallocate the matrix storage by setting the parameter nz
3625    (or the array nnz).  By setting these parameters accurately, performance
3626    during matrix assembly can be increased by more than a factor of 50.
3627 
3628    Collective on MPI_Comm
3629 
3630    Input Parameters:
3631 +  comm - MPI communicator, set to PETSC_COMM_SELF
3632 .  bs - size of block
3633 .  m - number of rows
3634 .  n - number of columns
3635 .  nz - number of nonzero blocks  per block row (same for all rows)
3636 -  nnz - array containing the number of nonzero blocks in the various block rows
3637          (possibly different for each block row) or NULL
3638 
3639    Output Parameter:
3640 .  A - the matrix
3641 
3642    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3643    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3644    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3645 
3646    Options Database Keys:
3647 .   -mat_no_unroll - uses code that does not unroll the loops in the
3648                      block calculations (much slower)
3649 .    -mat_block_size - size of the blocks to use
3650 
3651    Level: intermediate
3652 
3653    Notes:
3654    The number of rows and columns must be divisible by blocksize.
3655 
3656    If the nnz parameter is given then the nz parameter is ignored
3657 
3658    A nonzero block is any block that as 1 or more nonzeros in it
3659 
3660    The block AIJ format is fully compatible with standard Fortran 77
3661    storage.  That is, the stored row and column indices can begin at
3662    either one (as in Fortran) or zero.  See the users' manual for details.
3663 
3664    Specify the preallocated storage with either nz or nnz (not both).
3665    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3666    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3667    matrices.
3668 
3669 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3670 @*/
3671 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3672 {
3673   PetscErrorCode ierr;
3674 
3675   PetscFunctionBegin;
3676   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3677   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3678   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3679   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3680   PetscFunctionReturn(0);
3681 }
3682 
3683 #undef __FUNCT__
3684 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3685 /*@C
3686    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3687    per row in the matrix. For good matrix assembly performance the
3688    user should preallocate the matrix storage by setting the parameter nz
3689    (or the array nnz).  By setting these parameters accurately, performance
3690    during matrix assembly can be increased by more than a factor of 50.
3691 
3692    Collective on MPI_Comm
3693 
3694    Input Parameters:
3695 +  A - the matrix
3696 .  bs - size of block
3697 .  nz - number of block nonzeros per block row (same for all rows)
3698 -  nnz - array containing the number of block nonzeros in the various block rows
3699          (possibly different for each block row) or NULL
3700 
3701    Options Database Keys:
3702 .   -mat_no_unroll - uses code that does not unroll the loops in the
3703                      block calculations (much slower)
3704 .    -mat_block_size - size of the blocks to use
3705 
3706    Level: intermediate
3707 
3708    Notes:
3709    If the nnz parameter is given then the nz parameter is ignored
3710 
3711    You can call MatGetInfo() to get information on how effective the preallocation was;
3712    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3713    You can also run with the option -info and look for messages with the string
3714    malloc in them to see if additional memory allocation was needed.
3715 
3716    The block AIJ format is fully compatible with standard Fortran 77
3717    storage.  That is, the stored row and column indices can begin at
3718    either one (as in Fortran) or zero.  See the users' manual for details.
3719 
3720    Specify the preallocated storage with either nz or nnz (not both).
3721    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3722    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3723 
3724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3725 @*/
3726 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3727 {
3728   PetscErrorCode ierr;
3729 
3730   PetscFunctionBegin;
3731   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3732   PetscValidType(B,1);
3733   PetscValidLogicalCollectiveInt(B,bs,2);
3734   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3735   PetscFunctionReturn(0);
3736 }
3737 
3738 #undef __FUNCT__
3739 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3740 /*@C
3741    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3742    (the default sequential PETSc format).
3743 
3744    Collective on MPI_Comm
3745 
3746    Input Parameters:
3747 +  A - the matrix
3748 .  i - the indices into j for the start of each local row (starts with zero)
3749 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3750 -  v - optional values in the matrix
3751 
3752    Level: developer
3753 
3754 .keywords: matrix, aij, compressed row, sparse
3755 
3756 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3757 @*/
3758 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3759 {
3760   PetscErrorCode ierr;
3761 
3762   PetscFunctionBegin;
3763   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3764   PetscValidType(B,1);
3765   PetscValidLogicalCollectiveInt(B,bs,2);
3766   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3773 /*@
3774      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3775 
3776      Collective on MPI_Comm
3777 
3778    Input Parameters:
3779 +  comm - must be an MPI communicator of size 1
3780 .  bs - size of block
3781 .  m - number of rows
3782 .  n - number of columns
3783 .  i - row indices
3784 .  j - column indices
3785 -  a - matrix values
3786 
3787    Output Parameter:
3788 .  mat - the matrix
3789 
3790    Level: advanced
3791 
3792    Notes:
3793        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3794     once the matrix is destroyed
3795 
3796        You cannot set new nonzero locations into this matrix, that will generate an error.
3797 
3798        The i and j indices are 0 based
3799 
3800        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3801 
3802 
3803 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3804 
3805 @*/
3806 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3807 {
3808   PetscErrorCode ierr;
3809   PetscInt       ii;
3810   Mat_SeqBAIJ    *baij;
3811 
3812   PetscFunctionBegin;
3813   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3814   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3815 
3816   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3817   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3818   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3819   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3820   baij = (Mat_SeqBAIJ*)(*mat)->data;
3821   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3822   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3823 
3824   baij->i = i;
3825   baij->j = j;
3826   baij->a = a;
3827 
3828   baij->singlemalloc = PETSC_FALSE;
3829   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3830   baij->free_a       = PETSC_FALSE;
3831   baij->free_ij      = PETSC_FALSE;
3832 
3833   for (ii=0; ii<m; ii++) {
3834     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3835 #if defined(PETSC_USE_DEBUG)
3836     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3837 #endif
3838   }
3839 #if defined(PETSC_USE_DEBUG)
3840   for (ii=0; ii<baij->i[m]; ii++) {
3841     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3842     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3843   }
3844 #endif
3845 
3846   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3847   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3848   PetscFunctionReturn(0);
3849 }
3850