xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c5df96a59d3f7e354f9770fb35f00818af2dc9fc)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52         ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53         diag    += 4;
54         mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66         ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67         diag    += 9;
68         mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76         ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77         diag  += 16;
78         mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86         ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87         diag  += 25;
88         mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96         ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97         diag  += 36;
98         mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106         ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107         diag  += 49;
108         mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118         diag  += bs2;
119         mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
211   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
212   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
213   PetscFunctionReturn(0);
214 }
215 
216 #undef __FUNCT__
217 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
218 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
219 {
220   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
221   PetscScalar        *x,x1,x2,s1,s2;
222   const PetscScalar  *b;
223   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
224   PetscErrorCode     ierr;
225   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
226   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
227 
228   PetscFunctionBegin;
229   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
235   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
246       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
248       i2     = 2;
249       idiag += 4;
250       for (i=1; i<m; i++) {
251         v     = aa + 4*ai[i];
252         vi    = aj + ai[i];
253         nz    = diag[i] - ai[i];
254         s1    = b[i2]; s2 = b[i2+1];
255         for (j=0; j<nz; j++) {
256           idx  = 2*vi[j];
257           it   = 4*j;
258           x1   = x[idx]; x2 = x[1+idx];
259           s1  -= v[it]*x1 + v[it+2]*x2;
260           s2  -= v[it+1]*x1 + v[it+3]*x2;
261         }
262         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
263         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
264         idiag   += 4;
265         i2      += 2;
266       }
267       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
268       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
269     }
270     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
271         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
272       i2    = 0;
273       mdiag = a->idiag+4*a->mbs;
274       for (i=0; i<m; i++) {
275         x1      = x[i2]; x2 = x[i2+1];
276         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
277         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
278         mdiag  += 4;
279         i2     += 2;
280       }
281       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
282     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
283       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
284     }
285     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
286       idiag   = a->idiag+4*a->mbs - 4;
287       i2      = 2*m - 2;
288       x1      = x[i2]; x2 = x[i2+1];
289       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
290       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
291       idiag -= 4;
292       i2    -= 2;
293       for (i=m-2; i>=0; i--) {
294         v     = aa + 4*(diag[i]+1);
295         vi    = aj + diag[i] + 1;
296         nz    = ai[i+1] - diag[i] - 1;
297         s1    = x[i2]; s2 = x[i2+1];
298         for (j=0; j<nz; j++) {
299           idx  = 2*vi[j];
300           it   = 4*j;
301           x1   = x[idx]; x2 = x[1+idx];
302           s1  -= v[it]*x1 + v[it+2]*x2;
303           s2  -= v[it+1]*x1 + v[it+3]*x2;
304         }
305         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
306         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
307         idiag   -= 4;
308         i2      -= 2;
309       }
310       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
311     }
312   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
320 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
420     }
421   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
422   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
423   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
429 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
430 {
431   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
432   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
433   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
434   const PetscScalar  *b;
435   PetscErrorCode     ierr;
436   PetscInt           m = a->mbs,i,i2,nz,idx;
437   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
438 
439   PetscFunctionBegin;
440   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
441   its = its*lits;
442   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
443   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
444   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
445   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
446   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
447 
448   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
449 
450   diag  = a->diag;
451   idiag = a->idiag;
452   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
453   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
454 
455   if (flag & SOR_ZERO_INITIAL_GUESS) {
456     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
457       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
458       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
459       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
460       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
461       i2     = 4;
462       idiag += 16;
463       for (i=1; i<m; i++) {
464         v     = aa + 16*ai[i];
465         vi    = aj + ai[i];
466         nz    = diag[i] - ai[i];
467         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
468         while (nz--) {
469           idx  = 4*(*vi++);
470           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
471           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
472           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
473           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
474           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
475           v   += 16;
476         }
477         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
478         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
479         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
480         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
481         idiag   += 16;
482         i2      += 4;
483       }
484       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
485       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
486     }
487     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
488         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
489       i2    = 0;
490       mdiag = a->idiag+16*a->mbs;
491       for (i=0; i<m; i++) {
492         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
493         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
494         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
495         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
496         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
497         mdiag  += 16;
498         i2     += 4;
499       }
500       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
501     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
502       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
503     }
504     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
505       idiag   = a->idiag+16*a->mbs - 16;
506       i2      = 4*m - 4;
507       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
508       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
509       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
510       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
511       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
512       idiag -= 16;
513       i2    -= 4;
514       for (i=m-2; i>=0; i--) {
515         v     = aa + 16*(diag[i]+1);
516         vi    = aj + diag[i] + 1;
517         nz    = ai[i+1] - diag[i] - 1;
518         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
519         while (nz--) {
520           idx  = 4*(*vi++);
521           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
522           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
523           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
524           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
525           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
526           v   += 16;
527         }
528         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
529         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
530         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
531         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
532         idiag   -= 16;
533         i2      -= 4;
534       }
535       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
536     }
537   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
538   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
539   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
540   PetscFunctionReturn(0);
541 }
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
545 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
546 {
547   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
548   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
549   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
550   const PetscScalar  *b;
551   PetscErrorCode     ierr;
552   PetscInt           m = a->mbs,i,i2,nz,idx;
553   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
554 
555   PetscFunctionBegin;
556   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
557   its = its*lits;
558   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
559   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
560   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
561   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
562   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
563 
564   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
565 
566   diag  = a->diag;
567   idiag = a->idiag;
568   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
569   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
570 
571   if (flag & SOR_ZERO_INITIAL_GUESS) {
572     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
573       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
574       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
575       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
576       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
577       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
578       i2     = 5;
579       idiag += 25;
580       for (i=1; i<m; i++) {
581         v     = aa + 25*ai[i];
582         vi    = aj + ai[i];
583         nz    = diag[i] - ai[i];
584         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
585         while (nz--) {
586           idx  = 5*(*vi++);
587           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
588           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
589           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
590           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
591           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
592           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
593           v   += 25;
594         }
595         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
596         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
597         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
598         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
599         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
600         idiag   += 25;
601         i2      += 5;
602       }
603       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
604       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
605     }
606     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
607         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
608       i2    = 0;
609       mdiag = a->idiag+25*a->mbs;
610       for (i=0; i<m; i++) {
611         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
612         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
613         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
614         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
615         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
616         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
617         mdiag  += 25;
618         i2     += 5;
619       }
620       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
621     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
622       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
623     }
624     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
625       idiag   = a->idiag+25*a->mbs - 25;
626       i2      = 5*m - 5;
627       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
628       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
629       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
630       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
631       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
632       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
633       idiag -= 25;
634       i2    -= 5;
635       for (i=m-2; i>=0; i--) {
636         v     = aa + 25*(diag[i]+1);
637         vi    = aj + diag[i] + 1;
638         nz    = ai[i+1] - diag[i] - 1;
639         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
640         while (nz--) {
641           idx  = 5*(*vi++);
642           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
643           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
644           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
645           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
646           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
647           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
648           v   += 25;
649         }
650         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
651         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
652         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
653         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
654         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
655         idiag   -= 25;
656         i2      -= 5;
657       }
658       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
659     }
660   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
661   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
662   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
668 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
669 {
670   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
671   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
672   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
673   const PetscScalar  *b;
674   PetscErrorCode     ierr;
675   PetscInt           m = a->mbs,i,i2,nz,idx;
676   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
677 
678   PetscFunctionBegin;
679   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
680   its = its*lits;
681   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
682   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
683   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
684   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
685   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
686 
687   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
688 
689   diag  = a->diag;
690   idiag = a->idiag;
691   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
692   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
693 
694   if (flag & SOR_ZERO_INITIAL_GUESS) {
695     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
696       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
697       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
698       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
699       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
700       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
701       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
702       i2     = 6;
703       idiag += 36;
704       for (i=1; i<m; i++) {
705         v     = aa + 36*ai[i];
706         vi    = aj + ai[i];
707         nz    = diag[i] - ai[i];
708         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
709         while (nz--) {
710           idx  = 6*(*vi++);
711           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
712           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
713           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
714           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
715           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
716           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
717           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
718           v   += 36;
719         }
720         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
721         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
722         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
723         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
724         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
725         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
726         idiag   += 36;
727         i2      += 6;
728       }
729       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
730       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
731     }
732     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
733         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
734       i2    = 0;
735       mdiag = a->idiag+36*a->mbs;
736       for (i=0; i<m; i++) {
737         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
738         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
739         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
740         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
741         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
742         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
743         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
744         mdiag  += 36;
745         i2     += 6;
746       }
747       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
748     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
749       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
750     }
751     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
752       idiag   = a->idiag+36*a->mbs - 36;
753       i2      = 6*m - 6;
754       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
755       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
756       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
757       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
758       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
759       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
760       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
761       idiag -= 36;
762       i2    -= 6;
763       for (i=m-2; i>=0; i--) {
764         v     = aa + 36*(diag[i]+1);
765         vi    = aj + diag[i] + 1;
766         nz    = ai[i+1] - diag[i] - 1;
767         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
768         while (nz--) {
769           idx  = 6*(*vi++);
770           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
771           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
772           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
773           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
774           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
775           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
776           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
777           v   += 36;
778         }
779         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
780         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
781         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
782         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
783         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
784         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
785         idiag   -= 36;
786         i2      -= 6;
787       }
788       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
789     }
790   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
791   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
792   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
798 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
799 {
800   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
801   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
802   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
803   const PetscScalar  *b;
804   PetscErrorCode     ierr;
805   PetscInt           m = a->mbs,i,i2,nz,idx;
806   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
807 
808   PetscFunctionBegin;
809   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
810   its = its*lits;
811   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
812   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
813   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
814   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
815   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
816 
817   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
818 
819   diag  = a->diag;
820   idiag = a->idiag;
821   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
822   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
823 
824   if (flag & SOR_ZERO_INITIAL_GUESS) {
825     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
826       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
827       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
828       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
829       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
830       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
831       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
832       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
833       i2     = 7;
834       idiag += 49;
835       for (i=1; i<m; i++) {
836         v     = aa + 49*ai[i];
837         vi    = aj + ai[i];
838         nz    = diag[i] - ai[i];
839         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
840         while (nz--) {
841           idx  = 7*(*vi++);
842           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
843           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
844           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
845           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
846           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
847           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
848           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
849           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
850           v   += 49;
851         }
852         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
853         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
854         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
855         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
856         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
857         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
858         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
859         idiag   += 49;
860         i2      += 7;
861       }
862       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
863       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
864     }
865     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
866         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
867       i2    = 0;
868       mdiag = a->idiag+49*a->mbs;
869       for (i=0; i<m; i++) {
870         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
871         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
872         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
873         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
874         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
875         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
876         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
877         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
878         mdiag  += 49;
879         i2     += 7;
880       }
881       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
882     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
883       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
884     }
885     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
886       idiag   = a->idiag+49*a->mbs - 49;
887       i2      = 7*m - 7;
888       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
889       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
890       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
891       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
892       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
893       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
894       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
895       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
896       idiag -= 49;
897       i2    -= 7;
898       for (i=m-2; i>=0; i--) {
899         v     = aa + 49*(diag[i]+1);
900         vi    = aj + diag[i] + 1;
901         nz    = ai[i+1] - diag[i] - 1;
902         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
903         while (nz--) {
904           idx  = 7*(*vi++);
905           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
906           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
907           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
908           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
909           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
910           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
911           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
912           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
913           v   += 49;
914         }
915         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
916         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
917         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
918         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
919         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
920         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
921         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
922         idiag   -= 49;
923         i2      -= 7;
924       }
925       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
926     }
927   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
928   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
935 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
936 {
937   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
938   PetscScalar        *x,*work,*w,*workt;
939   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
940   const PetscScalar  *b;
941   PetscErrorCode     ierr;
942   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
943   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
944 
945   PetscFunctionBegin;
946   its = its*lits;
947   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
948   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
949   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
950   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
951   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
952   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
953 
954   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
955 
956   diag  = a->diag;
957   idiag = a->idiag;
958   if (!a->mult_work) {
959     k    = PetscMax(A->rmap->n,A->cmap->n);
960     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
961   }
962   work = a->mult_work;
963   if (!a->sor_work) {
964     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
965   }
966   w = a->sor_work;
967 
968   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
969   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
970 
971   if (flag & SOR_ZERO_INITIAL_GUESS) {
972     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
973       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
974       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
975       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
976       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
977       i2     = bs;
978       idiag += bs2;
979       for (i=1; i<m; i++) {
980         v     = aa + bs2*ai[i];
981         vi    = aj + ai[i];
982         nz    = diag[i] - ai[i];
983 
984         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
985         /* copy all rows of x that are needed into contiguous space */
986         workt = work;
987         for (j=0; j<nz; j++) {
988           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
989           workt += bs;
990         }
991         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
992        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
993         while (nz--) {
994           idx  = N*(*vi++);
995           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
996           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
997           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
998           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
999           v   += N2;
1000           } */
1001 
1002         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1003         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1004         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1005         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1006 
1007         idiag   += bs2;
1008         i2      += bs;
1009       }
1010       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1011       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1012     }
1013     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1014         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1015       i2    = 0;
1016       mdiag = a->idiag+bs2*a->mbs;
1017       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1018       for (i=0; i<m; i++) {
1019         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1020         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1021         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1022         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1023         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1024 
1025         mdiag  += bs2;
1026         i2     += bs;
1027       }
1028       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1029     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1030       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1031     }
1032     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1033       idiag   = a->idiag+bs2*a->mbs - bs2;
1034       i2      = bs*m - bs;
1035       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1036       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1037       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1038       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1039       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1040       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1041       idiag -= bs2;
1042       i2    -= bs;
1043       for (i=m-2; i>=0; i--) {
1044         v     = aa + bs2*(diag[i]+1);
1045         vi    = aj + diag[i] + 1;
1046         nz    = ai[i+1] - diag[i] - 1;
1047 
1048         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1049         /* copy all rows of x that are needed into contiguous space */
1050         workt = work;
1051         for (j=0; j<nz; j++) {
1052           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1053           workt += bs;
1054         }
1055         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1056         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1057         while (nz--) {
1058           idx  = N*(*vi++);
1059           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1060           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1061           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1062           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1063           v   += N2;
1064           } */
1065 
1066         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1067         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1068         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1069         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1070         idiag   -= bs2;
1071         i2      -= bs;
1072       }
1073       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1074     }
1075   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1076   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1077   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 /*
1082     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1083 */
1084 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1085 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1086 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1087 #define matsetvaluesblocked4_ matsetvaluesblocked4
1088 #endif
1089 
1090 EXTERN_C_BEGIN
1091 #undef __FUNCT__
1092 #define __FUNCT__ "matsetvaluesblocked4_"
1093 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1094 {
1095   Mat               A = *AA;
1096   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1097   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1098   PetscInt          *ai=a->i,*ailen=a->ilen;
1099   PetscInt          *aj=a->j,stepval,lastcol = -1;
1100   const PetscScalar *value = v;
1101   MatScalar         *ap,*aa = a->a,*bap;
1102 
1103   PetscFunctionBegin;
1104   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1105   stepval = (n-1)*4;
1106   for (k=0; k<m; k++) { /* loop over added rows */
1107     row  = im[k];
1108     rp   = aj + ai[row];
1109     ap   = aa + 16*ai[row];
1110     nrow = ailen[row];
1111     low  = 0;
1112     high = nrow;
1113     for (l=0; l<n; l++) { /* loop over added columns */
1114       col = in[l];
1115       if (col <= lastcol)  low = 0;
1116       else                high = nrow;
1117       lastcol = col;
1118       value = v + k*(stepval+4 + l)*4;
1119       while (high-low > 7) {
1120         t = (low+high)/2;
1121         if (rp[t] > col) high = t;
1122         else             low  = t;
1123       }
1124       for (i=low; i<high; i++) {
1125         if (rp[i] > col) break;
1126         if (rp[i] == col) {
1127           bap  = ap +  16*i;
1128           for (ii=0; ii<4; ii++,value+=stepval) {
1129             for (jj=ii; jj<16; jj+=4) {
1130               bap[jj] += *value++;
1131             }
1132           }
1133           goto noinsert2;
1134         }
1135       }
1136       N = nrow++ - 1;
1137       high++; /* added new column index thus must search to one higher than before */
1138       /* shift up all the later entries in this row */
1139       for (ii=N; ii>=i; ii--) {
1140         rp[ii+1] = rp[ii];
1141         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1142       }
1143       if (N >= i) {
1144         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1145       }
1146       rp[i] = col;
1147       bap   = ap +  16*i;
1148       for (ii=0; ii<4; ii++,value+=stepval) {
1149         for (jj=ii; jj<16; jj+=4) {
1150           bap[jj] = *value++;
1151         }
1152       }
1153       noinsert2:;
1154       low = i;
1155     }
1156     ailen[row] = nrow;
1157   }
1158   PetscFunctionReturnVoid();
1159 }
1160 EXTERN_C_END
1161 
1162 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1163 #define matsetvalues4_ MATSETVALUES4
1164 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1165 #define matsetvalues4_ matsetvalues4
1166 #endif
1167 
1168 EXTERN_C_BEGIN
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatSetValues4_"
1171 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1172 {
1173   Mat         A = *AA;
1174   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1175   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1176   PetscInt    *ai=a->i,*ailen=a->ilen;
1177   PetscInt    *aj=a->j,brow,bcol;
1178   PetscInt    ridx,cidx,lastcol = -1;
1179   MatScalar   *ap,value,*aa=a->a,*bap;
1180 
1181   PetscFunctionBegin;
1182   for (k=0; k<m; k++) { /* loop over added rows */
1183     row  = im[k]; brow = row/4;
1184     rp   = aj + ai[brow];
1185     ap   = aa + 16*ai[brow];
1186     nrow = ailen[brow];
1187     low  = 0;
1188     high = nrow;
1189     for (l=0; l<n; l++) { /* loop over added columns */
1190       col = in[l]; bcol = col/4;
1191       ridx = row % 4; cidx = col % 4;
1192       value = v[l + k*n];
1193       if (col <= lastcol)  low = 0;
1194       else                high = nrow;
1195       lastcol = col;
1196       while (high-low > 7) {
1197         t = (low+high)/2;
1198         if (rp[t] > bcol) high = t;
1199         else              low  = t;
1200       }
1201       for (i=low; i<high; i++) {
1202         if (rp[i] > bcol) break;
1203         if (rp[i] == bcol) {
1204           bap  = ap +  16*i + 4*cidx + ridx;
1205           *bap += value;
1206           goto noinsert1;
1207         }
1208       }
1209       N = nrow++ - 1;
1210       high++; /* added new column thus must search to one higher than before */
1211       /* shift up all the later entries in this row */
1212       for (ii=N; ii>=i; ii--) {
1213         rp[ii+1] = rp[ii];
1214         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1215       }
1216       if (N>=i) {
1217         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1218       }
1219       rp[i]                    = bcol;
1220       ap[16*i + 4*cidx + ridx] = value;
1221       noinsert1:;
1222       low = i;
1223     }
1224     ailen[brow] = nrow;
1225   }
1226   PetscFunctionReturnVoid();
1227 }
1228 EXTERN_C_END
1229 
1230 /*
1231      Checks for missing diagonals
1232 */
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1235 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1236 {
1237   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1238   PetscErrorCode ierr;
1239   PetscInt       *diag,*jj = a->j,i;
1240 
1241   PetscFunctionBegin;
1242   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1243   *missing = PETSC_FALSE;
1244   if (A->rmap->n > 0 && !jj) {
1245     *missing  = PETSC_TRUE;
1246     if (d) *d = 0;
1247     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1248   } else {
1249     diag     = a->diag;
1250     for (i=0; i<a->mbs; i++) {
1251       if (jj[diag[i]] != i) {
1252         *missing  = PETSC_TRUE;
1253         if (d) *d = i;
1254         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1255         break;
1256       }
1257     }
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1264 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1265 {
1266   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1267   PetscErrorCode ierr;
1268   PetscInt       i,j,m = a->mbs;
1269 
1270   PetscFunctionBegin;
1271   if (!a->diag) {
1272     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1273     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1274     a->free_diag = PETSC_TRUE;
1275   }
1276   for (i=0; i<m; i++) {
1277     a->diag[i] = a->i[i+1];
1278     for (j=a->i[i]; j<a->i[i+1]; j++) {
1279       if (a->j[j] == i) {
1280         a->diag[i] = j;
1281         break;
1282       }
1283     }
1284   }
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1291 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1292 {
1293   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1294   PetscErrorCode ierr;
1295   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1296   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1297 
1298   PetscFunctionBegin;
1299   *nn = n;
1300   if (!ia) PetscFunctionReturn(0);
1301   if (symmetric) {
1302     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1303     nz   = tia[n];
1304   } else {
1305     tia = a->i; tja = a->j;
1306   }
1307 
1308   if (!blockcompressed && bs > 1) {
1309     (*nn) *= bs;
1310     /* malloc & create the natural set of indices */
1311     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1312     if (n) {
1313       (*ia)[0] = 0;
1314       for (j=1; j<bs; j++) {
1315         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1316       }
1317     }
1318 
1319     for (i=1; i<n; i++) {
1320       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1321       for (j=1; j<bs; j++) {
1322         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1323       }
1324     }
1325     if (n) {
1326       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1327     }
1328 
1329     if (inja) {
1330       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1331       cnt = 0;
1332       for (i=0; i<n; i++) {
1333         for (j=0; j<bs; j++) {
1334           for (k=tia[i]; k<tia[i+1]; k++) {
1335             for (l=0; l<bs; l++) {
1336               (*ja)[cnt++] = bs*tja[k] + l;
1337             }
1338           }
1339         }
1340       }
1341     }
1342 
1343     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1344       ierr = PetscFree(tia);CHKERRQ(ierr);
1345       ierr = PetscFree(tja);CHKERRQ(ierr);
1346     }
1347   } else if (oshift == 1) {
1348     if (symmetric) {
1349       nz = tia[A->rmap->n/bs];
1350       /*  add 1 to i and j indices */
1351       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1352       *ia = tia;
1353       if (ja) {
1354         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1355         *ja = tja;
1356       }
1357     } else {
1358       nz = a->i[A->rmap->n/bs];
1359       /* malloc space and  add 1 to i and j indices */
1360       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1361       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1362       if (ja) {
1363         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1364         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1365       }
1366     }
1367   } else {
1368     *ia = tia;
1369     if (ja) *ja = tja;
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1376 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1377 {
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   if (!ia) PetscFunctionReturn(0);
1382   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1383     ierr = PetscFree(*ia);CHKERRQ(ierr);
1384     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1391 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1392 {
1393   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397 #if defined(PETSC_USE_LOG)
1398   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1399 #endif
1400   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1401   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1402   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1403   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1404   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1405   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1406   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1407   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1408   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1409   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1410   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1411   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1412   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1413 
1414   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1415   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1416   ierr = PetscFree(A->data);CHKERRQ(ierr);
1417 
1418   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1424   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1425   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1426   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1427   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1428   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1434 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1435 {
1436   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1437   PetscErrorCode ierr;
1438 
1439   PetscFunctionBegin;
1440   switch (op) {
1441   case MAT_ROW_ORIENTED:
1442     a->roworiented    = flg;
1443     break;
1444   case MAT_KEEP_NONZERO_PATTERN:
1445     a->keepnonzeropattern = flg;
1446     break;
1447   case MAT_NEW_NONZERO_LOCATIONS:
1448     a->nonew          = (flg ? 0 : 1);
1449     break;
1450   case MAT_NEW_NONZERO_LOCATION_ERR:
1451     a->nonew          = (flg ? -1 : 0);
1452     break;
1453   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1454     a->nonew          = (flg ? -2 : 0);
1455     break;
1456   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1457     a->nounused       = (flg ? -1 : 0);
1458     break;
1459   case MAT_CHECK_COMPRESSED_ROW:
1460     a->compressedrow.check = flg;
1461     break;
1462   case MAT_NEW_DIAGONALS:
1463   case MAT_IGNORE_OFF_PROC_ENTRIES:
1464   case MAT_USE_HASH_TABLE:
1465     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1466     break;
1467   case MAT_SPD:
1468   case MAT_SYMMETRIC:
1469   case MAT_STRUCTURALLY_SYMMETRIC:
1470   case MAT_HERMITIAN:
1471   case MAT_SYMMETRY_ETERNAL:
1472     /* These options are handled directly by MatSetOption() */
1473     break;
1474   default:
1475     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1482 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1483 {
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1487   MatScalar      *aa,*aa_i;
1488   PetscScalar    *v_i;
1489 
1490   PetscFunctionBegin;
1491   bs  = A->rmap->bs;
1492   ai  = a->i;
1493   aj  = a->j;
1494   aa  = a->a;
1495   bs2 = a->bs2;
1496 
1497   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1498 
1499   bn  = row/bs;   /* Block number */
1500   bp  = row % bs; /* Block Position */
1501   M   = ai[bn+1] - ai[bn];
1502   *nz = bs*M;
1503 
1504   if (v) {
1505     *v = 0;
1506     if (*nz) {
1507       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1508       for (i=0; i<M; i++) { /* for each block in the block row */
1509         v_i  = *v + i*bs;
1510         aa_i = aa + bs2*(ai[bn] + i);
1511         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1512       }
1513     }
1514   }
1515 
1516   if (idx) {
1517     *idx = 0;
1518     if (*nz) {
1519       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1520       for (i=0; i<M; i++) { /* for each block in the block row */
1521         idx_i = *idx + i*bs;
1522         itmp  = bs*aj[ai[bn] + i];
1523         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1524       }
1525     }
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1532 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1533 {
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1538   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1546 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1547 {
1548   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1549   Mat            C;
1550   PetscErrorCode ierr;
1551   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1552   PetscInt       *rows,*cols,bs2=a->bs2;
1553   MatScalar      *array;
1554 
1555   PetscFunctionBegin;
1556   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1557   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1558     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1559     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1560 
1561     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1562     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1563     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1564     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1565     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1566     ierr = PetscFree(col);CHKERRQ(ierr);
1567   } else {
1568     C = *B;
1569   }
1570 
1571   array = a->a;
1572   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1573   for (i=0; i<mbs; i++) {
1574     cols[0] = i*bs;
1575     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1576     len = ai[i+1] - ai[i];
1577     for (j=0; j<len; j++) {
1578       rows[0] = (*aj++)*bs;
1579       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1580       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1581       array += bs2;
1582     }
1583   }
1584   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1585 
1586   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1587   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1588 
1589   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1590     *B = C;
1591   } else {
1592     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1593   }
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 EXTERN_C_BEGIN
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1600 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1601 {
1602   PetscErrorCode ierr;
1603   Mat            Btrans;
1604 
1605   PetscFunctionBegin;
1606   *f = PETSC_FALSE;
1607   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1608   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1609   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1610   PetscFunctionReturn(0);
1611 }
1612 EXTERN_C_END
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1616 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1617 {
1618   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1619   PetscErrorCode ierr;
1620   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1621   int            fd;
1622   PetscScalar    *aa;
1623   FILE           *file;
1624 
1625   PetscFunctionBegin;
1626   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1627   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1628   col_lens[0] = MAT_FILE_CLASSID;
1629 
1630   col_lens[1] = A->rmap->N;
1631   col_lens[2] = A->cmap->n;
1632   col_lens[3] = a->nz*bs2;
1633 
1634   /* store lengths of each row and write (including header) to file */
1635   count = 0;
1636   for (i=0; i<a->mbs; i++) {
1637     for (j=0; j<bs; j++) {
1638       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1639     }
1640   }
1641   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1642   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1643 
1644   /* store column indices (zero start index) */
1645   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1646   count = 0;
1647   for (i=0; i<a->mbs; i++) {
1648     for (j=0; j<bs; j++) {
1649       for (k=a->i[i]; k<a->i[i+1]; k++) {
1650         for (l=0; l<bs; l++) {
1651           jj[count++] = bs*a->j[k] + l;
1652         }
1653       }
1654     }
1655   }
1656   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1657   ierr = PetscFree(jj);CHKERRQ(ierr);
1658 
1659   /* store nonzero values */
1660   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1661   count = 0;
1662   for (i=0; i<a->mbs; i++) {
1663     for (j=0; j<bs; j++) {
1664       for (k=a->i[i]; k<a->i[i+1]; k++) {
1665         for (l=0; l<bs; l++) {
1666           aa[count++] = a->a[bs2*k + l*bs + j];
1667         }
1668       }
1669     }
1670   }
1671   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1672   ierr = PetscFree(aa);CHKERRQ(ierr);
1673 
1674   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1675   if (file) {
1676     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1683 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1684 {
1685   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1686   PetscErrorCode    ierr;
1687   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1688   PetscViewerFormat format;
1689 
1690   PetscFunctionBegin;
1691   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1692   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1693     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1694   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1695     Mat aij;
1696     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1697     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1698     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1699   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1700      PetscFunctionReturn(0);
1701   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1702     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1703     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1704     for (i=0; i<a->mbs; i++) {
1705       for (j=0; j<bs; j++) {
1706         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1707         for (k=a->i[i]; k<a->i[i+1]; k++) {
1708           for (l=0; l<bs; l++) {
1709 #if defined(PETSC_USE_COMPLEX)
1710             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1711               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1712                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1713             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1714               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1715                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1716             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1717               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1718             }
1719 #else
1720             if (a->a[bs2*k + l*bs + j] != 0.0) {
1721               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1722             }
1723 #endif
1724           }
1725         }
1726         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1727       }
1728     }
1729     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1730   } else {
1731     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1732     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1733     for (i=0; i<a->mbs; i++) {
1734       for (j=0; j<bs; j++) {
1735         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1736         for (k=a->i[i]; k<a->i[i+1]; k++) {
1737           for (l=0; l<bs; l++) {
1738 #if defined(PETSC_USE_COMPLEX)
1739             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1741                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1742             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1743               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1744                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1745             } else {
1746               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1747             }
1748 #else
1749             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1750 #endif
1751           }
1752         }
1753         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1754       }
1755     }
1756     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1757   }
1758   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1764 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1765 {
1766   Mat            A = (Mat) Aa;
1767   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1768   PetscErrorCode ierr;
1769   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1770   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1771   MatScalar      *aa;
1772   PetscViewer    viewer;
1773   PetscViewerFormat format;
1774 
1775   PetscFunctionBegin;
1776   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1777   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1778 
1779   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1780 
1781   /* loop over matrix elements drawing boxes */
1782 
1783   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1784     color = PETSC_DRAW_BLUE;
1785     for (i=0,row=0; i<mbs; i++,row+=bs) {
1786       for (j=a->i[i]; j<a->i[i+1]; j++) {
1787         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1788         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1789         aa = a->a + j*bs2;
1790         for (k=0; k<bs; k++) {
1791           for (l=0; l<bs; l++) {
1792             if (PetscRealPart(*aa++) >=  0.) continue;
1793             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1794           }
1795         }
1796       }
1797     }
1798     color = PETSC_DRAW_CYAN;
1799     for (i=0,row=0; i<mbs; i++,row+=bs) {
1800       for (j=a->i[i]; j<a->i[i+1]; j++) {
1801         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1802         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1803         aa = a->a + j*bs2;
1804         for (k=0; k<bs; k++) {
1805           for (l=0; l<bs; l++) {
1806             if (PetscRealPart(*aa++) != 0.) continue;
1807             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1808           }
1809         }
1810       }
1811     }
1812     color = PETSC_DRAW_RED;
1813     for (i=0,row=0; i<mbs; i++,row+=bs) {
1814       for (j=a->i[i]; j<a->i[i+1]; j++) {
1815         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1816         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1817         aa = a->a + j*bs2;
1818         for (k=0; k<bs; k++) {
1819           for (l=0; l<bs; l++) {
1820             if (PetscRealPart(*aa++) <= 0.) continue;
1821             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1822           }
1823         }
1824       }
1825     }
1826   } else {
1827     /* use contour shading to indicate magnitude of values */
1828     /* first determine max of all nonzero values */
1829     PetscDraw   popup;
1830     PetscReal scale,maxv = 0.0;
1831 
1832     for (i=0; i<a->nz*a->bs2; i++) {
1833       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1834     }
1835     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1836     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1837     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1838     for (i=0,row=0; i<mbs; i++,row+=bs) {
1839       for (j=a->i[i]; j<a->i[i+1]; j++) {
1840         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1841         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1842         aa = a->a + j*bs2;
1843         for (k=0; k<bs; k++) {
1844           for (l=0; l<bs; l++) {
1845             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1846             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1847           }
1848         }
1849       }
1850     }
1851   }
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1857 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1858 {
1859   PetscErrorCode ierr;
1860   PetscReal      xl,yl,xr,yr,w,h;
1861   PetscDraw      draw;
1862   PetscBool      isnull;
1863 
1864   PetscFunctionBegin;
1865   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1866   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1867 
1868   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1869   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1870   xr += w;    yr += h;  xl = -w;     yl = -h;
1871   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1872   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1873   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatView_SeqBAIJ"
1879 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1880 {
1881   PetscErrorCode ierr;
1882   PetscBool      iascii,isbinary,isdraw;
1883 
1884   PetscFunctionBegin;
1885   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1886   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1887   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1888   if (iascii) {
1889     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1890   } else if (isbinary) {
1891     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1892   } else if (isdraw) {
1893     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1894   } else {
1895     Mat B;
1896     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1897     ierr = MatView(B,viewer);CHKERRQ(ierr);
1898     ierr = MatDestroy(&B);CHKERRQ(ierr);
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1906 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1907 {
1908   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1909   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1910   PetscInt    *ai = a->i,*ailen = a->ilen;
1911   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1912   MatScalar   *ap,*aa = a->a;
1913 
1914   PetscFunctionBegin;
1915   for (k=0; k<m; k++) { /* loop over rows */
1916     row  = im[k]; brow = row/bs;
1917     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1918     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1919     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1920     nrow = ailen[brow];
1921     for (l=0; l<n; l++) { /* loop over columns */
1922       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1923       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1924       col  = in[l] ;
1925       bcol = col/bs;
1926       cidx = col%bs;
1927       ridx = row%bs;
1928       high = nrow;
1929       low  = 0; /* assume unsorted */
1930       while (high-low > 5) {
1931         t = (low+high)/2;
1932         if (rp[t] > bcol) high = t;
1933         else             low  = t;
1934       }
1935       for (i=low; i<high; i++) {
1936         if (rp[i] > bcol) break;
1937         if (rp[i] == bcol) {
1938           *v++ = ap[bs2*i+bs*cidx+ridx];
1939           goto finished;
1940         }
1941       }
1942       *v++ = 0.0;
1943       finished:;
1944     }
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1951 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1952 {
1953   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1954   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1955   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1956   PetscErrorCode    ierr;
1957   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1958   PetscBool         roworiented=a->roworiented;
1959   const PetscScalar *value = v;
1960   MatScalar         *ap,*aa = a->a,*bap;
1961 
1962   PetscFunctionBegin;
1963   if (roworiented) {
1964     stepval = (n-1)*bs;
1965   } else {
1966     stepval = (m-1)*bs;
1967   }
1968   for (k=0; k<m; k++) { /* loop over added rows */
1969     row  = im[k];
1970     if (row < 0) continue;
1971 #if defined(PETSC_USE_DEBUG)
1972     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1973 #endif
1974     rp   = aj + ai[row];
1975     ap   = aa + bs2*ai[row];
1976     rmax = imax[row];
1977     nrow = ailen[row];
1978     low  = 0;
1979     high = nrow;
1980     for (l=0; l<n; l++) { /* loop over added columns */
1981       if (in[l] < 0) continue;
1982 #if defined(PETSC_USE_DEBUG)
1983       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1984 #endif
1985       col = in[l];
1986       if (roworiented) {
1987         value = v + (k*(stepval+bs) + l)*bs;
1988       } else {
1989         value = v + (l*(stepval+bs) + k)*bs;
1990       }
1991       if (col <= lastcol) low = 0; else high = nrow;
1992       lastcol = col;
1993       while (high-low > 7) {
1994         t = (low+high)/2;
1995         if (rp[t] > col) high = t;
1996         else             low  = t;
1997       }
1998       for (i=low; i<high; i++) {
1999         if (rp[i] > col) break;
2000         if (rp[i] == col) {
2001           bap  = ap +  bs2*i;
2002           if (roworiented) {
2003             if (is == ADD_VALUES) {
2004               for (ii=0; ii<bs; ii++,value+=stepval) {
2005                 for (jj=ii; jj<bs2; jj+=bs) {
2006                   bap[jj] += *value++;
2007                 }
2008               }
2009             } else {
2010               for (ii=0; ii<bs; ii++,value+=stepval) {
2011                 for (jj=ii; jj<bs2; jj+=bs) {
2012                   bap[jj] = *value++;
2013                 }
2014               }
2015             }
2016           } else {
2017             if (is == ADD_VALUES) {
2018               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2019                 for (jj=0; jj<bs; jj++) {
2020                   bap[jj] += value[jj];
2021                 }
2022                 bap += bs;
2023               }
2024             } else {
2025               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2026                 for (jj=0; jj<bs; jj++) {
2027                   bap[jj]  = value[jj];
2028                 }
2029                 bap += bs;
2030               }
2031             }
2032           }
2033           goto noinsert2;
2034         }
2035       }
2036       if (nonew == 1) goto noinsert2;
2037       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2038       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2039       N = nrow++ - 1; high++;
2040       /* shift up all the later entries in this row */
2041       for (ii=N; ii>=i; ii--) {
2042         rp[ii+1] = rp[ii];
2043         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2044       }
2045       if (N >= i) {
2046         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2047       }
2048       rp[i] = col;
2049       bap   = ap +  bs2*i;
2050       if (roworiented) {
2051         for (ii=0; ii<bs; ii++,value+=stepval) {
2052           for (jj=ii; jj<bs2; jj+=bs) {
2053             bap[jj] = *value++;
2054           }
2055         }
2056       } else {
2057         for (ii=0; ii<bs; ii++,value+=stepval) {
2058           for (jj=0; jj<bs; jj++) {
2059             *bap++  = *value++;
2060           }
2061         }
2062       }
2063       noinsert2:;
2064       low = i;
2065     }
2066     ailen[row] = nrow;
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2073 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2074 {
2075   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2076   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2077   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2078   PetscErrorCode ierr;
2079   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2080   MatScalar      *aa = a->a,*ap;
2081   PetscReal      ratio=0.6;
2082 
2083   PetscFunctionBegin;
2084   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2085 
2086   if (m) rmax = ailen[0];
2087   for (i=1; i<mbs; i++) {
2088     /* move each row back by the amount of empty slots (fshift) before it*/
2089     fshift += imax[i-1] - ailen[i-1];
2090     rmax   = PetscMax(rmax,ailen[i]);
2091     if (fshift) {
2092       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2093       N = ailen[i];
2094       for (j=0; j<N; j++) {
2095         ip[j-fshift] = ip[j];
2096         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2097       }
2098     }
2099     ai[i] = ai[i-1] + ailen[i-1];
2100   }
2101   if (mbs) {
2102     fshift += imax[mbs-1] - ailen[mbs-1];
2103     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2104   }
2105   /* reset ilen and imax for each row */
2106   for (i=0; i<mbs; i++) {
2107     ailen[i] = imax[i] = ai[i+1] - ai[i];
2108   }
2109   a->nz = ai[mbs];
2110 
2111   /* diagonals may have moved, so kill the diagonal pointers */
2112   a->idiagvalid = PETSC_FALSE;
2113   if (fshift && a->diag) {
2114     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2115     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2116     a->diag = 0;
2117   }
2118   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2119   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2120   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2121   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2122   A->info.mallocs     += a->reallocs;
2123   a->reallocs          = 0;
2124   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2125 
2126   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2127   A->same_nonzero = PETSC_TRUE;
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 /*
2132    This function returns an array of flags which indicate the locations of contiguous
2133    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2134    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2135    Assume: sizes should be long enough to hold all the values.
2136 */
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2139 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2140 {
2141   PetscInt   i,j,k,row;
2142   PetscBool  flg;
2143 
2144   PetscFunctionBegin;
2145   for (i=0,j=0; i<n; j++) {
2146     row = idx[i];
2147     if (row%bs!=0) { /* Not the begining of a block */
2148       sizes[j] = 1;
2149       i++;
2150     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2151       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2152       i++;
2153     } else { /* Begining of the block, so check if the complete block exists */
2154       flg = PETSC_TRUE;
2155       for (k=1; k<bs; k++) {
2156         if (row+k != idx[i+k]) { /* break in the block */
2157           flg = PETSC_FALSE;
2158           break;
2159         }
2160       }
2161       if (flg) { /* No break in the bs */
2162         sizes[j] = bs;
2163         i+= bs;
2164       } else {
2165         sizes[j] = 1;
2166         i++;
2167       }
2168     }
2169   }
2170   *bs_max = j;
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2176 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2177 {
2178   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2179   PetscErrorCode    ierr;
2180   PetscInt          i,j,k,count,*rows;
2181   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2182   PetscScalar       zero = 0.0;
2183   MatScalar         *aa;
2184   const PetscScalar *xx;
2185   PetscScalar       *bb;
2186 
2187   PetscFunctionBegin;
2188   /* fix right hand side if needed */
2189   if (x && b) {
2190     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2191     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2192     for (i=0; i<is_n; i++) {
2193       bb[is_idx[i]] = diag*xx[is_idx[i]];
2194     }
2195     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2196     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2197   }
2198 
2199   /* Make a copy of the IS and  sort it */
2200   /* allocate memory for rows,sizes */
2201   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2202 
2203   /* copy IS values to rows, and sort them */
2204   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2205   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2206 
2207   if (baij->keepnonzeropattern) {
2208     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2209     bs_max = is_n;
2210     A->same_nonzero = PETSC_TRUE;
2211   } else {
2212     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2213     A->same_nonzero = PETSC_FALSE;
2214   }
2215 
2216   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2217     row   = rows[j];
2218     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2219     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2220     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2221     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2222       if (diag != (PetscScalar)0.0) {
2223         if (baij->ilen[row/bs] > 0) {
2224           baij->ilen[row/bs]       = 1;
2225           baij->j[baij->i[row/bs]] = row/bs;
2226           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2227         }
2228         /* Now insert all the diagonal values for this bs */
2229         for (k=0; k<bs; k++) {
2230           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2231         }
2232       } else { /* (diag == 0.0) */
2233         baij->ilen[row/bs] = 0;
2234       } /* end (diag == 0.0) */
2235     } else { /* (sizes[i] != bs) */
2236 #if defined (PETSC_USE_DEBUG)
2237       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2238 #endif
2239       for (k=0; k<count; k++) {
2240         aa[0] =  zero;
2241         aa    += bs;
2242       }
2243       if (diag != (PetscScalar)0.0) {
2244         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2245       }
2246     }
2247   }
2248 
2249   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2250   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2256 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2257 {
2258   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2259   PetscErrorCode    ierr;
2260   PetscInt          i,j,k,count;
2261   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2262   PetscScalar       zero = 0.0;
2263   MatScalar         *aa;
2264   const PetscScalar *xx;
2265   PetscScalar       *bb;
2266   PetscBool         *zeroed,vecs = PETSC_FALSE;
2267 
2268   PetscFunctionBegin;
2269   /* fix right hand side if needed */
2270   if (x && b) {
2271     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2272     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2273     vecs = PETSC_TRUE;
2274   }
2275   A->same_nonzero = PETSC_TRUE;
2276 
2277   /* zero the columns */
2278   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2279   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2280   for (i=0; i<is_n; i++) {
2281     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2282     zeroed[is_idx[i]] = PETSC_TRUE;
2283   }
2284   for (i=0; i<A->rmap->N; i++) {
2285     if (!zeroed[i]) {
2286       row = i/bs;
2287       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2288         for (k=0; k<bs; k++) {
2289           col = bs*baij->j[j] + k;
2290           if (zeroed[col]) {
2291             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2292             if (vecs) bb[i] -= aa[0]*xx[col];
2293             aa[0] = 0.0;
2294           }
2295         }
2296       }
2297     } else if (vecs) bb[i] = diag*xx[i];
2298   }
2299   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2300   if (vecs) {
2301     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2302     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2303   }
2304 
2305   /* zero the rows */
2306   for (i=0; i<is_n; i++) {
2307     row   = is_idx[i];
2308     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2309     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2310     for (k=0; k<count; k++) {
2311       aa[0] =  zero;
2312       aa    += bs;
2313     }
2314     if (diag != (PetscScalar)0.0) {
2315       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2316     }
2317   }
2318   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2324 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2325 {
2326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2327   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2328   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2329   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2330   PetscErrorCode ierr;
2331   PetscInt       ridx,cidx,bs2=a->bs2;
2332   PetscBool      roworiented=a->roworiented;
2333   MatScalar      *ap,value,*aa=a->a,*bap;
2334 
2335   PetscFunctionBegin;
2336   if (v) PetscValidScalarPointer(v,6);
2337   for (k=0; k<m; k++) { /* loop over added rows */
2338     row  = im[k];
2339     brow = row/bs;
2340     if (row < 0) continue;
2341 #if defined(PETSC_USE_DEBUG)
2342     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2343 #endif
2344     rp   = aj + ai[brow];
2345     ap   = aa + bs2*ai[brow];
2346     rmax = imax[brow];
2347     nrow = ailen[brow];
2348     low  = 0;
2349     high = nrow;
2350     for (l=0; l<n; l++) { /* loop over added columns */
2351       if (in[l] < 0) continue;
2352 #if defined(PETSC_USE_DEBUG)
2353       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2354 #endif
2355       col = in[l]; bcol = col/bs;
2356       ridx = row % bs; cidx = col % bs;
2357       if (roworiented) {
2358         value = v[l + k*n];
2359       } else {
2360         value = v[k + l*m];
2361       }
2362       if (col <= lastcol) low = 0; else high = nrow;
2363       lastcol = col;
2364       while (high-low > 7) {
2365         t = (low+high)/2;
2366         if (rp[t] > bcol) high = t;
2367         else              low  = t;
2368       }
2369       for (i=low; i<high; i++) {
2370         if (rp[i] > bcol) break;
2371         if (rp[i] == bcol) {
2372           bap  = ap +  bs2*i + bs*cidx + ridx;
2373           if (is == ADD_VALUES) *bap += value;
2374           else                  *bap  = value;
2375           goto noinsert1;
2376         }
2377       }
2378       if (nonew == 1) goto noinsert1;
2379       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2380       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2381       N = nrow++ - 1; high++;
2382       /* shift up all the later entries in this row */
2383       for (ii=N; ii>=i; ii--) {
2384         rp[ii+1] = rp[ii];
2385         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2386       }
2387       if (N>=i) {
2388         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2389       }
2390       rp[i]                      = bcol;
2391       ap[bs2*i + bs*cidx + ridx] = value;
2392       a->nz++;
2393       noinsert1:;
2394       low = i;
2395     }
2396     ailen[brow] = nrow;
2397   }
2398   A->same_nonzero = PETSC_FALSE;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2404 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2405 {
2406   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2407   Mat            outA;
2408   PetscErrorCode ierr;
2409   PetscBool      row_identity,col_identity;
2410 
2411   PetscFunctionBegin;
2412   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2413   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2414   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2415   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2416 
2417   outA            = inA;
2418   inA->factortype = MAT_FACTOR_LU;
2419 
2420   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2421 
2422   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2423   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2424   a->row = row;
2425   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2426   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2427   a->col = col;
2428 
2429   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2430   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2431    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2432   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2433 
2434   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2435   if (!a->solve_work) {
2436     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2437     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2438   }
2439   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2440 
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 EXTERN_C_BEGIN
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2447 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2448 {
2449   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2450   PetscInt    i,nz,mbs;
2451 
2452   PetscFunctionBegin;
2453   nz  = baij->maxnz;
2454   mbs = baij->mbs;
2455   for (i=0; i<nz; i++) {
2456     baij->j[i] = indices[i];
2457   }
2458   baij->nz = nz;
2459   for (i=0; i<mbs; i++) {
2460     baij->ilen[i] = baij->imax[i];
2461   }
2462   PetscFunctionReturn(0);
2463 }
2464 EXTERN_C_END
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2468 /*@
2469     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2470        in the matrix.
2471 
2472   Input Parameters:
2473 +  mat - the SeqBAIJ matrix
2474 -  indices - the column indices
2475 
2476   Level: advanced
2477 
2478   Notes:
2479     This can be called if you have precomputed the nonzero structure of the
2480   matrix and want to provide it to the matrix object to improve the performance
2481   of the MatSetValues() operation.
2482 
2483     You MUST have set the correct numbers of nonzeros per row in the call to
2484   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2485 
2486     MUST be called before any calls to MatSetValues();
2487 
2488 @*/
2489 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2490 {
2491   PetscErrorCode ierr;
2492 
2493   PetscFunctionBegin;
2494   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2495   PetscValidPointer(indices,2);
2496   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2502 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2503 {
2504   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2505   PetscErrorCode ierr;
2506   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2507   PetscReal      atmp;
2508   PetscScalar    *x,zero = 0.0;
2509   MatScalar      *aa;
2510   PetscInt       ncols,brow,krow,kcol;
2511 
2512   PetscFunctionBegin;
2513   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2514   bs   = A->rmap->bs;
2515   aa   = a->a;
2516   ai   = a->i;
2517   aj   = a->j;
2518   mbs  = a->mbs;
2519 
2520   ierr = VecSet(v,zero);CHKERRQ(ierr);
2521   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2522   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2523   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2524   for (i=0; i<mbs; i++) {
2525     ncols = ai[1] - ai[0]; ai++;
2526     brow  = bs*i;
2527     for (j=0; j<ncols; j++) {
2528       for (kcol=0; kcol<bs; kcol++) {
2529         for (krow=0; krow<bs; krow++) {
2530           atmp = PetscAbsScalar(*aa);aa++;
2531           row = brow + krow;    /* row index */
2532           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2533           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2534         }
2535       }
2536       aj++;
2537     }
2538   }
2539   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatCopy_SeqBAIJ"
2545 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2546 {
2547   PetscErrorCode ierr;
2548 
2549   PetscFunctionBegin;
2550   /* If the two matrices have the same copy implementation, use fast copy. */
2551   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2552     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2553     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2554     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2555 
2556     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2557     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2558     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2559   } else {
2560     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2561   }
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2567 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2568 {
2569   PetscErrorCode ierr;
2570 
2571   PetscFunctionBegin;
2572   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 #undef __FUNCT__
2577 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2578 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2579 {
2580   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2581 
2582   PetscFunctionBegin;
2583   *array = a->a;
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2589 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2590 {
2591   PetscFunctionBegin;
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2597 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2598 {
2599   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2600   PetscErrorCode ierr;
2601   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2602   PetscBLASInt   one=1;
2603 
2604   PetscFunctionBegin;
2605   if (str == SAME_NONZERO_PATTERN) {
2606     PetscScalar  alpha = a;
2607     PetscBLASInt bnz;
2608     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2609     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2610   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2611     if (y->xtoy && y->XtoY != X) {
2612       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2613       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2614     }
2615     if (!y->xtoy) { /* get xtoy */
2616       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2617       y->XtoY = X;
2618       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2619     }
2620     for (i=0; i<x->nz; i++) {
2621       j = 0;
2622       while (j < bs2) {
2623         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2624         j++;
2625       }
2626     }
2627     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2628   } else {
2629     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2630   }
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 #undef __FUNCT__
2635 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2636 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2637 {
2638   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2639   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2640   MatScalar      *aa = a->a;
2641 
2642   PetscFunctionBegin;
2643   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2649 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2650 {
2651   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2652   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2653   MatScalar      *aa = a->a;
2654 
2655   PetscFunctionBegin;
2656   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2664 /*
2665     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2666 */
2667 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2668 {
2669   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2670   PetscErrorCode ierr;
2671   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2672   PetscInt       nz = a->i[m],row,*jj,mr,col;
2673 
2674   PetscFunctionBegin;
2675   *nn = n;
2676   if (!ia) PetscFunctionReturn(0);
2677   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2678   else {
2679     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2680     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2681     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2682     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2683     jj = a->j;
2684     for (i=0; i<nz; i++) {
2685       collengths[jj[i]]++;
2686     }
2687     cia[0] = oshift;
2688     for (i=0; i<n; i++) {
2689       cia[i+1] = cia[i] + collengths[i];
2690     }
2691     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2692     jj   = a->j;
2693     for (row=0; row<m; row++) {
2694       mr = a->i[row+1] - a->i[row];
2695       for (i=0; i<mr; i++) {
2696         col = *jj++;
2697         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2698       }
2699     }
2700     ierr = PetscFree(collengths);CHKERRQ(ierr);
2701     *ia = cia; *ja = cja;
2702   }
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2708 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2709 {
2710   PetscErrorCode ierr;
2711 
2712   PetscFunctionBegin;
2713   if (!ia) PetscFunctionReturn(0);
2714   ierr = PetscFree(*ia);CHKERRQ(ierr);
2715   ierr = PetscFree(*ja);CHKERRQ(ierr);
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 #undef __FUNCT__
2720 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2721 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2722 {
2723   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2724   PetscErrorCode ierr;
2725   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2726   PetscScalar    dx,*y,*xx,*w3_array;
2727   PetscScalar    *vscale_array;
2728   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2729   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2730   void           *fctx = coloring->fctx;
2731   PetscBool      flg = PETSC_FALSE;
2732   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2733   Vec            x1_tmp;
2734 
2735   PetscFunctionBegin;
2736   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2737   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2738   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2739   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2740 
2741   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2742   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2743   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2744   if (flg) {
2745     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2746   } else {
2747     PetscBool  assembled;
2748     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2749     if (assembled) {
2750       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2751     }
2752   }
2753 
2754   x1_tmp = x1;
2755   if (!coloring->vscale) {
2756     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2757   }
2758 
2759   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2760     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2761   }
2762   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2763 
2764   /* Set w1 = F(x1) */
2765   if (!coloring->fset) {
2766     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2767     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2768     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2769   } else {
2770     coloring->fset = PETSC_FALSE;
2771   }
2772 
2773   if (!coloring->w3) {
2774     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2775     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2776   }
2777   w3 = coloring->w3;
2778 
2779     CHKMEMQ;
2780     /* Compute all the local scale factors, including ghost points */
2781   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2782   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2783   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2784   if (ctype == IS_COLORING_GHOSTED) {
2785     col_start = 0; col_end = N;
2786   } else if (ctype == IS_COLORING_GLOBAL) {
2787     xx = xx - start;
2788     vscale_array = vscale_array - start;
2789     col_start = start; col_end = N + start;
2790   }    CHKMEMQ;
2791   for (col=col_start; col<col_end; col++) {
2792     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2793     if (coloring->htype[0] == 'w') {
2794       dx = 1.0 + unorm;
2795     } else {
2796       dx  = xx[col];
2797     }
2798     if (dx == (PetscScalar)0.0) dx = 1.0;
2799     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2800     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2801     dx               *= epsilon;
2802     vscale_array[col] = (PetscScalar)1.0/dx;
2803   }     CHKMEMQ;
2804   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2805   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2806   if (ctype == IS_COLORING_GLOBAL) {
2807     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2808     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2809   }
2810   CHKMEMQ;
2811   if (coloring->vscaleforrow) {
2812     vscaleforrow = coloring->vscaleforrow;
2813   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2814 
2815   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2816   /*
2817     Loop over each color
2818   */
2819   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2820   for (k=0; k<coloring->ncolors; k++) {
2821     coloring->currentcolor = k;
2822     for (i=0; i<bs; i++) {
2823       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2824       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2825       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2826       /*
2827         Loop over each column associated with color
2828         adding the perturbation to the vector w3.
2829       */
2830       for (l=0; l<coloring->ncolumns[k]; l++) {
2831         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2832         if (coloring->htype[0] == 'w') {
2833           dx = 1.0 + unorm;
2834         } else {
2835           dx  = xx[col];
2836         }
2837         if (dx == (PetscScalar)0.0) dx = 1.0;
2838         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2839         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2840         dx            *= epsilon;
2841         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2842         w3_array[col] += dx;
2843       }
2844       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2845       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2846 
2847       /*
2848         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2849         w2 = F(x1 + dx) - F(x1)
2850       */
2851       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2852       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2853       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2854       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2855 
2856       /*
2857         Loop over rows of vector, putting results into Jacobian matrix
2858       */
2859       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2860       for (l=0; l<coloring->nrows[k]; l++) {
2861         row    = bs*coloring->rows[k][l];             /* local row index */
2862         col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2863         for (j=0; j<bs; j++) {
2864           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2865           srows[j]  = row + start + j;
2866         }
2867         ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2868       }
2869       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2870     }
2871   } /* endof for each color */
2872   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2873   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2874   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2875   ierr = PetscFree(srows);CHKERRQ(ierr);
2876 
2877   coloring->currentcolor = -1;
2878   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 /* -------------------------------------------------------------------*/
2885 static struct _MatOps MatOps_Values = {
2886         MatSetValues_SeqBAIJ,
2887         MatGetRow_SeqBAIJ,
2888         MatRestoreRow_SeqBAIJ,
2889         MatMult_SeqBAIJ_N,
2890 /* 4*/  MatMultAdd_SeqBAIJ_N,
2891         MatMultTranspose_SeqBAIJ,
2892         MatMultTransposeAdd_SeqBAIJ,
2893         0,
2894         0,
2895         0,
2896 /* 10*/ 0,
2897         MatLUFactor_SeqBAIJ,
2898         0,
2899         0,
2900         MatTranspose_SeqBAIJ,
2901 /* 15*/ MatGetInfo_SeqBAIJ,
2902         MatEqual_SeqBAIJ,
2903         MatGetDiagonal_SeqBAIJ,
2904         MatDiagonalScale_SeqBAIJ,
2905         MatNorm_SeqBAIJ,
2906 /* 20*/ 0,
2907         MatAssemblyEnd_SeqBAIJ,
2908         MatSetOption_SeqBAIJ,
2909         MatZeroEntries_SeqBAIJ,
2910 /* 24*/ MatZeroRows_SeqBAIJ,
2911         0,
2912         0,
2913         0,
2914         0,
2915 /* 29*/ MatSetUp_SeqBAIJ,
2916         0,
2917         0,
2918         0,
2919         0,
2920 /* 34*/ MatDuplicate_SeqBAIJ,
2921         0,
2922         0,
2923         MatILUFactor_SeqBAIJ,
2924         0,
2925 /* 39*/ MatAXPY_SeqBAIJ,
2926         MatGetSubMatrices_SeqBAIJ,
2927         MatIncreaseOverlap_SeqBAIJ,
2928         MatGetValues_SeqBAIJ,
2929         MatCopy_SeqBAIJ,
2930 /* 44*/ 0,
2931         MatScale_SeqBAIJ,
2932         0,
2933         0,
2934         MatZeroRowsColumns_SeqBAIJ,
2935 /* 49*/ 0,
2936         MatGetRowIJ_SeqBAIJ,
2937         MatRestoreRowIJ_SeqBAIJ,
2938         MatGetColumnIJ_SeqBAIJ,
2939         MatRestoreColumnIJ_SeqBAIJ,
2940 /* 54*/ MatFDColoringCreate_SeqAIJ,
2941         0,
2942         0,
2943         0,
2944         MatSetValuesBlocked_SeqBAIJ,
2945 /* 59*/ MatGetSubMatrix_SeqBAIJ,
2946         MatDestroy_SeqBAIJ,
2947         MatView_SeqBAIJ,
2948         0,
2949         0,
2950 /* 64*/ 0,
2951         0,
2952         0,
2953         0,
2954         0,
2955 /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2956         0,
2957         MatConvert_Basic,
2958         0,
2959         0,
2960 /* 74*/ 0,
2961         MatFDColoringApply_BAIJ,
2962         0,
2963         0,
2964         0,
2965 /* 79*/ 0,
2966         0,
2967         0,
2968         0,
2969         MatLoad_SeqBAIJ,
2970 /* 84*/ 0,
2971         0,
2972         0,
2973         0,
2974         0,
2975 /* 89*/ 0,
2976         0,
2977         0,
2978         0,
2979         0,
2980 /* 94*/ 0,
2981         0,
2982         0,
2983         0,
2984         0,
2985 /* 99*/ 0,
2986         0,
2987         0,
2988         0,
2989         0,
2990 /*104*/ 0,
2991         MatRealPart_SeqBAIJ,
2992         MatImaginaryPart_SeqBAIJ,
2993         0,
2994         0,
2995 /*109*/ 0,
2996         0,
2997         0,
2998         0,
2999         MatMissingDiagonal_SeqBAIJ,
3000 /*114*/ 0,
3001         0,
3002         0,
3003         0,
3004         0,
3005 /*119*/ 0,
3006         0,
3007         MatMultHermitianTranspose_SeqBAIJ,
3008         MatMultHermitianTransposeAdd_SeqBAIJ,
3009         0,
3010 /*124*/ 0,
3011         0,
3012         MatInvertBlockDiagonal_SeqBAIJ
3013 };
3014 
3015 EXTERN_C_BEGIN
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3018 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3019 {
3020   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3021   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3022   PetscErrorCode ierr;
3023 
3024   PetscFunctionBegin;
3025   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3026 
3027   /* allocate space for values if not already there */
3028   if (!aij->saved_values) {
3029     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3030     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3031   }
3032 
3033   /* copy values over */
3034   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3035   PetscFunctionReturn(0);
3036 }
3037 EXTERN_C_END
3038 
3039 EXTERN_C_BEGIN
3040 #undef __FUNCT__
3041 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3042 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3043 {
3044   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3045   PetscErrorCode ierr;
3046   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3047 
3048   PetscFunctionBegin;
3049   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3050   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3051 
3052   /* copy values over */
3053   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 EXTERN_C_END
3057 
3058 EXTERN_C_BEGIN
3059 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3060 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3061 EXTERN_C_END
3062 
3063 EXTERN_C_BEGIN
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3066 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3067 {
3068   Mat_SeqBAIJ    *b;
3069   PetscErrorCode ierr;
3070   PetscInt       i,mbs,nbs,bs2;
3071   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3072 
3073   PetscFunctionBegin;
3074   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3075   if (nz == MAT_SKIP_ALLOCATION) {
3076     skipallocation = PETSC_TRUE;
3077     nz             = 0;
3078   }
3079 
3080   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3081   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3082   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3083   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3084   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3085 
3086   B->preallocated = PETSC_TRUE;
3087 
3088   mbs  = B->rmap->n/bs;
3089   nbs  = B->cmap->n/bs;
3090   bs2  = bs*bs;
3091 
3092   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3093 
3094   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3095   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3096   if (nnz) {
3097     for (i=0; i<mbs; i++) {
3098       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3099       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3100     }
3101   }
3102 
3103   b       = (Mat_SeqBAIJ*)B->data;
3104   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3105     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3106   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3107 
3108   if (!flg) {
3109     switch (bs) {
3110     case 1:
3111       B->ops->mult            = MatMult_SeqBAIJ_1;
3112       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3113       B->ops->sor             = MatSOR_SeqBAIJ_1;
3114       break;
3115     case 2:
3116       B->ops->mult            = MatMult_SeqBAIJ_2;
3117       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3118       B->ops->sor             = MatSOR_SeqBAIJ_2;
3119       break;
3120     case 3:
3121       B->ops->mult            = MatMult_SeqBAIJ_3;
3122       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3123       B->ops->sor             = MatSOR_SeqBAIJ_3;
3124       break;
3125     case 4:
3126       B->ops->mult            = MatMult_SeqBAIJ_4;
3127       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3128       B->ops->sor             = MatSOR_SeqBAIJ_4;
3129       break;
3130     case 5:
3131       B->ops->mult            = MatMult_SeqBAIJ_5;
3132       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3133       B->ops->sor             = MatSOR_SeqBAIJ_5;
3134       break;
3135     case 6:
3136       B->ops->mult            = MatMult_SeqBAIJ_6;
3137       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3138       B->ops->sor             = MatSOR_SeqBAIJ_6;
3139       break;
3140     case 7:
3141       B->ops->mult            = MatMult_SeqBAIJ_7;
3142       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3143       B->ops->sor             = MatSOR_SeqBAIJ_7;
3144       break;
3145     case 15:
3146       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3147       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3148       B->ops->sor             = MatSOR_SeqBAIJ_N;
3149       break;
3150     default:
3151       B->ops->mult            = MatMult_SeqBAIJ_N;
3152       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3153       B->ops->sor             = MatSOR_SeqBAIJ_N;
3154       break;
3155     }
3156   }
3157   b->mbs       = mbs;
3158   b->nbs       = nbs;
3159   if (!skipallocation) {
3160     if (!b->imax) {
3161       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3162       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3163       b->free_imax_ilen = PETSC_TRUE;
3164     }
3165     /* b->ilen will count nonzeros in each block row so far. */
3166     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3167     if (!nnz) {
3168       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3169       else if (nz < 0) nz = 1;
3170       for (i=0; i<mbs; i++) b->imax[i] = nz;
3171       nz = nz*mbs;
3172     } else {
3173       nz = 0;
3174       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3175     }
3176 
3177     /* allocate the matrix space */
3178     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3179     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3180     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3181     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3182     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3183     b->singlemalloc = PETSC_TRUE;
3184     b->i[0] = 0;
3185     for (i=1; i<mbs+1; i++) {
3186       b->i[i] = b->i[i-1] + b->imax[i-1];
3187     }
3188     b->free_a     = PETSC_TRUE;
3189     b->free_ij    = PETSC_TRUE;
3190   } else {
3191     b->free_a     = PETSC_FALSE;
3192     b->free_ij    = PETSC_FALSE;
3193   }
3194 
3195   b->bs2              = bs2;
3196   b->mbs              = mbs;
3197   b->nz               = 0;
3198   b->maxnz            = nz;
3199   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3200   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3201   PetscFunctionReturn(0);
3202 }
3203 EXTERN_C_END
3204 
3205 EXTERN_C_BEGIN
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3208 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3209 {
3210   PetscInt       i,m,nz,nz_max=0,*nnz;
3211   PetscScalar    *values=0;
3212   PetscErrorCode ierr;
3213 
3214   PetscFunctionBegin;
3215   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3216   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3217   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3218   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3219   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3220   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3221   m = B->rmap->n/bs;
3222 
3223   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3224   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3225   for (i=0; i<m; i++) {
3226     nz = ii[i+1]- ii[i];
3227     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3228     nz_max = PetscMax(nz_max, nz);
3229     nnz[i] = nz;
3230   }
3231   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3232   ierr = PetscFree(nnz);CHKERRQ(ierr);
3233 
3234   values = (PetscScalar*)V;
3235   if (!values) {
3236     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3237     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3238   }
3239   for (i=0; i<m; i++) {
3240     PetscInt          ncols  = ii[i+1] - ii[i];
3241     const PetscInt    *icols = jj + ii[i];
3242     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3243     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3244   }
3245   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3246   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3247   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3248   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3249   PetscFunctionReturn(0);
3250 }
3251 EXTERN_C_END
3252 
3253 
3254 EXTERN_C_BEGIN
3255 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3256 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3257 #if defined(PETSC_HAVE_MUMPS)
3258 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3259 #endif
3260 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3261 EXTERN_C_END
3262 
3263 /*MC
3264    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3265    block sparse compressed row format.
3266 
3267    Options Database Keys:
3268 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3269 
3270   Level: beginner
3271 
3272 .seealso: MatCreateSeqBAIJ()
3273 M*/
3274 
3275 EXTERN_C_BEGIN
3276 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3277 EXTERN_C_END
3278 
3279 EXTERN_C_BEGIN
3280 #undef __FUNCT__
3281 #define __FUNCT__ "MatCreate_SeqBAIJ"
3282 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3283 {
3284   PetscErrorCode ierr;
3285   PetscMPIInt    size;
3286   Mat_SeqBAIJ    *b;
3287 
3288   PetscFunctionBegin;
3289   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3290   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3291 
3292   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3293   B->data = (void*)b;
3294   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3295   b->row                   = 0;
3296   b->col                   = 0;
3297   b->icol                  = 0;
3298   b->reallocs              = 0;
3299   b->saved_values          = 0;
3300 
3301   b->roworiented           = PETSC_TRUE;
3302   b->nonew                 = 0;
3303   b->diag                  = 0;
3304   b->solve_work            = 0;
3305   b->mult_work             = 0;
3306   B->spptr                 = 0;
3307   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3308   b->keepnonzeropattern    = PETSC_FALSE;
3309   b->xtoy                  = 0;
3310   b->XtoY                  = 0;
3311   B->same_nonzero          = PETSC_FALSE;
3312 
3313   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3314                                      "MatGetFactorAvailable_seqbaij_petsc",
3315                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3316   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3317                                      "MatGetFactor_seqbaij_petsc",
3318                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3319   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3320                                      "MatGetFactor_seqbaij_bstrm",
3321                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3322 #if defined(PETSC_HAVE_MUMPS)
3323   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3324 #endif
3325   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3326                                      "MatInvertBlockDiagonal_SeqBAIJ",
3327                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3328   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3329                                      "MatStoreValues_SeqBAIJ",
3330                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3331   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3332                                      "MatRetrieveValues_SeqBAIJ",
3333                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3334   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3335                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3336                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3337   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3338                                      "MatConvert_SeqBAIJ_SeqAIJ",
3339                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3340   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3341                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3342                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3343   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3344                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3345                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3346   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3347                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3348                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3350                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3351                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3353                                      "MatIsTranspose_SeqBAIJ",
3354                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3355   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3356   PetscFunctionReturn(0);
3357 }
3358 EXTERN_C_END
3359 
3360 #undef __FUNCT__
3361 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3362 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3363 {
3364   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3365   PetscErrorCode ierr;
3366   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3367 
3368   PetscFunctionBegin;
3369   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3370 
3371   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3372     c->imax = a->imax;
3373     c->ilen = a->ilen;
3374     c->free_imax_ilen = PETSC_FALSE;
3375   } else {
3376     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3377     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3378     for (i=0; i<mbs; i++) {
3379       c->imax[i] = a->imax[i];
3380       c->ilen[i] = a->ilen[i];
3381     }
3382     c->free_imax_ilen = PETSC_TRUE;
3383   }
3384 
3385   /* allocate the matrix space */
3386   if (mallocmatspace) {
3387     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3388       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3389       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3390       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3391       c->i            = a->i;
3392       c->j            = a->j;
3393       c->singlemalloc = PETSC_FALSE;
3394       c->free_a       = PETSC_TRUE;
3395       c->free_ij      = PETSC_FALSE;
3396       c->parent       = A;
3397       C->preallocated = PETSC_TRUE;
3398       C->assembled    = PETSC_TRUE;
3399       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3400       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3401       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3402     } else {
3403       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3404       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3405       c->singlemalloc = PETSC_TRUE;
3406       c->free_a       = PETSC_TRUE;
3407       c->free_ij      = PETSC_TRUE;
3408       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3409       if (mbs > 0) {
3410         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3411         if (cpvalues == MAT_COPY_VALUES) {
3412           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3413         } else {
3414           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3415         }
3416       }
3417       C->preallocated = PETSC_TRUE;
3418       C->assembled    = PETSC_TRUE;
3419     }
3420   }
3421 
3422   c->roworiented = a->roworiented;
3423   c->nonew       = a->nonew;
3424   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3425   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3426   c->bs2         = a->bs2;
3427   c->mbs         = a->mbs;
3428   c->nbs         = a->nbs;
3429 
3430   if (a->diag) {
3431     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3432       c->diag      = a->diag;
3433       c->free_diag = PETSC_FALSE;
3434     } else {
3435       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3436       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3437       for (i=0; i<mbs; i++) {
3438         c->diag[i] = a->diag[i];
3439       }
3440       c->free_diag = PETSC_TRUE;
3441     }
3442   } else c->diag        = 0;
3443   c->nz                 = a->nz;
3444   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3445   c->solve_work         = 0;
3446   c->mult_work          = 0;
3447 
3448   c->compressedrow.use     = a->compressedrow.use;
3449   c->compressedrow.nrows   = a->compressedrow.nrows;
3450   c->compressedrow.check   = a->compressedrow.check;
3451   if (a->compressedrow.use) {
3452     i = a->compressedrow.nrows;
3453     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3454     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3455     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3456     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3457   } else {
3458     c->compressedrow.use    = PETSC_FALSE;
3459     c->compressedrow.i      = PETSC_NULL;
3460     c->compressedrow.rindex = PETSC_NULL;
3461   }
3462   C->same_nonzero = A->same_nonzero;
3463   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3464   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3465   PetscFunctionReturn(0);
3466 }
3467 
3468 #undef __FUNCT__
3469 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3470 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3471 {
3472   PetscErrorCode ierr;
3473 
3474   PetscFunctionBegin;
3475   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3476   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3477   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3478   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3479   PetscFunctionReturn(0);
3480 }
3481 
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatLoad_SeqBAIJ"
3484 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3485 {
3486   Mat_SeqBAIJ    *a;
3487   PetscErrorCode ierr;
3488   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3489   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3490   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3491   PetscInt       *masked,nmask,tmp,bs2,ishift;
3492   PetscMPIInt    size;
3493   int            fd;
3494   PetscScalar    *aa;
3495   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3496 
3497   PetscFunctionBegin;
3498   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3499   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3500   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3501   bs2  = bs*bs;
3502 
3503   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3504   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3505   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3506   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3507   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3508   M = header[1]; N = header[2]; nz = header[3];
3509 
3510   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3511   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3512 
3513   /*
3514      This code adds extra rows to make sure the number of rows is
3515     divisible by the blocksize
3516   */
3517   mbs        = M/bs;
3518   extra_rows = bs - M + bs*(mbs);
3519   if (extra_rows == bs) extra_rows = 0;
3520   else                  mbs++;
3521   if (extra_rows) {
3522     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3523   }
3524 
3525   /* Set global sizes if not already set */
3526   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3527     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3528   } else { /* Check if the matrix global sizes are correct */
3529     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3530     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3531       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3532     }
3533     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3534   }
3535 
3536   /* read in row lengths */
3537   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3538   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3539   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3540 
3541   /* read in column indices */
3542   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3543   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3544   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3545 
3546   /* loop over row lengths determining block row lengths */
3547   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3548   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3549   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3550   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3551   rowcount = 0;
3552   nzcount = 0;
3553   for (i=0; i<mbs; i++) {
3554     nmask = 0;
3555     for (j=0; j<bs; j++) {
3556       kmax = rowlengths[rowcount];
3557       for (k=0; k<kmax; k++) {
3558         tmp = jj[nzcount++]/bs;
3559         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3560       }
3561       rowcount++;
3562     }
3563     browlengths[i] += nmask;
3564     /* zero out the mask elements we set */
3565     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3566   }
3567 
3568   /* Do preallocation  */
3569   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3570   a = (Mat_SeqBAIJ*)newmat->data;
3571 
3572   /* set matrix "i" values */
3573   a->i[0] = 0;
3574   for (i=1; i<= mbs; i++) {
3575     a->i[i]      = a->i[i-1] + browlengths[i-1];
3576     a->ilen[i-1] = browlengths[i-1];
3577   }
3578   a->nz         = 0;
3579   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3580 
3581   /* read in nonzero values */
3582   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3583   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3584   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3585 
3586   /* set "a" and "j" values into matrix */
3587   nzcount = 0; jcount = 0;
3588   for (i=0; i<mbs; i++) {
3589     nzcountb = nzcount;
3590     nmask    = 0;
3591     for (j=0; j<bs; j++) {
3592       kmax = rowlengths[i*bs+j];
3593       for (k=0; k<kmax; k++) {
3594         tmp = jj[nzcount++]/bs;
3595         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3596       }
3597     }
3598     /* sort the masked values */
3599     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3600 
3601     /* set "j" values into matrix */
3602     maskcount = 1;
3603     for (j=0; j<nmask; j++) {
3604       a->j[jcount++]  = masked[j];
3605       mask[masked[j]] = maskcount++;
3606     }
3607     /* set "a" values into matrix */
3608     ishift = bs2*a->i[i];
3609     for (j=0; j<bs; j++) {
3610       kmax = rowlengths[i*bs+j];
3611       for (k=0; k<kmax; k++) {
3612         tmp       = jj[nzcountb]/bs ;
3613         block     = mask[tmp] - 1;
3614         point     = jj[nzcountb] - bs*tmp;
3615         idx       = ishift + bs2*block + j + bs*point;
3616         a->a[idx] = (MatScalar)aa[nzcountb++];
3617       }
3618     }
3619     /* zero out the mask elements we set */
3620     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3621   }
3622   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3623 
3624   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3625   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3626   ierr = PetscFree(aa);CHKERRQ(ierr);
3627   ierr = PetscFree(jj);CHKERRQ(ierr);
3628   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3629 
3630   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   PetscFunctionReturn(0);
3633 }
3634 
3635 #undef __FUNCT__
3636 #define __FUNCT__ "MatCreateSeqBAIJ"
3637 /*@C
3638    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3639    compressed row) format.  For good matrix assembly performance the
3640    user should preallocate the matrix storage by setting the parameter nz
3641    (or the array nnz).  By setting these parameters accurately, performance
3642    during matrix assembly can be increased by more than a factor of 50.
3643 
3644    Collective on MPI_Comm
3645 
3646    Input Parameters:
3647 +  comm - MPI communicator, set to PETSC_COMM_SELF
3648 .  bs - size of block
3649 .  m - number of rows
3650 .  n - number of columns
3651 .  nz - number of nonzero blocks  per block row (same for all rows)
3652 -  nnz - array containing the number of nonzero blocks in the various block rows
3653          (possibly different for each block row) or PETSC_NULL
3654 
3655    Output Parameter:
3656 .  A - the matrix
3657 
3658    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3659    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3660    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3661 
3662    Options Database Keys:
3663 .   -mat_no_unroll - uses code that does not unroll the loops in the
3664                      block calculations (much slower)
3665 .    -mat_block_size - size of the blocks to use
3666 
3667    Level: intermediate
3668 
3669    Notes:
3670    The number of rows and columns must be divisible by blocksize.
3671 
3672    If the nnz parameter is given then the nz parameter is ignored
3673 
3674    A nonzero block is any block that as 1 or more nonzeros in it
3675 
3676    The block AIJ format is fully compatible with standard Fortran 77
3677    storage.  That is, the stored row and column indices can begin at
3678    either one (as in Fortran) or zero.  See the users' manual for details.
3679 
3680    Specify the preallocated storage with either nz or nnz (not both).
3681    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3682    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3683    matrices.
3684 
3685 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3686 @*/
3687 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3688 {
3689   PetscErrorCode ierr;
3690 
3691   PetscFunctionBegin;
3692   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3693   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3694   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3695   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3696   PetscFunctionReturn(0);
3697 }
3698 
3699 #undef __FUNCT__
3700 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3701 /*@C
3702    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3703    per row in the matrix. For good matrix assembly performance the
3704    user should preallocate the matrix storage by setting the parameter nz
3705    (or the array nnz).  By setting these parameters accurately, performance
3706    during matrix assembly can be increased by more than a factor of 50.
3707 
3708    Collective on MPI_Comm
3709 
3710    Input Parameters:
3711 +  A - the matrix
3712 .  bs - size of block
3713 .  nz - number of block nonzeros per block row (same for all rows)
3714 -  nnz - array containing the number of block nonzeros in the various block rows
3715          (possibly different for each block row) or PETSC_NULL
3716 
3717    Options Database Keys:
3718 .   -mat_no_unroll - uses code that does not unroll the loops in the
3719                      block calculations (much slower)
3720 .    -mat_block_size - size of the blocks to use
3721 
3722    Level: intermediate
3723 
3724    Notes:
3725    If the nnz parameter is given then the nz parameter is ignored
3726 
3727    You can call MatGetInfo() to get information on how effective the preallocation was;
3728    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3729    You can also run with the option -info and look for messages with the string
3730    malloc in them to see if additional memory allocation was needed.
3731 
3732    The block AIJ format is fully compatible with standard Fortran 77
3733    storage.  That is, the stored row and column indices can begin at
3734    either one (as in Fortran) or zero.  See the users' manual for details.
3735 
3736    Specify the preallocated storage with either nz or nnz (not both).
3737    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3738    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3739 
3740 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3741 @*/
3742 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3743 {
3744   PetscErrorCode ierr;
3745 
3746   PetscFunctionBegin;
3747   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3748   PetscValidType(B,1);
3749   PetscValidLogicalCollectiveInt(B,bs,2);
3750   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3751   PetscFunctionReturn(0);
3752 }
3753 
3754 #undef __FUNCT__
3755 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3756 /*@C
3757    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3758    (the default sequential PETSc format).
3759 
3760    Collective on MPI_Comm
3761 
3762    Input Parameters:
3763 +  A - the matrix
3764 .  i - the indices into j for the start of each local row (starts with zero)
3765 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3766 -  v - optional values in the matrix
3767 
3768    Level: developer
3769 
3770 .keywords: matrix, aij, compressed row, sparse
3771 
3772 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3773 @*/
3774 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3775 {
3776   PetscErrorCode ierr;
3777 
3778   PetscFunctionBegin;
3779   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3780   PetscValidType(B,1);
3781   PetscValidLogicalCollectiveInt(B,bs,2);
3782   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3783   PetscFunctionReturn(0);
3784 }
3785 
3786 
3787 #undef __FUNCT__
3788 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3789 /*@
3790      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3791 
3792      Collective on MPI_Comm
3793 
3794    Input Parameters:
3795 +  comm - must be an MPI communicator of size 1
3796 .  bs - size of block
3797 .  m - number of rows
3798 .  n - number of columns
3799 .  i - row indices
3800 .  j - column indices
3801 -  a - matrix values
3802 
3803    Output Parameter:
3804 .  mat - the matrix
3805 
3806    Level: advanced
3807 
3808    Notes:
3809        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3810     once the matrix is destroyed
3811 
3812        You cannot set new nonzero locations into this matrix, that will generate an error.
3813 
3814        The i and j indices are 0 based
3815 
3816        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3817 
3818 
3819 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3820 
3821 @*/
3822 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3823 {
3824   PetscErrorCode ierr;
3825   PetscInt       ii;
3826   Mat_SeqBAIJ    *baij;
3827 
3828   PetscFunctionBegin;
3829   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3830   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3831 
3832   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3833   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3834   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3835   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3836   baij = (Mat_SeqBAIJ*)(*mat)->data;
3837   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3838   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3839 
3840   baij->i = i;
3841   baij->j = j;
3842   baij->a = a;
3843   baij->singlemalloc = PETSC_FALSE;
3844   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3845   baij->free_a       = PETSC_FALSE;
3846   baij->free_ij      = PETSC_FALSE;
3847 
3848   for (ii=0; ii<m; ii++) {
3849     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3850 #if defined(PETSC_USE_DEBUG)
3851     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3852 #endif
3853   }
3854 #if defined(PETSC_USE_DEBUG)
3855   for (ii=0; ii<baij->i[m]; ii++) {
3856     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3857     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3858   }
3859 #endif
3860 
3861   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3862   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3863   PetscFunctionReturn(0);
3864 }
3865