xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cbfebe2e3d702f86f165d2c287dad1fbd9bf01f6)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr);
156   }
157   work = a->mult_work;
158   t = work + k+1;
159   if (!a->sor_work) {
160     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
161   }
162   w = a->sor_work;
163 
164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
165   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
166 
167   if (flag & SOR_ZERO_INITIAL_GUESS) {
168     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169       switch (bs) {
170       case 1:
171         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172         t[0] = b[0];
173         i2     = 1;
174         idiag += 1;
175         for (i=1; i<m; i++) {
176           v  = aa + ai[i];
177           vi = aj + ai[i];
178           nz = diag[i] - ai[i];
179           s[0] = b[i2];
180           for (j=0; j<nz; j++) {
181             xw[0] = x[vi[j]];
182             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183           }
184           t[i2] = s[0];
185           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186           x[i2]  = xw[0];
187           idiag += 1;
188           i2    += 1;
189         }
190         break;
191       case 2:
192         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193         t[0] = b[0]; t[1] = b[1];
194         i2     = 2;
195         idiag += 4;
196         for (i=1; i<m; i++) {
197           v  = aa + 4*ai[i];
198           vi = aj + ai[i];
199           nz = diag[i] - ai[i];
200           s[0] = b[i2]; s[1] = b[i2+1];
201           for (j=0; j<nz; j++) {
202             idx = 2*vi[j];
203             it  = 4*j;
204             xw[0] = x[idx]; xw[1] = x[1+idx];
205             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206           }
207           t[i2] = s[0]; t[i2+1] = s[1];
208           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209           x[i2]   = xw[0]; x[i2+1] = xw[1];
210           idiag  += 4;
211           i2     += 2;
212         }
213         break;
214       case 3:
215         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217         i2     = 3;
218         idiag += 9;
219         for (i=1; i<m; i++) {
220           v  = aa + 9*ai[i];
221           vi = aj + ai[i];
222           nz = diag[i] - ai[i];
223           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224           while (nz--) {
225             idx = 3*(*vi++);
226             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228             v  += 9;
229           }
230           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233           idiag  += 9;
234           i2     += 3;
235         }
236         break;
237       case 4:
238         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240         i2     = 4;
241         idiag += 16;
242         for (i=1; i<m; i++) {
243           v  = aa + 16*ai[i];
244           vi = aj + ai[i];
245           nz = diag[i] - ai[i];
246           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247           while (nz--) {
248             idx = 4*(*vi++);
249             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251             v  += 16;
252           }
253           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256           idiag  += 16;
257           i2     += 4;
258         }
259         break;
260       case 5:
261         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263         i2     = 5;
264         idiag += 25;
265         for (i=1; i<m; i++) {
266           v  = aa + 25*ai[i];
267           vi = aj + ai[i];
268           nz = diag[i] - ai[i];
269           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270           while (nz--) {
271             idx = 5*(*vi++);
272             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274             v  += 25;
275           }
276           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279           idiag  += 25;
280           i2     += 5;
281         }
282         break;
283       case 6:
284         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286         i2     = 6;
287         idiag += 36;
288         for (i=1; i<m; i++) {
289           v  = aa + 36*ai[i];
290           vi = aj + ai[i];
291           nz = diag[i] - ai[i];
292           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293           while (nz--) {
294             idx = 6*(*vi++);
295             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
296             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298             v  += 36;
299           }
300           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304           idiag  += 36;
305           i2     += 6;
306         }
307         break;
308       case 7:
309         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312         i2     = 7;
313         idiag += 49;
314         for (i=1; i<m; i++) {
315           v  = aa + 49*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
319           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320           while (nz--) {
321             idx = 7*(*vi++);
322             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
323             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325             v  += 49;
326           }
327           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332           idiag  += 49;
333           i2     += 7;
334         }
335         break;
336       default:
337         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
339         i2     = bs;
340         idiag += bs2;
341         for (i=1; i<m; i++) {
342           v  = aa + bs2*ai[i];
343           vi = aj + ai[i];
344           nz = diag[i] - ai[i];
345 
346           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
347           /* copy all rows of x that are needed into contiguous space */
348           workt = work;
349           for (j=0; j<nz; j++) {
350             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
351             workt += bs;
352           }
353           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
356 
357           idiag += bs2;
358           i2    += bs;
359         }
360         break;
361       }
362       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
364       xb = t;
365     }
366     else xb = b;
367     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368       idiag = a->idiag+bs2*(a->mbs-1);
369       i2 = bs * (m-1);
370       switch (bs) {
371       case 1:
372         s[0]  = xb[i2];
373         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374         x[i2] = xw[0];
375         i2   -= 1;
376         for (i=m-2; i>=0; i--) {
377           v  = aa + (diag[i]+1);
378           vi = aj + diag[i] + 1;
379           nz = ai[i+1] - diag[i] - 1;
380           s[0] = xb[i2];
381           for (j=0; j<nz; j++) {
382             xw[0] = x[vi[j]];
383             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384           }
385           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386           x[i2]  = xw[0];
387           idiag -= 1;
388           i2    -= 1;
389         }
390         break;
391       case 2:
392         s[0]  = xb[i2]; s[1] = xb[i2+1];
393         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394         x[i2] = xw[0]; x[i2+1] = xw[1];
395         i2    -= 2;
396         idiag -= 4;
397         for (i=m-2; i>=0; i--) {
398           v  = aa + 4*(diag[i] + 1);
399           vi = aj + diag[i] + 1;
400           nz = ai[i+1] - diag[i] - 1;
401           s[0] = xb[i2]; s[1] = xb[i2+1];
402           for (j=0; j<nz; j++) {
403             idx = 2*vi[j];
404             it  = 4*j;
405             xw[0] = x[idx]; xw[1] = x[1+idx];
406             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407           }
408           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409           x[i2]   = xw[0]; x[i2+1] = xw[1];
410           idiag  -= 4;
411           i2     -= 2;
412         }
413         break;
414       case 3:
415         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418         i2    -= 3;
419         idiag -= 9;
420         for (i=m-2; i>=0; i--) {
421           v  = aa + 9*(diag[i]+1);
422           vi = aj + diag[i] + 1;
423           nz = ai[i+1] - diag[i] - 1;
424           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425           while (nz--) {
426             idx = 3*(*vi++);
427             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429             v  += 9;
430           }
431           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433           idiag  -= 9;
434           i2     -= 3;
435         }
436         break;
437       case 4:
438         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441         i2    -= 4;
442         idiag -= 16;
443         for (i=m-2; i>=0; i--) {
444           v  = aa + 16*(diag[i]+1);
445           vi = aj + diag[i] + 1;
446           nz = ai[i+1] - diag[i] - 1;
447           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448           while (nz--) {
449             idx = 4*(*vi++);
450             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452             v  += 16;
453           }
454           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456           idiag  -= 16;
457           i2     -= 4;
458         }
459         break;
460       case 5:
461         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464         i2    -= 5;
465         idiag -= 25;
466         for (i=m-2; i>=0; i--) {
467           v  = aa + 25*(diag[i]+1);
468           vi = aj + diag[i] + 1;
469           nz = ai[i+1] - diag[i] - 1;
470           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471           while (nz--) {
472             idx = 5*(*vi++);
473             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475             v  += 25;
476           }
477           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479           idiag  -= 25;
480           i2     -= 5;
481         }
482         break;
483       case 6:
484         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487         i2    -= 6;
488         idiag -= 36;
489         for (i=m-2; i>=0; i--) {
490           v  = aa + 36*(diag[i]+1);
491           vi = aj + diag[i] + 1;
492           nz = ai[i+1] - diag[i] - 1;
493           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494           while (nz--) {
495             idx = 6*(*vi++);
496             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
497             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499             v  += 36;
500           }
501           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503           idiag  -= 36;
504           i2     -= 6;
505         }
506         break;
507       case 7:
508         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
509         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513         i2    -= 7;
514         idiag -= 49;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 49*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521           while (nz--) {
522             idx = 7*(*vi++);
523             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
524             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526             v  += 49;
527           }
528           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531           idiag  -= 49;
532           i2     -= 7;
533         }
534         break;
535       default:
536         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
537         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538         i2    -= bs;
539         idiag -= bs2;
540         for (i=m-2; i>=0; i--) {
541           v  = aa + bs2*(diag[i]+1);
542           vi = aj + diag[i] + 1;
543           nz = ai[i+1] - diag[i] - 1;
544 
545           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546           /* copy all rows of x that are needed into contiguous space */
547           workt = work;
548           for (j=0; j<nz; j++) {
549             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
550             workt += bs;
551           }
552           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
554 
555           idiag -= bs2;
556           i2    -= bs;
557         }
558         break;
559       }
560       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
561     }
562     its--;
563   }
564   while (its--) {
565     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566       idiag = a->idiag;
567       i2 = 0;
568       switch (bs) {
569       case 1:
570         for (i=0; i<m; i++) {
571           v  = aa + ai[i];
572           vi = aj + ai[i];
573           nz = ai[i+1] - ai[i];
574           s[0] = b[i2];
575           for (j=0; j<nz; j++) {
576             xw[0] = x[vi[j]];
577             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578           }
579           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580           x[i2] += xw[0];
581           idiag += 1;
582           i2    += 1;
583         }
584         break;
585       case 2:
586         for (i=0; i<m; i++) {
587           v  = aa + 4*ai[i];
588           vi = aj + ai[i];
589           nz = ai[i+1] - ai[i];
590           s[0] = b[i2]; s[1] = b[i2+1];
591           for (j=0; j<nz; j++) {
592             idx = 2*vi[j];
593             it  = 4*j;
594             xw[0] = x[idx]; xw[1] = x[1+idx];
595             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596           }
597           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598           x[i2]  += xw[0]; x[i2+1] += xw[1];
599           idiag  += 4;
600           i2     += 2;
601         }
602         break;
603       case 3:
604         for (i=0; i<m; i++) {
605           v  = aa + 9*ai[i];
606           vi = aj + ai[i];
607           nz = ai[i+1] - ai[i];
608           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609           while (nz--) {
610             idx = 3*(*vi++);
611             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613             v  += 9;
614           }
615           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617           idiag  += 9;
618           i2     += 3;
619         }
620         break;
621       case 4:
622         for (i=0; i<m; i++) {
623           v  = aa + 16*ai[i];
624           vi = aj + ai[i];
625           nz = ai[i+1] - ai[i];
626           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627           while (nz--) {
628             idx = 4*(*vi++);
629             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631             v  += 16;
632           }
633           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635           idiag  += 16;
636           i2     += 4;
637         }
638         break;
639       case 5:
640         for (i=0; i<m; i++) {
641           v  = aa + 25*ai[i];
642           vi = aj + ai[i];
643           nz = ai[i+1] - ai[i];
644           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645           while (nz--) {
646             idx = 5*(*vi++);
647             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649             v  += 25;
650           }
651           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653           idiag  += 25;
654           i2     += 5;
655         }
656         break;
657       case 6:
658         for (i=0; i<m; i++) {
659           v  = aa + 36*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663           while (nz--) {
664             idx = 6*(*vi++);
665             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
666             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668             v  += 36;
669           }
670           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673           idiag  += 36;
674           i2     += 6;
675         }
676         break;
677       case 7:
678         for (i=0; i<m; i++) {
679           v  = aa + 49*ai[i];
680           vi = aj + ai[i];
681           nz = ai[i+1] - ai[i];
682           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
683           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684           while (nz--) {
685             idx = 7*(*vi++);
686             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689             v  += 49;
690           }
691           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694           idiag  += 49;
695           i2     += 7;
696         }
697         break;
698       default:
699         for (i=0; i<m; i++) {
700           v  = aa + bs2*ai[i];
701           vi = aj + ai[i];
702           nz = ai[i+1] - ai[i];
703 
704           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
705           /* copy all rows of x that are needed into contiguous space */
706           workt = work;
707           for (j=0; j<nz; j++) {
708             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
709             workt += bs;
710           }
711           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
713 
714           idiag += bs2;
715           i2    += bs;
716         }
717         break;
718       }
719       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
720     }
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = a->idiag+bs2*(a->mbs-1);
723       i2 = bs * (m-1);
724       switch (bs) {
725       case 1:
726         for (i=m-1; i>=0; i--) {
727           v  = aa + ai[i];
728           vi = aj + ai[i];
729           nz = ai[i+1] - ai[i];
730           s[0] = b[i2];
731           for (j=0; j<nz; j++) {
732             xw[0] = x[vi[j]];
733             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734           }
735           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736           x[i2] += xw[0];
737           idiag -= 1;
738           i2    -= 1;
739         }
740         break;
741       case 2:
742         for (i=m-1; i>=0; i--) {
743           v  = aa + 4*ai[i];
744           vi = aj + ai[i];
745           nz = ai[i+1] - ai[i];
746           s[0] = b[i2]; s[1] = b[i2+1];
747           for (j=0; j<nz; j++) {
748             idx = 2*vi[j];
749             it  = 4*j;
750             xw[0] = x[idx]; xw[1] = x[1+idx];
751             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752           }
753           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754           x[i2]  += xw[0]; x[i2+1] += xw[1];
755           idiag  -= 4;
756           i2     -= 2;
757         }
758         break;
759       case 3:
760         for (i=m-1; i>=0; i--) {
761           v  = aa + 9*ai[i];
762           vi = aj + ai[i];
763           nz = ai[i+1] - ai[i];
764           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765           while (nz--) {
766             idx = 3*(*vi++);
767             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769             v  += 9;
770           }
771           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773           idiag  -= 9;
774           i2     -= 3;
775         }
776         break;
777       case 4:
778         for (i=m-1; i>=0; i--) {
779           v  = aa + 16*ai[i];
780           vi = aj + ai[i];
781           nz = ai[i+1] - ai[i];
782           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783           while (nz--) {
784             idx = 4*(*vi++);
785             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787             v  += 16;
788           }
789           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791           idiag  -= 16;
792           i2     -= 4;
793         }
794         break;
795       case 5:
796         for (i=m-1; i>=0; i--) {
797           v  = aa + 25*ai[i];
798           vi = aj + ai[i];
799           nz = ai[i+1] - ai[i];
800           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801           while (nz--) {
802             idx = 5*(*vi++);
803             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805             v  += 25;
806           }
807           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809           idiag  -= 25;
810           i2     -= 5;
811         }
812         break;
813       case 6:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 36*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819           while (nz--) {
820             idx = 6*(*vi++);
821             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
822             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824             v  += 36;
825           }
826           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829           idiag  -= 36;
830           i2     -= 6;
831         }
832         break;
833       case 7:
834         for (i=m-1; i>=0; i--) {
835           v  = aa + 49*ai[i];
836           vi = aj + ai[i];
837           nz = ai[i+1] - ai[i];
838           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
839           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840           while (nz--) {
841             idx = 7*(*vi++);
842             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845             v  += 49;
846           }
847           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850           idiag  -= 49;
851           i2     -= 7;
852         }
853         break;
854       default:
855         for (i=m-1; i>=0; i--) {
856           v  = aa + bs2*ai[i];
857           vi = aj + ai[i];
858           nz = ai[i+1] - ai[i];
859 
860           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
861           /* copy all rows of x that are needed into contiguous space */
862           workt = work;
863           for (j=0; j<nz; j++) {
864             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
865             workt += bs;
866           }
867           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
869 
870           idiag -= bs2;
871           i2    -= bs;
872         }
873         break;
874       }
875       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
876     }
877   }
878   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 
884 /*
885     Special version for direct calls from Fortran (Used in PETSc-fun3d)
886 */
887 #if defined(PETSC_HAVE_FORTRAN_CAPS)
888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890 #define matsetvaluesblocked4_ matsetvaluesblocked4
891 #endif
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "matsetvaluesblocked4_"
895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896 {
897   Mat               A  = *AA;
898   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
899   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900   PetscInt          *ai    =a->i,*ailen=a->ilen;
901   PetscInt          *aj    =a->j,stepval,lastcol = -1;
902   const PetscScalar *value = v;
903   MatScalar         *ap,*aa = a->a,*bap;
904 
905   PetscFunctionBegin;
906   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907   stepval = (n-1)*4;
908   for (k=0; k<m; k++) { /* loop over added rows */
909     row  = im[k];
910     rp   = aj + ai[row];
911     ap   = aa + 16*ai[row];
912     nrow = ailen[row];
913     low  = 0;
914     high = nrow;
915     for (l=0; l<n; l++) { /* loop over added columns */
916       col = in[l];
917       if (col <= lastcol)  low = 0;
918       else                high = nrow;
919       lastcol = col;
920       value   = v + k*(stepval+4 + l)*4;
921       while (high-low > 7) {
922         t = (low+high)/2;
923         if (rp[t] > col) high = t;
924         else             low  = t;
925       }
926       for (i=low; i<high; i++) {
927         if (rp[i] > col) break;
928         if (rp[i] == col) {
929           bap = ap +  16*i;
930           for (ii=0; ii<4; ii++,value+=stepval) {
931             for (jj=ii; jj<16; jj+=4) {
932               bap[jj] += *value++;
933             }
934           }
935           goto noinsert2;
936         }
937       }
938       N = nrow++ - 1;
939       high++; /* added new column index thus must search to one higher than before */
940       /* shift up all the later entries in this row */
941       for (ii=N; ii>=i; ii--) {
942         rp[ii+1] = rp[ii];
943         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944       }
945       if (N >= i) {
946         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947       }
948       rp[i] = col;
949       bap   = ap +  16*i;
950       for (ii=0; ii<4; ii++,value+=stepval) {
951         for (jj=ii; jj<16; jj+=4) {
952           bap[jj] = *value++;
953         }
954       }
955       noinsert2:;
956       low = i;
957     }
958     ailen[row] = nrow;
959   }
960   PetscFunctionReturnVoid();
961 }
962 
963 #if defined(PETSC_HAVE_FORTRAN_CAPS)
964 #define matsetvalues4_ MATSETVALUES4
965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966 #define matsetvalues4_ matsetvalues4
967 #endif
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSetValues4_"
971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972 {
973   Mat         A  = *AA;
974   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976   PetscInt    *ai=a->i,*ailen=a->ilen;
977   PetscInt    *aj=a->j,brow,bcol;
978   PetscInt    ridx,cidx,lastcol = -1;
979   MatScalar   *ap,value,*aa=a->a,*bap;
980 
981   PetscFunctionBegin;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k]; brow = row/4;
984     rp   = aj + ai[brow];
985     ap   = aa + 16*ai[brow];
986     nrow = ailen[brow];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col   = in[l]; bcol = col/4;
991       ridx  = row % 4; cidx = col % 4;
992       value = v[l + k*n];
993       if (col <= lastcol)  low = 0;
994       else                high = nrow;
995       lastcol = col;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for (i=low; i<high; i++) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap   = ap +  16*i + 4*cidx + ridx;
1005           *bap += value;
1006           goto noinsert1;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N>=i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i]                    = bcol;
1020       ap[16*i + 4*cidx + ridx] = value;
1021 noinsert1:;
1022       low = i;
1023     }
1024     ailen[brow] = nrow;
1025   }
1026   PetscFunctionReturnVoid();
1027 }
1028 
1029 /*
1030      Checks for missing diagonals
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*ii = a->i,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !ii) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047   } else {
1048     diag = a->diag;
1049     for (i=0; i<a->mbs; i++) {
1050       if (diag[i] >= ii[i+1]) {
1051         *missing = PETSC_TRUE;
1052         if (d) *d = i;
1053         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054         break;
1055       }
1056     }
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = 0;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176 {
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (!ia) PetscFunctionReturn(0);
1181   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182     ierr = PetscFree(*ia);CHKERRQ(ierr);
1183     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191 {
1192   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198 #endif
1199   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1200   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1201   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1202   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1203   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1204   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1205   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1206   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1207   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1208   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1209   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1210   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1211 
1212   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1213   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1214   ierr = PetscFree(A->data);CHKERRQ(ierr);
1215 
1216   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233 {
1234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   switch (op) {
1239   case MAT_ROW_ORIENTED:
1240     a->roworiented = flg;
1241     break;
1242   case MAT_KEEP_NONZERO_PATTERN:
1243     a->keepnonzeropattern = flg;
1244     break;
1245   case MAT_NEW_NONZERO_LOCATIONS:
1246     a->nonew = (flg ? 0 : 1);
1247     break;
1248   case MAT_NEW_NONZERO_LOCATION_ERR:
1249     a->nonew = (flg ? -1 : 0);
1250     break;
1251   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252     a->nonew = (flg ? -2 : 0);
1253     break;
1254   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255     a->nounused = (flg ? -1 : 0);
1256     break;
1257   case MAT_NEW_DIAGONALS:
1258   case MAT_IGNORE_OFF_PROC_ENTRIES:
1259   case MAT_USE_HASH_TABLE:
1260     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1261     break;
1262   case MAT_SPD:
1263   case MAT_SYMMETRIC:
1264   case MAT_STRUCTURALLY_SYMMETRIC:
1265   case MAT_HERMITIAN:
1266   case MAT_SYMMETRY_ETERNAL:
1267     /* These options are handled directly by MatSetOption() */
1268     break;
1269   default:
1270     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1271   }
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1278 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1279 {
1280   PetscErrorCode ierr;
1281   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1282   MatScalar      *aa_i;
1283   PetscScalar    *v_i;
1284 
1285   PetscFunctionBegin;
1286   bs  = A->rmap->bs;
1287   bs2 = bs*bs;
1288   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1289 
1290   bn  = row/bs;   /* Block number */
1291   bp  = row % bs; /* Block Position */
1292   M   = ai[bn+1] - ai[bn];
1293   *nz = bs*M;
1294 
1295   if (v) {
1296     *v = 0;
1297     if (*nz) {
1298       ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr);
1299       for (i=0; i<M; i++) { /* for each block in the block row */
1300         v_i  = *v + i*bs;
1301         aa_i = aa + bs2*(ai[bn] + i);
1302         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1303       }
1304     }
1305   }
1306 
1307   if (idx) {
1308     *idx = 0;
1309     if (*nz) {
1310       ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr);
1311       for (i=0; i<M; i++) { /* for each block in the block row */
1312         idx_i = *idx + i*bs;
1313         itmp  = bs*aj[ai[bn] + i];
1314         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1315       }
1316     }
1317   }
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1323 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1324 {
1325   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1326   PetscErrorCode ierr;
1327 
1328   PetscFunctionBegin;
1329   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1335 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1336 {
1337   PetscErrorCode ierr;
1338 
1339   PetscFunctionBegin;
1340   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1341   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1349 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1350 {
1351   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1352   Mat            C;
1353   PetscErrorCode ierr;
1354   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1355   PetscInt       *rows,*cols,bs2=a->bs2;
1356   MatScalar      *array;
1357 
1358   PetscFunctionBegin;
1359   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1360   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1361     ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr);
1362 
1363     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1364     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1365     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1366     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1367     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1368     ierr = PetscFree(col);CHKERRQ(ierr);
1369   } else {
1370     C = *B;
1371   }
1372 
1373   array = a->a;
1374   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1375   for (i=0; i<mbs; i++) {
1376     cols[0] = i*bs;
1377     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1378     len = ai[i+1] - ai[i];
1379     for (j=0; j<len; j++) {
1380       rows[0] = (*aj++)*bs;
1381       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1382       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1383       array += bs2;
1384     }
1385   }
1386   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1387 
1388   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390 
1391   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1392     *B = C;
1393   } else {
1394     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1401 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1402 {
1403   PetscErrorCode ierr;
1404   Mat            Btrans;
1405 
1406   PetscFunctionBegin;
1407   *f   = PETSC_FALSE;
1408   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1409   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1410   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1417 {
1418   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1421   int            fd;
1422   PetscScalar    *aa;
1423   FILE           *file;
1424 
1425   PetscFunctionBegin;
1426   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1427   ierr        = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr);
1428   col_lens[0] = MAT_FILE_CLASSID;
1429 
1430   col_lens[1] = A->rmap->N;
1431   col_lens[2] = A->cmap->n;
1432   col_lens[3] = a->nz*bs2;
1433 
1434   /* store lengths of each row and write (including header) to file */
1435   count = 0;
1436   for (i=0; i<a->mbs; i++) {
1437     for (j=0; j<bs; j++) {
1438       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1439     }
1440   }
1441   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1442   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1443 
1444   /* store column indices (zero start index) */
1445   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1446   count = 0;
1447   for (i=0; i<a->mbs; i++) {
1448     for (j=0; j<bs; j++) {
1449       for (k=a->i[i]; k<a->i[i+1]; k++) {
1450         for (l=0; l<bs; l++) {
1451           jj[count++] = bs*a->j[k] + l;
1452         }
1453       }
1454     }
1455   }
1456   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1457   ierr = PetscFree(jj);CHKERRQ(ierr);
1458 
1459   /* store nonzero values */
1460   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1461   count = 0;
1462   for (i=0; i<a->mbs; i++) {
1463     for (j=0; j<bs; j++) {
1464       for (k=a->i[i]; k<a->i[i+1]; k++) {
1465         for (l=0; l<bs; l++) {
1466           aa[count++] = a->a[bs2*k + l*bs + j];
1467         }
1468       }
1469     }
1470   }
1471   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1472   ierr = PetscFree(aa);CHKERRQ(ierr);
1473 
1474   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1475   if (file) {
1476     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1483 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1484 {
1485   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1486   PetscErrorCode    ierr;
1487   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1488   PetscViewerFormat format;
1489 
1490   PetscFunctionBegin;
1491   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1492   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1493     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1494   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1495     Mat aij;
1496     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1497     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1498     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1499   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1500       PetscFunctionReturn(0);
1501   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1502     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1503     for (i=0; i<a->mbs; i++) {
1504       for (j=0; j<bs; j++) {
1505         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1506         for (k=a->i[i]; k<a->i[i+1]; k++) {
1507           for (l=0; l<bs; l++) {
1508 #if defined(PETSC_USE_COMPLEX)
1509             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1510               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1511                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1512             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1513               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1514                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1515             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1516               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1517             }
1518 #else
1519             if (a->a[bs2*k + l*bs + j] != 0.0) {
1520               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1521             }
1522 #endif
1523           }
1524         }
1525         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1526       }
1527     }
1528     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1529   } else {
1530     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1531     for (i=0; i<a->mbs; i++) {
1532       for (j=0; j<bs; j++) {
1533         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1534         for (k=a->i[i]; k<a->i[i+1]; k++) {
1535           for (l=0; l<bs; l++) {
1536 #if defined(PETSC_USE_COMPLEX)
1537             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1539                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1542                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             } else {
1544               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1545             }
1546 #else
1547             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1548 #endif
1549           }
1550         }
1551         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1552       }
1553     }
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   }
1556   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #include <petscdraw.h>
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1563 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1564 {
1565   Mat               A = (Mat) Aa;
1566   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1567   PetscErrorCode    ierr;
1568   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1569   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1570   MatScalar         *aa;
1571   PetscViewer       viewer;
1572   PetscViewerFormat format;
1573 
1574   PetscFunctionBegin;
1575   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1576   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1577 
1578   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1579 
1580   /* loop over matrix elements drawing boxes */
1581 
1582   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1583     color = PETSC_DRAW_BLUE;
1584     for (i=0,row=0; i<mbs; i++,row+=bs) {
1585       for (j=a->i[i]; j<a->i[i+1]; j++) {
1586         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1587         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1588         aa  = a->a + j*bs2;
1589         for (k=0; k<bs; k++) {
1590           for (l=0; l<bs; l++) {
1591             if (PetscRealPart(*aa++) >=  0.) continue;
1592             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1593           }
1594         }
1595       }
1596     }
1597     color = PETSC_DRAW_CYAN;
1598     for (i=0,row=0; i<mbs; i++,row+=bs) {
1599       for (j=a->i[i]; j<a->i[i+1]; j++) {
1600         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1601         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1602         aa  = a->a + j*bs2;
1603         for (k=0; k<bs; k++) {
1604           for (l=0; l<bs; l++) {
1605             if (PetscRealPart(*aa++) != 0.) continue;
1606             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1607           }
1608         }
1609       }
1610     }
1611     color = PETSC_DRAW_RED;
1612     for (i=0,row=0; i<mbs; i++,row+=bs) {
1613       for (j=a->i[i]; j<a->i[i+1]; j++) {
1614         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1615         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1616         aa  = a->a + j*bs2;
1617         for (k=0; k<bs; k++) {
1618           for (l=0; l<bs; l++) {
1619             if (PetscRealPart(*aa++) <= 0.) continue;
1620             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1621           }
1622         }
1623       }
1624     }
1625   } else {
1626     /* use contour shading to indicate magnitude of values */
1627     /* first determine max of all nonzero values */
1628     PetscDraw popup;
1629     PetscReal scale,maxv = 0.0;
1630 
1631     for (i=0; i<a->nz*a->bs2; i++) {
1632       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1633     }
1634     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1635     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1636     if (popup) {
1637       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1638     }
1639     for (i=0,row=0; i<mbs; i++,row+=bs) {
1640       for (j=a->i[i]; j<a->i[i+1]; j++) {
1641         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643         aa  = a->a + j*bs2;
1644         for (k=0; k<bs; k++) {
1645           for (l=0; l<bs; l++) {
1646             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1647             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1648           }
1649         }
1650       }
1651     }
1652   }
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1658 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1659 {
1660   PetscErrorCode ierr;
1661   PetscReal      xl,yl,xr,yr,w,h;
1662   PetscDraw      draw;
1663   PetscBool      isnull;
1664 
1665   PetscFunctionBegin;
1666   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1667   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1668 
1669   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1670   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1671   xr  += w;    yr += h;  xl = -w;     yl = -h;
1672   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1673   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1674   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatView_SeqBAIJ"
1680 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1681 {
1682   PetscErrorCode ierr;
1683   PetscBool      iascii,isbinary,isdraw;
1684 
1685   PetscFunctionBegin;
1686   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1687   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1688   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1689   if (iascii) {
1690     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1691   } else if (isbinary) {
1692     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1693   } else if (isdraw) {
1694     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1695   } else {
1696     Mat B;
1697     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1698     ierr = MatView(B,viewer);CHKERRQ(ierr);
1699     ierr = MatDestroy(&B);CHKERRQ(ierr);
1700   }
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1707 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1708 {
1709   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1710   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1711   PetscInt    *ai = a->i,*ailen = a->ilen;
1712   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1713   MatScalar   *ap,*aa = a->a;
1714 
1715   PetscFunctionBegin;
1716   for (k=0; k<m; k++) { /* loop over rows */
1717     row = im[k]; brow = row/bs;
1718     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1719     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1720     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1721     nrow = ailen[brow];
1722     for (l=0; l<n; l++) { /* loop over columns */
1723       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1724       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1725       col  = in[l];
1726       bcol = col/bs;
1727       cidx = col%bs;
1728       ridx = row%bs;
1729       high = nrow;
1730       low  = 0; /* assume unsorted */
1731       while (high-low > 5) {
1732         t = (low+high)/2;
1733         if (rp[t] > bcol) high = t;
1734         else             low  = t;
1735       }
1736       for (i=low; i<high; i++) {
1737         if (rp[i] > bcol) break;
1738         if (rp[i] == bcol) {
1739           *v++ = ap[bs2*i+bs*cidx+ridx];
1740           goto finished;
1741         }
1742       }
1743       *v++ = 0.0;
1744 finished:;
1745     }
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1752 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1753 {
1754   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1755   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1756   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1757   PetscErrorCode    ierr;
1758   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1759   PetscBool         roworiented=a->roworiented;
1760   const PetscScalar *value     = v;
1761   MatScalar         *ap,*aa = a->a,*bap;
1762 
1763   PetscFunctionBegin;
1764   if (roworiented) {
1765     stepval = (n-1)*bs;
1766   } else {
1767     stepval = (m-1)*bs;
1768   }
1769   for (k=0; k<m; k++) { /* loop over added rows */
1770     row = im[k];
1771     if (row < 0) continue;
1772 #if defined(PETSC_USE_DEBUG)
1773     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1774 #endif
1775     rp   = aj + ai[row];
1776     ap   = aa + bs2*ai[row];
1777     rmax = imax[row];
1778     nrow = ailen[row];
1779     low  = 0;
1780     high = nrow;
1781     for (l=0; l<n; l++) { /* loop over added columns */
1782       if (in[l] < 0) continue;
1783 #if defined(PETSC_USE_DEBUG)
1784       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1785 #endif
1786       col = in[l];
1787       if (roworiented) {
1788         value = v + (k*(stepval+bs) + l)*bs;
1789       } else {
1790         value = v + (l*(stepval+bs) + k)*bs;
1791       }
1792       if (col <= lastcol) low = 0;
1793       else high = nrow;
1794       lastcol = col;
1795       while (high-low > 7) {
1796         t = (low+high)/2;
1797         if (rp[t] > col) high = t;
1798         else             low  = t;
1799       }
1800       for (i=low; i<high; i++) {
1801         if (rp[i] > col) break;
1802         if (rp[i] == col) {
1803           bap = ap +  bs2*i;
1804           if (roworiented) {
1805             if (is == ADD_VALUES) {
1806               for (ii=0; ii<bs; ii++,value+=stepval) {
1807                 for (jj=ii; jj<bs2; jj+=bs) {
1808                   bap[jj] += *value++;
1809                 }
1810               }
1811             } else {
1812               for (ii=0; ii<bs; ii++,value+=stepval) {
1813                 for (jj=ii; jj<bs2; jj+=bs) {
1814                   bap[jj] = *value++;
1815                 }
1816               }
1817             }
1818           } else {
1819             if (is == ADD_VALUES) {
1820               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1821                 for (jj=0; jj<bs; jj++) {
1822                   bap[jj] += value[jj];
1823                 }
1824                 bap += bs;
1825               }
1826             } else {
1827               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1828                 for (jj=0; jj<bs; jj++) {
1829                   bap[jj]  = value[jj];
1830                 }
1831                 bap += bs;
1832               }
1833             }
1834           }
1835           goto noinsert2;
1836         }
1837       }
1838       if (nonew == 1) goto noinsert2;
1839       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1840       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1841       N = nrow++ - 1; high++;
1842       /* shift up all the later entries in this row */
1843       for (ii=N; ii>=i; ii--) {
1844         rp[ii+1] = rp[ii];
1845         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1846       }
1847       if (N >= i) {
1848         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1849       }
1850       rp[i] = col;
1851       bap   = ap +  bs2*i;
1852       if (roworiented) {
1853         for (ii=0; ii<bs; ii++,value+=stepval) {
1854           for (jj=ii; jj<bs2; jj+=bs) {
1855             bap[jj] = *value++;
1856           }
1857         }
1858       } else {
1859         for (ii=0; ii<bs; ii++,value+=stepval) {
1860           for (jj=0; jj<bs; jj++) {
1861             *bap++ = *value++;
1862           }
1863         }
1864       }
1865 noinsert2:;
1866       low = i;
1867     }
1868     ailen[row] = nrow;
1869   }
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1875 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1876 {
1877   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1878   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1879   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1880   PetscErrorCode ierr;
1881   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1882   MatScalar      *aa  = a->a,*ap;
1883   PetscReal      ratio=0.6;
1884 
1885   PetscFunctionBegin;
1886   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1887 
1888   if (m) rmax = ailen[0];
1889   for (i=1; i<mbs; i++) {
1890     /* move each row back by the amount of empty slots (fshift) before it*/
1891     fshift += imax[i-1] - ailen[i-1];
1892     rmax    = PetscMax(rmax,ailen[i]);
1893     if (fshift) {
1894       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1895       N  = ailen[i];
1896       for (j=0; j<N; j++) {
1897         ip[j-fshift] = ip[j];
1898 
1899         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1900       }
1901     }
1902     ai[i] = ai[i-1] + ailen[i-1];
1903   }
1904   if (mbs) {
1905     fshift += imax[mbs-1] - ailen[mbs-1];
1906     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1907   }
1908 
1909   /* reset ilen and imax for each row */
1910   a->nonzerorowcnt = 0;
1911   for (i=0; i<mbs; i++) {
1912     ailen[i] = imax[i] = ai[i+1] - ai[i];
1913     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1914   }
1915   a->nz = ai[mbs];
1916 
1917   /* diagonals may have moved, so kill the diagonal pointers */
1918   a->idiagvalid = PETSC_FALSE;
1919   if (fshift && a->diag) {
1920     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1921     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1922     a->diag = 0;
1923   }
1924   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1925   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1926   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1927   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1928 
1929   A->info.mallocs    += a->reallocs;
1930   a->reallocs         = 0;
1931   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1932 
1933   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 /*
1938    This function returns an array of flags which indicate the locations of contiguous
1939    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1940    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1941    Assume: sizes should be long enough to hold all the values.
1942 */
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1945 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1946 {
1947   PetscInt  i,j,k,row;
1948   PetscBool flg;
1949 
1950   PetscFunctionBegin;
1951   for (i=0,j=0; i<n; j++) {
1952     row = idx[i];
1953     if (row%bs!=0) { /* Not the begining of a block */
1954       sizes[j] = 1;
1955       i++;
1956     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1957       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1958       i++;
1959     } else { /* Begining of the block, so check if the complete block exists */
1960       flg = PETSC_TRUE;
1961       for (k=1; k<bs; k++) {
1962         if (row+k != idx[i+k]) { /* break in the block */
1963           flg = PETSC_FALSE;
1964           break;
1965         }
1966       }
1967       if (flg) { /* No break in the bs */
1968         sizes[j] = bs;
1969         i       += bs;
1970       } else {
1971         sizes[j] = 1;
1972         i++;
1973       }
1974     }
1975   }
1976   *bs_max = j;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1982 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1983 {
1984   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1985   PetscErrorCode    ierr;
1986   PetscInt          i,j,k,count,*rows;
1987   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1988   PetscScalar       zero = 0.0;
1989   MatScalar         *aa;
1990   const PetscScalar *xx;
1991   PetscScalar       *bb;
1992 
1993   PetscFunctionBegin;
1994   /* fix right hand side if needed */
1995   if (x && b) {
1996     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1997     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1998     for (i=0; i<is_n; i++) {
1999       bb[is_idx[i]] = diag*xx[is_idx[i]];
2000     }
2001     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2002     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2003   }
2004 
2005   /* Make a copy of the IS and  sort it */
2006   /* allocate memory for rows,sizes */
2007   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2008 
2009   /* copy IS values to rows, and sort them */
2010   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2011   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2012 
2013   if (baij->keepnonzeropattern) {
2014     for (i=0; i<is_n; i++) sizes[i] = 1;
2015     bs_max          = is_n;
2016   } else {
2017     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2018     A->nonzerostate++;
2019   }
2020 
2021   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2022     row = rows[j];
2023     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2024     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2025     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2026     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2027       if (diag != (PetscScalar)0.0) {
2028         if (baij->ilen[row/bs] > 0) {
2029           baij->ilen[row/bs]       = 1;
2030           baij->j[baij->i[row/bs]] = row/bs;
2031 
2032           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2033         }
2034         /* Now insert all the diagonal values for this bs */
2035         for (k=0; k<bs; k++) {
2036           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2037         }
2038       } else { /* (diag == 0.0) */
2039         baij->ilen[row/bs] = 0;
2040       } /* end (diag == 0.0) */
2041     } else { /* (sizes[i] != bs) */
2042 #if defined(PETSC_USE_DEBUG)
2043       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2044 #endif
2045       for (k=0; k<count; k++) {
2046         aa[0] =  zero;
2047         aa   += bs;
2048       }
2049       if (diag != (PetscScalar)0.0) {
2050         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2051       }
2052     }
2053   }
2054 
2055   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2056   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2062 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2063 {
2064   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2065   PetscErrorCode    ierr;
2066   PetscInt          i,j,k,count;
2067   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2068   PetscScalar       zero = 0.0;
2069   MatScalar         *aa;
2070   const PetscScalar *xx;
2071   PetscScalar       *bb;
2072   PetscBool         *zeroed,vecs = PETSC_FALSE;
2073 
2074   PetscFunctionBegin;
2075   /* fix right hand side if needed */
2076   if (x && b) {
2077     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2078     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2079     vecs = PETSC_TRUE;
2080   }
2081 
2082   /* zero the columns */
2083   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2084   for (i=0; i<is_n; i++) {
2085     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2086     zeroed[is_idx[i]] = PETSC_TRUE;
2087   }
2088   for (i=0; i<A->rmap->N; i++) {
2089     if (!zeroed[i]) {
2090       row = i/bs;
2091       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2092         for (k=0; k<bs; k++) {
2093           col = bs*baij->j[j] + k;
2094           if (zeroed[col]) {
2095             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2096             if (vecs) bb[i] -= aa[0]*xx[col];
2097             aa[0] = 0.0;
2098           }
2099         }
2100       }
2101     } else if (vecs) bb[i] = diag*xx[i];
2102   }
2103   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2104   if (vecs) {
2105     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2106     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2107   }
2108 
2109   /* zero the rows */
2110   for (i=0; i<is_n; i++) {
2111     row   = is_idx[i];
2112     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2113     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2114     for (k=0; k<count; k++) {
2115       aa[0] =  zero;
2116       aa   += bs;
2117     }
2118     if (diag != (PetscScalar)0.0) {
2119       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2120     }
2121   }
2122   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 #undef __FUNCT__
2127 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2128 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2129 {
2130   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2131   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2132   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2133   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2134   PetscErrorCode ierr;
2135   PetscInt       ridx,cidx,bs2=a->bs2;
2136   PetscBool      roworiented=a->roworiented;
2137   MatScalar      *ap,value,*aa=a->a,*bap;
2138 
2139   PetscFunctionBegin;
2140   for (k=0; k<m; k++) { /* loop over added rows */
2141     row  = im[k];
2142     brow = row/bs;
2143     if (row < 0) continue;
2144 #if defined(PETSC_USE_DEBUG)
2145     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2146 #endif
2147     rp   = aj + ai[brow];
2148     ap   = aa + bs2*ai[brow];
2149     rmax = imax[brow];
2150     nrow = ailen[brow];
2151     low  = 0;
2152     high = nrow;
2153     for (l=0; l<n; l++) { /* loop over added columns */
2154       if (in[l] < 0) continue;
2155 #if defined(PETSC_USE_DEBUG)
2156       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2157 #endif
2158       col  = in[l]; bcol = col/bs;
2159       ridx = row % bs; cidx = col % bs;
2160       if (roworiented) {
2161         value = v[l + k*n];
2162       } else {
2163         value = v[k + l*m];
2164       }
2165       if (col <= lastcol) low = 0; else high = nrow;
2166       lastcol = col;
2167       while (high-low > 7) {
2168         t = (low+high)/2;
2169         if (rp[t] > bcol) high = t;
2170         else              low  = t;
2171       }
2172       for (i=low; i<high; i++) {
2173         if (rp[i] > bcol) break;
2174         if (rp[i] == bcol) {
2175           bap = ap +  bs2*i + bs*cidx + ridx;
2176           if (is == ADD_VALUES) *bap += value;
2177           else                  *bap  = value;
2178           goto noinsert1;
2179         }
2180       }
2181       if (nonew == 1) goto noinsert1;
2182       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2183       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2184       N = nrow++ - 1; high++;
2185       /* shift up all the later entries in this row */
2186       for (ii=N; ii>=i; ii--) {
2187         rp[ii+1] = rp[ii];
2188         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2189       }
2190       if (N>=i) {
2191         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2192       }
2193       rp[i]                      = bcol;
2194       ap[bs2*i + bs*cidx + ridx] = value;
2195       a->nz++;
2196       A->nonzerostate++;
2197 noinsert1:;
2198       low = i;
2199     }
2200     ailen[brow] = nrow;
2201   }
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2207 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2208 {
2209   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2210   Mat            outA;
2211   PetscErrorCode ierr;
2212   PetscBool      row_identity,col_identity;
2213 
2214   PetscFunctionBegin;
2215   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2216   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2217   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2218   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2219 
2220   outA            = inA;
2221   inA->factortype = MAT_FACTOR_LU;
2222 
2223   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2224 
2225   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2226   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2227   a->row = row;
2228   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2229   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2230   a->col = col;
2231 
2232   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2233   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2234   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2235   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2236 
2237   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2238   if (!a->solve_work) {
2239     ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr);
2240     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2241   }
2242   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2248 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2249 {
2250   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2251   PetscInt    i,nz,mbs;
2252 
2253   PetscFunctionBegin;
2254   nz  = baij->maxnz;
2255   mbs = baij->mbs;
2256   for (i=0; i<nz; i++) {
2257     baij->j[i] = indices[i];
2258   }
2259   baij->nz = nz;
2260   for (i=0; i<mbs; i++) {
2261     baij->ilen[i] = baij->imax[i];
2262   }
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2268 /*@
2269     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2270        in the matrix.
2271 
2272   Input Parameters:
2273 +  mat - the SeqBAIJ matrix
2274 -  indices - the column indices
2275 
2276   Level: advanced
2277 
2278   Notes:
2279     This can be called if you have precomputed the nonzero structure of the
2280   matrix and want to provide it to the matrix object to improve the performance
2281   of the MatSetValues() operation.
2282 
2283     You MUST have set the correct numbers of nonzeros per row in the call to
2284   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2285 
2286     MUST be called before any calls to MatSetValues();
2287 
2288 @*/
2289 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2290 {
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2295   PetscValidPointer(indices,2);
2296   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2302 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2303 {
2304   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2305   PetscErrorCode ierr;
2306   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2307   PetscReal      atmp;
2308   PetscScalar    *x,zero = 0.0;
2309   MatScalar      *aa;
2310   PetscInt       ncols,brow,krow,kcol;
2311 
2312   PetscFunctionBegin;
2313   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2314   bs  = A->rmap->bs;
2315   aa  = a->a;
2316   ai  = a->i;
2317   aj  = a->j;
2318   mbs = a->mbs;
2319 
2320   ierr = VecSet(v,zero);CHKERRQ(ierr);
2321   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2322   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2323   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2324   for (i=0; i<mbs; i++) {
2325     ncols = ai[1] - ai[0]; ai++;
2326     brow  = bs*i;
2327     for (j=0; j<ncols; j++) {
2328       for (kcol=0; kcol<bs; kcol++) {
2329         for (krow=0; krow<bs; krow++) {
2330           atmp = PetscAbsScalar(*aa);aa++;
2331           row  = brow + krow;   /* row index */
2332           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2333         }
2334       }
2335       aj++;
2336     }
2337   }
2338   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 #undef __FUNCT__
2343 #define __FUNCT__ "MatCopy_SeqBAIJ"
2344 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2345 {
2346   PetscErrorCode ierr;
2347 
2348   PetscFunctionBegin;
2349   /* If the two matrices have the same copy implementation, use fast copy. */
2350   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2351     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2352     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2353     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2354 
2355     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2356     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2357     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2358   } else {
2359     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2360   }
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2366 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2367 {
2368   PetscErrorCode ierr;
2369 
2370   PetscFunctionBegin;
2371   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 #undef __FUNCT__
2376 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2377 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2378 {
2379   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2380 
2381   PetscFunctionBegin;
2382   *array = a->a;
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2388 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2389 {
2390   PetscFunctionBegin;
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2396 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2397 {
2398   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2399   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2400   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   /* Set the number of nonzeros in the new matrix */
2405   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2411 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2412 {
2413   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2414   PetscErrorCode ierr;
2415   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2416   PetscBLASInt   one=1;
2417 
2418   PetscFunctionBegin;
2419   if (str == SAME_NONZERO_PATTERN) {
2420     PetscScalar  alpha = a;
2421     PetscBLASInt bnz;
2422     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2423     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2424     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2425   } else {
2426     Mat      B;
2427     PetscInt *nnz;
2428     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2429     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2430     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2431     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2432     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2433     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2434     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2435     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2436     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2437     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2438     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2439     ierr = PetscFree(nnz);CHKERRQ(ierr);
2440   }
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 #undef __FUNCT__
2445 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2446 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2447 {
2448   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2449   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2450   MatScalar   *aa = a->a;
2451 
2452   PetscFunctionBegin;
2453   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2459 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2460 {
2461   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2462   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2463   MatScalar   *aa = a->a;
2464 
2465   PetscFunctionBegin;
2466   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2472 /*
2473     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2474 */
2475 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2476 {
2477   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2478   PetscErrorCode ierr;
2479   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2480   PetscInt       nz = a->i[m],row,*jj,mr,col;
2481 
2482   PetscFunctionBegin;
2483   *nn = n;
2484   if (!ia) PetscFunctionReturn(0);
2485   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2486   else {
2487     ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2488     ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2489     ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2490     jj   = a->j;
2491     for (i=0; i<nz; i++) {
2492       collengths[jj[i]]++;
2493     }
2494     cia[0] = oshift;
2495     for (i=0; i<n; i++) {
2496       cia[i+1] = cia[i] + collengths[i];
2497     }
2498     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2499     jj   = a->j;
2500     for (row=0; row<m; row++) {
2501       mr = a->i[row+1] - a->i[row];
2502       for (i=0; i<mr; i++) {
2503         col = *jj++;
2504 
2505         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2506       }
2507     }
2508     ierr = PetscFree(collengths);CHKERRQ(ierr);
2509     *ia  = cia; *ja = cja;
2510   }
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 #undef __FUNCT__
2515 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2516 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2517 {
2518   PetscErrorCode ierr;
2519 
2520   PetscFunctionBegin;
2521   if (!ia) PetscFunctionReturn(0);
2522   ierr = PetscFree(*ia);CHKERRQ(ierr);
2523   ierr = PetscFree(*ja);CHKERRQ(ierr);
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 /*
2528  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2529  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2530  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2531  */
2532 #undef __FUNCT__
2533 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2534 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2535 {
2536   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2537   PetscErrorCode ierr;
2538   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2539   PetscInt       nz = a->i[m],row,*jj,mr,col;
2540   PetscInt       *cspidx;
2541 
2542   PetscFunctionBegin;
2543   *nn = n;
2544   if (!ia) PetscFunctionReturn(0);
2545 
2546   ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2547   ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2548   ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2549   ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr);
2550   jj   = a->j;
2551   for (i=0; i<nz; i++) {
2552     collengths[jj[i]]++;
2553   }
2554   cia[0] = oshift;
2555   for (i=0; i<n; i++) {
2556     cia[i+1] = cia[i] + collengths[i];
2557   }
2558   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2559   jj   = a->j;
2560   for (row=0; row<m; row++) {
2561     mr = a->i[row+1] - a->i[row];
2562     for (i=0; i<mr; i++) {
2563       col = *jj++;
2564       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2565       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2566     }
2567   }
2568   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2569   *ia    = cia; *ja = cja;
2570   *spidx = cspidx;
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 #undef __FUNCT__
2575 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2576 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2577 {
2578   PetscErrorCode ierr;
2579 
2580   PetscFunctionBegin;
2581   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2582   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 /* -------------------------------------------------------------------*/
2587 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2588                                        MatGetRow_SeqBAIJ,
2589                                        MatRestoreRow_SeqBAIJ,
2590                                        MatMult_SeqBAIJ_N,
2591                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2592                                        MatMultTranspose_SeqBAIJ,
2593                                        MatMultTransposeAdd_SeqBAIJ,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                /* 10*/ 0,
2598                                        MatLUFactor_SeqBAIJ,
2599                                        0,
2600                                        0,
2601                                        MatTranspose_SeqBAIJ,
2602                                /* 15*/ MatGetInfo_SeqBAIJ,
2603                                        MatEqual_SeqBAIJ,
2604                                        MatGetDiagonal_SeqBAIJ,
2605                                        MatDiagonalScale_SeqBAIJ,
2606                                        MatNorm_SeqBAIJ,
2607                                /* 20*/ 0,
2608                                        MatAssemblyEnd_SeqBAIJ,
2609                                        MatSetOption_SeqBAIJ,
2610                                        MatZeroEntries_SeqBAIJ,
2611                                /* 24*/ MatZeroRows_SeqBAIJ,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                /* 29*/ MatSetUp_SeqBAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                /* 34*/ MatDuplicate_SeqBAIJ,
2622                                        0,
2623                                        0,
2624                                        MatILUFactor_SeqBAIJ,
2625                                        0,
2626                                /* 39*/ MatAXPY_SeqBAIJ,
2627                                        MatGetSubMatrices_SeqBAIJ,
2628                                        MatIncreaseOverlap_SeqBAIJ,
2629                                        MatGetValues_SeqBAIJ,
2630                                        MatCopy_SeqBAIJ,
2631                                /* 44*/ 0,
2632                                        MatScale_SeqBAIJ,
2633                                        0,
2634                                        0,
2635                                        MatZeroRowsColumns_SeqBAIJ,
2636                                /* 49*/ 0,
2637                                        MatGetRowIJ_SeqBAIJ,
2638                                        MatRestoreRowIJ_SeqBAIJ,
2639                                        MatGetColumnIJ_SeqBAIJ,
2640                                        MatRestoreColumnIJ_SeqBAIJ,
2641                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                        MatSetValuesBlocked_SeqBAIJ,
2646                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2647                                        MatDestroy_SeqBAIJ,
2648                                        MatView_SeqBAIJ,
2649                                        0,
2650                                        0,
2651                                /* 64*/ 0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2657                                        0,
2658                                        MatConvert_Basic,
2659                                        0,
2660                                        0,
2661                                /* 74*/ 0,
2662                                        MatFDColoringApply_BAIJ,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                /* 79*/ 0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        MatLoad_SeqBAIJ,
2671                                /* 84*/ 0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                /* 89*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                /* 94*/ 0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                /* 99*/ 0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                /*104*/ 0,
2692                                        MatRealPart_SeqBAIJ,
2693                                        MatImaginaryPart_SeqBAIJ,
2694                                        0,
2695                                        0,
2696                                /*109*/ 0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        MatMissingDiagonal_SeqBAIJ,
2701                                /*114*/ 0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                /*119*/ 0,
2707                                        0,
2708                                        MatMultHermitianTranspose_SeqBAIJ,
2709                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2710                                        0,
2711                                /*124*/ 0,
2712                                        0,
2713                                        MatInvertBlockDiagonal_SeqBAIJ,
2714                                        0,
2715                                        0,
2716                                /*129*/ 0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                /*134*/ 0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                /*139*/ 0,
2727                                        0,
2728                                        0,
2729                                        MatFDColoringSetUp_SeqXAIJ
2730 };
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2734 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2735 {
2736   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2737   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2742 
2743   /* allocate space for values if not already there */
2744   if (!aij->saved_values) {
2745     ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr);
2746     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2747   }
2748 
2749   /* copy values over */
2750   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 #undef __FUNCT__
2755 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2756 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2757 {
2758   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2759   PetscErrorCode ierr;
2760   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2761 
2762   PetscFunctionBegin;
2763   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2764   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2765 
2766   /* copy values over */
2767   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2772 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2773 
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2776 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2777 {
2778   Mat_SeqBAIJ    *b;
2779   PetscErrorCode ierr;
2780   PetscInt       i,mbs,nbs,bs2;
2781   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2782 
2783   PetscFunctionBegin;
2784   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2785   if (nz == MAT_SKIP_ALLOCATION) {
2786     skipallocation = PETSC_TRUE;
2787     nz             = 0;
2788   }
2789 
2790   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2791   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2792   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2793   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2794 
2795   B->preallocated = PETSC_TRUE;
2796 
2797   mbs = B->rmap->n/bs;
2798   nbs = B->cmap->n/bs;
2799   bs2 = bs*bs;
2800 
2801   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2802 
2803   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2804   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2805   if (nnz) {
2806     for (i=0; i<mbs; i++) {
2807       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2808       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2809     }
2810   }
2811 
2812   b    = (Mat_SeqBAIJ*)B->data;
2813   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2814   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2815   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2816 
2817   if (!flg) {
2818     switch (bs) {
2819     case 1:
2820       B->ops->mult    = MatMult_SeqBAIJ_1;
2821       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2822       break;
2823     case 2:
2824       B->ops->mult    = MatMult_SeqBAIJ_2;
2825       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2826       break;
2827     case 3:
2828       B->ops->mult    = MatMult_SeqBAIJ_3;
2829       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2830       break;
2831     case 4:
2832       B->ops->mult    = MatMult_SeqBAIJ_4;
2833       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2834       break;
2835     case 5:
2836       B->ops->mult    = MatMult_SeqBAIJ_5;
2837       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2838       break;
2839     case 6:
2840       B->ops->mult    = MatMult_SeqBAIJ_6;
2841       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2842       break;
2843     case 7:
2844       B->ops->mult    = MatMult_SeqBAIJ_7;
2845       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2846       break;
2847     case 15:
2848       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2850       break;
2851     default:
2852       B->ops->mult    = MatMult_SeqBAIJ_N;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2854       break;
2855     }
2856   }
2857   B->ops->sor = MatSOR_SeqBAIJ;
2858   b->mbs = mbs;
2859   b->nbs = nbs;
2860   if (!skipallocation) {
2861     if (!b->imax) {
2862       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2863       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2864 
2865       b->free_imax_ilen = PETSC_TRUE;
2866     }
2867     /* b->ilen will count nonzeros in each block row so far. */
2868     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2869     if (!nnz) {
2870       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2871       else if (nz < 0) nz = 1;
2872       for (i=0; i<mbs; i++) b->imax[i] = nz;
2873       nz = nz*mbs;
2874     } else {
2875       nz = 0;
2876       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2877     }
2878 
2879     /* allocate the matrix space */
2880     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2881     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2882     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2883     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2884     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2885 
2886     b->singlemalloc = PETSC_TRUE;
2887     b->i[0]         = 0;
2888     for (i=1; i<mbs+1; i++) {
2889       b->i[i] = b->i[i-1] + b->imax[i-1];
2890     }
2891     b->free_a  = PETSC_TRUE;
2892     b->free_ij = PETSC_TRUE;
2893 #if defined(PETSC_THREADCOMM_ACTIVE)
2894     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2895 #endif
2896   } else {
2897     b->free_a  = PETSC_FALSE;
2898     b->free_ij = PETSC_FALSE;
2899   }
2900 
2901   b->bs2              = bs2;
2902   b->mbs              = mbs;
2903   b->nz               = 0;
2904   b->maxnz            = nz;
2905   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2906   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2907   PetscFunctionReturn(0);
2908 }
2909 
2910 #undef __FUNCT__
2911 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2912 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2913 {
2914   PetscInt       i,m,nz,nz_max=0,*nnz;
2915   PetscScalar    *values=0;
2916   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2917   PetscErrorCode ierr;
2918 
2919   PetscFunctionBegin;
2920   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2921   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2922   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2923   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2924   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2925   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2926   m    = B->rmap->n/bs;
2927 
2928   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2929   ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr);
2930   for (i=0; i<m; i++) {
2931     nz = ii[i+1]- ii[i];
2932     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2933     nz_max = PetscMax(nz_max, nz);
2934     nnz[i] = nz;
2935   }
2936   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2937   ierr = PetscFree(nnz);CHKERRQ(ierr);
2938 
2939   values = (PetscScalar*)V;
2940   if (!values) {
2941     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2942   }
2943   for (i=0; i<m; i++) {
2944     PetscInt          ncols  = ii[i+1] - ii[i];
2945     const PetscInt    *icols = jj + ii[i];
2946     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2947     if (!roworiented) {
2948       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2949     } else {
2950       PetscInt j;
2951       for (j=0; j<ncols; j++) {
2952         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2953         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2954       }
2955     }
2956   }
2957   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2958   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2965 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
2966 #if defined(PETSC_HAVE_MUMPS)
2967 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2968 #endif
2969 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
2970 
2971 /*MC
2972    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2973    block sparse compressed row format.
2974 
2975    Options Database Keys:
2976 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2977 
2978   Level: beginner
2979 
2980 .seealso: MatCreateSeqBAIJ()
2981 M*/
2982 
2983 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2984 
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatCreate_SeqBAIJ"
2987 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2988 {
2989   PetscErrorCode ierr;
2990   PetscMPIInt    size;
2991   Mat_SeqBAIJ    *b;
2992 
2993   PetscFunctionBegin;
2994   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2995   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2996 
2997   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2998   B->data = (void*)b;
2999   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3000 
3001   b->row          = 0;
3002   b->col          = 0;
3003   b->icol         = 0;
3004   b->reallocs     = 0;
3005   b->saved_values = 0;
3006 
3007   b->roworiented        = PETSC_TRUE;
3008   b->nonew              = 0;
3009   b->diag               = 0;
3010   b->solve_work         = 0;
3011   b->mult_work          = 0;
3012   B->spptr              = 0;
3013   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3014   b->keepnonzeropattern = PETSC_FALSE;
3015 
3016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3018   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3019 #if defined(PETSC_HAVE_MUMPS)
3020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3021 #endif
3022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3032   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 #undef __FUNCT__
3037 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3038 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3039 {
3040   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3041   PetscErrorCode ierr;
3042   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3043 
3044   PetscFunctionBegin;
3045   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3046 
3047   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3048     c->imax           = a->imax;
3049     c->ilen           = a->ilen;
3050     c->free_imax_ilen = PETSC_FALSE;
3051   } else {
3052     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3053     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3054     for (i=0; i<mbs; i++) {
3055       c->imax[i] = a->imax[i];
3056       c->ilen[i] = a->ilen[i];
3057     }
3058     c->free_imax_ilen = PETSC_TRUE;
3059   }
3060 
3061   /* allocate the matrix space */
3062   if (mallocmatspace) {
3063     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3064       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3065       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3066 
3067       c->i            = a->i;
3068       c->j            = a->j;
3069       c->singlemalloc = PETSC_FALSE;
3070       c->free_a       = PETSC_TRUE;
3071       c->free_ij      = PETSC_FALSE;
3072       c->parent       = A;
3073       C->preallocated = PETSC_TRUE;
3074       C->assembled    = PETSC_TRUE;
3075 
3076       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3077       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3078       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3079     } else {
3080       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3081       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3082 
3083       c->singlemalloc = PETSC_TRUE;
3084       c->free_a       = PETSC_TRUE;
3085       c->free_ij      = PETSC_TRUE;
3086 
3087       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3088       if (mbs > 0) {
3089         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3090         if (cpvalues == MAT_COPY_VALUES) {
3091           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3092         } else {
3093           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3094         }
3095       }
3096       C->preallocated = PETSC_TRUE;
3097       C->assembled    = PETSC_TRUE;
3098     }
3099   }
3100 
3101   c->roworiented = a->roworiented;
3102   c->nonew       = a->nonew;
3103 
3104   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3105   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3106 
3107   c->bs2         = a->bs2;
3108   c->mbs         = a->mbs;
3109   c->nbs         = a->nbs;
3110 
3111   if (a->diag) {
3112     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3113       c->diag      = a->diag;
3114       c->free_diag = PETSC_FALSE;
3115     } else {
3116       ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr);
3117       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3118       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3119       c->free_diag = PETSC_TRUE;
3120     }
3121   } else c->diag = 0;
3122 
3123   c->nz         = a->nz;
3124   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3125   c->solve_work = 0;
3126   c->mult_work  = 0;
3127 
3128   c->compressedrow.use   = a->compressedrow.use;
3129   c->compressedrow.nrows = a->compressedrow.nrows;
3130   if (a->compressedrow.use) {
3131     i    = a->compressedrow.nrows;
3132     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3133     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3134     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3135     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3136   } else {
3137     c->compressedrow.use    = PETSC_FALSE;
3138     c->compressedrow.i      = NULL;
3139     c->compressedrow.rindex = NULL;
3140   }
3141   C->nonzerostate = A->nonzerostate;
3142 
3143   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3144   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3145   PetscFunctionReturn(0);
3146 }
3147 
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3150 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3151 {
3152   PetscErrorCode ierr;
3153 
3154   PetscFunctionBegin;
3155   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3156   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3157   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3158   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 #undef __FUNCT__
3163 #define __FUNCT__ "MatLoad_SeqBAIJ"
3164 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3165 {
3166   Mat_SeqBAIJ    *a;
3167   PetscErrorCode ierr;
3168   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3169   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3170   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3171   PetscInt       *masked,nmask,tmp,bs2,ishift;
3172   PetscMPIInt    size;
3173   int            fd;
3174   PetscScalar    *aa;
3175   MPI_Comm       comm;
3176 
3177   PetscFunctionBegin;
3178   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3179   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3180   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3181   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3182   bs2  = bs*bs;
3183 
3184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3185   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3186   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3187   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3188   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3189   M = header[1]; N = header[2]; nz = header[3];
3190 
3191   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3192   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3193 
3194   /*
3195      This code adds extra rows to make sure the number of rows is
3196     divisible by the blocksize
3197   */
3198   mbs        = M/bs;
3199   extra_rows = bs - M + bs*(mbs);
3200   if (extra_rows == bs) extra_rows = 0;
3201   else mbs++;
3202   if (extra_rows) {
3203     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3204   }
3205 
3206   /* Set global sizes if not already set */
3207   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3208     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3209   } else { /* Check if the matrix global sizes are correct */
3210     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3211     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3212       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3213     }
3214     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3215   }
3216 
3217   /* read in row lengths */
3218   ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
3219   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3220   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3221 
3222   /* read in column indices */
3223   ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr);
3224   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3225   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3226 
3227   /* loop over row lengths determining block row lengths */
3228   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3229   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3230   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3231   rowcount = 0;
3232   nzcount  = 0;
3233   for (i=0; i<mbs; i++) {
3234     nmask = 0;
3235     for (j=0; j<bs; j++) {
3236       kmax = rowlengths[rowcount];
3237       for (k=0; k<kmax; k++) {
3238         tmp = jj[nzcount++]/bs;
3239         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3240       }
3241       rowcount++;
3242     }
3243     browlengths[i] += nmask;
3244     /* zero out the mask elements we set */
3245     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3246   }
3247 
3248   /* Do preallocation  */
3249   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3250   a    = (Mat_SeqBAIJ*)newmat->data;
3251 
3252   /* set matrix "i" values */
3253   a->i[0] = 0;
3254   for (i=1; i<= mbs; i++) {
3255     a->i[i]      = a->i[i-1] + browlengths[i-1];
3256     a->ilen[i-1] = browlengths[i-1];
3257   }
3258   a->nz = 0;
3259   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3260 
3261   /* read in nonzero values */
3262   ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr);
3263   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3264   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3265 
3266   /* set "a" and "j" values into matrix */
3267   nzcount = 0; jcount = 0;
3268   for (i=0; i<mbs; i++) {
3269     nzcountb = nzcount;
3270     nmask    = 0;
3271     for (j=0; j<bs; j++) {
3272       kmax = rowlengths[i*bs+j];
3273       for (k=0; k<kmax; k++) {
3274         tmp = jj[nzcount++]/bs;
3275         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3276       }
3277     }
3278     /* sort the masked values */
3279     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3280 
3281     /* set "j" values into matrix */
3282     maskcount = 1;
3283     for (j=0; j<nmask; j++) {
3284       a->j[jcount++]  = masked[j];
3285       mask[masked[j]] = maskcount++;
3286     }
3287     /* set "a" values into matrix */
3288     ishift = bs2*a->i[i];
3289     for (j=0; j<bs; j++) {
3290       kmax = rowlengths[i*bs+j];
3291       for (k=0; k<kmax; k++) {
3292         tmp       = jj[nzcountb]/bs;
3293         block     = mask[tmp] - 1;
3294         point     = jj[nzcountb] - bs*tmp;
3295         idx       = ishift + bs2*block + j + bs*point;
3296         a->a[idx] = (MatScalar)aa[nzcountb++];
3297       }
3298     }
3299     /* zero out the mask elements we set */
3300     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3301   }
3302   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3303 
3304   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3305   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3306   ierr = PetscFree(aa);CHKERRQ(ierr);
3307   ierr = PetscFree(jj);CHKERRQ(ierr);
3308   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3309 
3310   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3311   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatCreateSeqBAIJ"
3317 /*@C
3318    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3319    compressed row) format.  For good matrix assembly performance the
3320    user should preallocate the matrix storage by setting the parameter nz
3321    (or the array nnz).  By setting these parameters accurately, performance
3322    during matrix assembly can be increased by more than a factor of 50.
3323 
3324    Collective on MPI_Comm
3325 
3326    Input Parameters:
3327 +  comm - MPI communicator, set to PETSC_COMM_SELF
3328 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3329           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3330 .  m - number of rows
3331 .  n - number of columns
3332 .  nz - number of nonzero blocks  per block row (same for all rows)
3333 -  nnz - array containing the number of nonzero blocks in the various block rows
3334          (possibly different for each block row) or NULL
3335 
3336    Output Parameter:
3337 .  A - the matrix
3338 
3339    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3340    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3341    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3342 
3343    Options Database Keys:
3344 .   -mat_no_unroll - uses code that does not unroll the loops in the
3345                      block calculations (much slower)
3346 .    -mat_block_size - size of the blocks to use
3347 
3348    Level: intermediate
3349 
3350    Notes:
3351    The number of rows and columns must be divisible by blocksize.
3352 
3353    If the nnz parameter is given then the nz parameter is ignored
3354 
3355    A nonzero block is any block that as 1 or more nonzeros in it
3356 
3357    The block AIJ format is fully compatible with standard Fortran 77
3358    storage.  That is, the stored row and column indices can begin at
3359    either one (as in Fortran) or zero.  See the users' manual for details.
3360 
3361    Specify the preallocated storage with either nz or nnz (not both).
3362    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3363    allocation.  See Users-Manual: ch_mat for details.
3364    matrices.
3365 
3366 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3367 @*/
3368 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3369 {
3370   PetscErrorCode ierr;
3371 
3372   PetscFunctionBegin;
3373   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3374   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3375   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3376   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 #undef __FUNCT__
3381 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3382 /*@C
3383    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3384    per row in the matrix. For good matrix assembly performance the
3385    user should preallocate the matrix storage by setting the parameter nz
3386    (or the array nnz).  By setting these parameters accurately, performance
3387    during matrix assembly can be increased by more than a factor of 50.
3388 
3389    Collective on MPI_Comm
3390 
3391    Input Parameters:
3392 +  B - the matrix
3393 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3394           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3395 .  nz - number of block nonzeros per block row (same for all rows)
3396 -  nnz - array containing the number of block nonzeros in the various block rows
3397          (possibly different for each block row) or NULL
3398 
3399    Options Database Keys:
3400 .   -mat_no_unroll - uses code that does not unroll the loops in the
3401                      block calculations (much slower)
3402 .    -mat_block_size - size of the blocks to use
3403 
3404    Level: intermediate
3405 
3406    Notes:
3407    If the nnz parameter is given then the nz parameter is ignored
3408 
3409    You can call MatGetInfo() to get information on how effective the preallocation was;
3410    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3411    You can also run with the option -info and look for messages with the string
3412    malloc in them to see if additional memory allocation was needed.
3413 
3414    The block AIJ format is fully compatible with standard Fortran 77
3415    storage.  That is, the stored row and column indices can begin at
3416    either one (as in Fortran) or zero.  See the users' manual for details.
3417 
3418    Specify the preallocated storage with either nz or nnz (not both).
3419    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3420    allocation.  See Users-Manual: ch_mat for details.
3421 
3422 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3423 @*/
3424 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3425 {
3426   PetscErrorCode ierr;
3427 
3428   PetscFunctionBegin;
3429   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3430   PetscValidType(B,1);
3431   PetscValidLogicalCollectiveInt(B,bs,2);
3432   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3433   PetscFunctionReturn(0);
3434 }
3435 
3436 #undef __FUNCT__
3437 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3438 /*@C
3439    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3440    (the default sequential PETSc format).
3441 
3442    Collective on MPI_Comm
3443 
3444    Input Parameters:
3445 +  B - the matrix
3446 .  i - the indices into j for the start of each local row (starts with zero)
3447 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3448 -  v - optional values in the matrix
3449 
3450    Level: developer
3451 
3452    Notes:
3453    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3454    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3455    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3456    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3457    block column and the second index is over columns within a block.
3458 
3459 .keywords: matrix, aij, compressed row, sparse
3460 
3461 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3462 @*/
3463 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3464 {
3465   PetscErrorCode ierr;
3466 
3467   PetscFunctionBegin;
3468   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3469   PetscValidType(B,1);
3470   PetscValidLogicalCollectiveInt(B,bs,2);
3471   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3472   PetscFunctionReturn(0);
3473 }
3474 
3475 
3476 #undef __FUNCT__
3477 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3478 /*@
3479      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3480 
3481      Collective on MPI_Comm
3482 
3483    Input Parameters:
3484 +  comm - must be an MPI communicator of size 1
3485 .  bs - size of block
3486 .  m - number of rows
3487 .  n - number of columns
3488 .  i - row indices
3489 .  j - column indices
3490 -  a - matrix values
3491 
3492    Output Parameter:
3493 .  mat - the matrix
3494 
3495    Level: advanced
3496 
3497    Notes:
3498        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3499     once the matrix is destroyed
3500 
3501        You cannot set new nonzero locations into this matrix, that will generate an error.
3502 
3503        The i and j indices are 0 based
3504 
3505        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3506 
3507       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3508       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3509       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3510       with column-major ordering within blocks.
3511 
3512 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3513 
3514 @*/
3515 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3516 {
3517   PetscErrorCode ierr;
3518   PetscInt       ii;
3519   Mat_SeqBAIJ    *baij;
3520 
3521   PetscFunctionBegin;
3522   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3523   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3524 
3525   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3526   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3527   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3528   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3529   baij = (Mat_SeqBAIJ*)(*mat)->data;
3530   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3531   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3532 
3533   baij->i = i;
3534   baij->j = j;
3535   baij->a = a;
3536 
3537   baij->singlemalloc = PETSC_FALSE;
3538   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3539   baij->free_a       = PETSC_FALSE;
3540   baij->free_ij      = PETSC_FALSE;
3541 
3542   for (ii=0; ii<m; ii++) {
3543     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3544 #if defined(PETSC_USE_DEBUG)
3545     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3546 #endif
3547   }
3548 #if defined(PETSC_USE_DEBUG)
3549   for (ii=0; ii<baij->i[m]; ii++) {
3550     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3551     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3552   }
3553 #endif
3554 
3555   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3556   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3557   PetscFunctionReturn(0);
3558 }
3559