xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7da1fb6e516b95fd231a3ef1ae9cb4a28f357e5e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr);
156   }
157   work = a->mult_work;
158   t = work + k+1;
159   if (!a->sor_work) {
160     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
161   }
162   w = a->sor_work;
163 
164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
165   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
166 
167   if (flag & SOR_ZERO_INITIAL_GUESS) {
168     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169       switch (bs) {
170       case 1:
171         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172         t[0] = b[0];
173         i2     = 1;
174         idiag += 1;
175         for (i=1; i<m; i++) {
176           v  = aa + ai[i];
177           vi = aj + ai[i];
178           nz = diag[i] - ai[i];
179           s[0] = b[i2];
180           for (j=0; j<nz; j++) {
181             xw[0] = x[vi[j]];
182             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183           }
184           t[i2] = s[0];
185           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186           x[i2]  = xw[0];
187           idiag += 1;
188           i2    += 1;
189         }
190         break;
191       case 2:
192         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193         t[0] = b[0]; t[1] = b[1];
194         i2     = 2;
195         idiag += 4;
196         for (i=1; i<m; i++) {
197           v  = aa + 4*ai[i];
198           vi = aj + ai[i];
199           nz = diag[i] - ai[i];
200           s[0] = b[i2]; s[1] = b[i2+1];
201           for (j=0; j<nz; j++) {
202             idx = 2*vi[j];
203             it  = 4*j;
204             xw[0] = x[idx]; xw[1] = x[1+idx];
205             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206           }
207           t[i2] = s[0]; t[i2+1] = s[1];
208           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209           x[i2]   = xw[0]; x[i2+1] = xw[1];
210           idiag  += 4;
211           i2     += 2;
212         }
213         break;
214       case 3:
215         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217         i2     = 3;
218         idiag += 9;
219         for (i=1; i<m; i++) {
220           v  = aa + 9*ai[i];
221           vi = aj + ai[i];
222           nz = diag[i] - ai[i];
223           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224           while (nz--) {
225             idx = 3*(*vi++);
226             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228             v  += 9;
229           }
230           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233           idiag  += 9;
234           i2     += 3;
235         }
236         break;
237       case 4:
238         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240         i2     = 4;
241         idiag += 16;
242         for (i=1; i<m; i++) {
243           v  = aa + 16*ai[i];
244           vi = aj + ai[i];
245           nz = diag[i] - ai[i];
246           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247           while (nz--) {
248             idx = 4*(*vi++);
249             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251             v  += 16;
252           }
253           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256           idiag  += 16;
257           i2     += 4;
258         }
259         break;
260       case 5:
261         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263         i2     = 5;
264         idiag += 25;
265         for (i=1; i<m; i++) {
266           v  = aa + 25*ai[i];
267           vi = aj + ai[i];
268           nz = diag[i] - ai[i];
269           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270           while (nz--) {
271             idx = 5*(*vi++);
272             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274             v  += 25;
275           }
276           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279           idiag  += 25;
280           i2     += 5;
281         }
282         break;
283       case 6:
284         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286         i2     = 6;
287         idiag += 36;
288         for (i=1; i<m; i++) {
289           v  = aa + 36*ai[i];
290           vi = aj + ai[i];
291           nz = diag[i] - ai[i];
292           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293           while (nz--) {
294             idx = 6*(*vi++);
295             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
296             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298             v  += 36;
299           }
300           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304           idiag  += 36;
305           i2     += 6;
306         }
307         break;
308       case 7:
309         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312         i2     = 7;
313         idiag += 49;
314         for (i=1; i<m; i++) {
315           v  = aa + 49*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
319           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320           while (nz--) {
321             idx = 7*(*vi++);
322             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
323             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325             v  += 49;
326           }
327           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332           idiag  += 49;
333           i2     += 7;
334         }
335         break;
336       default:
337         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
339         i2     = bs;
340         idiag += bs2;
341         for (i=1; i<m; i++) {
342           v  = aa + bs2*ai[i];
343           vi = aj + ai[i];
344           nz = diag[i] - ai[i];
345 
346           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
347           /* copy all rows of x that are needed into contiguous space */
348           workt = work;
349           for (j=0; j<nz; j++) {
350             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
351             workt += bs;
352           }
353           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
356 
357           idiag += bs2;
358           i2    += bs;
359         }
360         break;
361       }
362       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
364       xb = t;
365     }
366     else xb = b;
367     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368       idiag = a->idiag+bs2*(a->mbs-1);
369       i2 = bs * (m-1);
370       switch (bs) {
371       case 1:
372         s[0]  = xb[i2];
373         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374         x[i2] = xw[0];
375         i2   -= 1;
376         for (i=m-2; i>=0; i--) {
377           v  = aa + (diag[i]+1);
378           vi = aj + diag[i] + 1;
379           nz = ai[i+1] - diag[i] - 1;
380           s[0] = xb[i2];
381           for (j=0; j<nz; j++) {
382             xw[0] = x[vi[j]];
383             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384           }
385           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386           x[i2]  = xw[0];
387           idiag -= 1;
388           i2    -= 1;
389         }
390         break;
391       case 2:
392         s[0]  = xb[i2]; s[1] = xb[i2+1];
393         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394         x[i2] = xw[0]; x[i2+1] = xw[1];
395         i2    -= 2;
396         idiag -= 4;
397         for (i=m-2; i>=0; i--) {
398           v  = aa + 4*(diag[i] + 1);
399           vi = aj + diag[i] + 1;
400           nz = ai[i+1] - diag[i] - 1;
401           s[0] = xb[i2]; s[1] = xb[i2+1];
402           for (j=0; j<nz; j++) {
403             idx = 2*vi[j];
404             it  = 4*j;
405             xw[0] = x[idx]; xw[1] = x[1+idx];
406             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407           }
408           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409           x[i2]   = xw[0]; x[i2+1] = xw[1];
410           idiag  -= 4;
411           i2     -= 2;
412         }
413         break;
414       case 3:
415         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418         i2    -= 3;
419         idiag -= 9;
420         for (i=m-2; i>=0; i--) {
421           v  = aa + 9*(diag[i]+1);
422           vi = aj + diag[i] + 1;
423           nz = ai[i+1] - diag[i] - 1;
424           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425           while (nz--) {
426             idx = 3*(*vi++);
427             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429             v  += 9;
430           }
431           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433           idiag  -= 9;
434           i2     -= 3;
435         }
436         break;
437       case 4:
438         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441         i2    -= 4;
442         idiag -= 16;
443         for (i=m-2; i>=0; i--) {
444           v  = aa + 16*(diag[i]+1);
445           vi = aj + diag[i] + 1;
446           nz = ai[i+1] - diag[i] - 1;
447           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448           while (nz--) {
449             idx = 4*(*vi++);
450             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452             v  += 16;
453           }
454           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456           idiag  -= 16;
457           i2     -= 4;
458         }
459         break;
460       case 5:
461         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464         i2    -= 5;
465         idiag -= 25;
466         for (i=m-2; i>=0; i--) {
467           v  = aa + 25*(diag[i]+1);
468           vi = aj + diag[i] + 1;
469           nz = ai[i+1] - diag[i] - 1;
470           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471           while (nz--) {
472             idx = 5*(*vi++);
473             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475             v  += 25;
476           }
477           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479           idiag  -= 25;
480           i2     -= 5;
481         }
482         break;
483       case 6:
484         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487         i2    -= 6;
488         idiag -= 36;
489         for (i=m-2; i>=0; i--) {
490           v  = aa + 36*(diag[i]+1);
491           vi = aj + diag[i] + 1;
492           nz = ai[i+1] - diag[i] - 1;
493           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494           while (nz--) {
495             idx = 6*(*vi++);
496             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
497             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499             v  += 36;
500           }
501           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503           idiag  -= 36;
504           i2     -= 6;
505         }
506         break;
507       case 7:
508         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
509         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513         i2    -= 7;
514         idiag -= 49;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 49*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521           while (nz--) {
522             idx = 7*(*vi++);
523             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
524             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526             v  += 49;
527           }
528           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531           idiag  -= 49;
532           i2     -= 7;
533         }
534         break;
535       default:
536         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
537         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538         i2    -= bs;
539         idiag -= bs2;
540         for (i=m-2; i>=0; i--) {
541           v  = aa + bs2*(diag[i]+1);
542           vi = aj + diag[i] + 1;
543           nz = ai[i+1] - diag[i] - 1;
544 
545           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546           /* copy all rows of x that are needed into contiguous space */
547           workt = work;
548           for (j=0; j<nz; j++) {
549             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
550             workt += bs;
551           }
552           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
554 
555           idiag -= bs2;
556           i2    -= bs;
557         }
558         break;
559       }
560       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
561     }
562     its--;
563   }
564   while (its--) {
565     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566       idiag = a->idiag;
567       i2 = 0;
568       switch (bs) {
569       case 1:
570         for (i=0; i<m; i++) {
571           v  = aa + ai[i];
572           vi = aj + ai[i];
573           nz = ai[i+1] - ai[i];
574           s[0] = b[i2];
575           for (j=0; j<nz; j++) {
576             xw[0] = x[vi[j]];
577             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578           }
579           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580           x[i2] += xw[0];
581           idiag += 1;
582           i2    += 1;
583         }
584         break;
585       case 2:
586         for (i=0; i<m; i++) {
587           v  = aa + 4*ai[i];
588           vi = aj + ai[i];
589           nz = ai[i+1] - ai[i];
590           s[0] = b[i2]; s[1] = b[i2+1];
591           for (j=0; j<nz; j++) {
592             idx = 2*vi[j];
593             it  = 4*j;
594             xw[0] = x[idx]; xw[1] = x[1+idx];
595             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596           }
597           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598           x[i2]  += xw[0]; x[i2+1] += xw[1];
599           idiag  += 4;
600           i2     += 2;
601         }
602         break;
603       case 3:
604         for (i=0; i<m; i++) {
605           v  = aa + 9*ai[i];
606           vi = aj + ai[i];
607           nz = ai[i+1] - ai[i];
608           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609           while (nz--) {
610             idx = 3*(*vi++);
611             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613             v  += 9;
614           }
615           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617           idiag  += 9;
618           i2     += 3;
619         }
620         break;
621       case 4:
622         for (i=0; i<m; i++) {
623           v  = aa + 16*ai[i];
624           vi = aj + ai[i];
625           nz = ai[i+1] - ai[i];
626           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627           while (nz--) {
628             idx = 4*(*vi++);
629             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631             v  += 16;
632           }
633           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635           idiag  += 16;
636           i2     += 4;
637         }
638         break;
639       case 5:
640         for (i=0; i<m; i++) {
641           v  = aa + 25*ai[i];
642           vi = aj + ai[i];
643           nz = ai[i+1] - ai[i];
644           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645           while (nz--) {
646             idx = 5*(*vi++);
647             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649             v  += 25;
650           }
651           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653           idiag  += 25;
654           i2     += 5;
655         }
656         break;
657       case 6:
658         for (i=0; i<m; i++) {
659           v  = aa + 36*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663           while (nz--) {
664             idx = 6*(*vi++);
665             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
666             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668             v  += 36;
669           }
670           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673           idiag  += 36;
674           i2     += 6;
675         }
676         break;
677       case 7:
678         for (i=0; i<m; i++) {
679           v  = aa + 49*ai[i];
680           vi = aj + ai[i];
681           nz = ai[i+1] - ai[i];
682           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
683           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684           while (nz--) {
685             idx = 7*(*vi++);
686             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689             v  += 49;
690           }
691           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694           idiag  += 49;
695           i2     += 7;
696         }
697         break;
698       default:
699         for (i=0; i<m; i++) {
700           v  = aa + bs2*ai[i];
701           vi = aj + ai[i];
702           nz = ai[i+1] - ai[i];
703 
704           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
705           /* copy all rows of x that are needed into contiguous space */
706           workt = work;
707           for (j=0; j<nz; j++) {
708             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
709             workt += bs;
710           }
711           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
713 
714           idiag += bs2;
715           i2    += bs;
716         }
717         break;
718       }
719       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
720     }
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = a->idiag+bs2*(a->mbs-1);
723       i2 = bs * (m-1);
724       switch (bs) {
725       case 1:
726         for (i=m-1; i>=0; i--) {
727           v  = aa + ai[i];
728           vi = aj + ai[i];
729           nz = ai[i+1] - ai[i];
730           s[0] = b[i2];
731           for (j=0; j<nz; j++) {
732             xw[0] = x[vi[j]];
733             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734           }
735           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736           x[i2] += xw[0];
737           idiag -= 1;
738           i2    -= 1;
739         }
740         break;
741       case 2:
742         for (i=m-1; i>=0; i--) {
743           v  = aa + 4*ai[i];
744           vi = aj + ai[i];
745           nz = ai[i+1] - ai[i];
746           s[0] = b[i2]; s[1] = b[i2+1];
747           for (j=0; j<nz; j++) {
748             idx = 2*vi[j];
749             it  = 4*j;
750             xw[0] = x[idx]; xw[1] = x[1+idx];
751             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752           }
753           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754           x[i2]  += xw[0]; x[i2+1] += xw[1];
755           idiag  -= 4;
756           i2     -= 2;
757         }
758         break;
759       case 3:
760         for (i=m-1; i>=0; i--) {
761           v  = aa + 9*ai[i];
762           vi = aj + ai[i];
763           nz = ai[i+1] - ai[i];
764           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765           while (nz--) {
766             idx = 3*(*vi++);
767             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769             v  += 9;
770           }
771           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773           idiag  -= 9;
774           i2     -= 3;
775         }
776         break;
777       case 4:
778         for (i=m-1; i>=0; i--) {
779           v  = aa + 16*ai[i];
780           vi = aj + ai[i];
781           nz = ai[i+1] - ai[i];
782           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783           while (nz--) {
784             idx = 4*(*vi++);
785             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787             v  += 16;
788           }
789           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791           idiag  -= 16;
792           i2     -= 4;
793         }
794         break;
795       case 5:
796         for (i=m-1; i>=0; i--) {
797           v  = aa + 25*ai[i];
798           vi = aj + ai[i];
799           nz = ai[i+1] - ai[i];
800           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801           while (nz--) {
802             idx = 5*(*vi++);
803             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805             v  += 25;
806           }
807           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809           idiag  -= 25;
810           i2     -= 5;
811         }
812         break;
813       case 6:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 36*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819           while (nz--) {
820             idx = 6*(*vi++);
821             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
822             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824             v  += 36;
825           }
826           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829           idiag  -= 36;
830           i2     -= 6;
831         }
832         break;
833       case 7:
834         for (i=m-1; i>=0; i--) {
835           v  = aa + 49*ai[i];
836           vi = aj + ai[i];
837           nz = ai[i+1] - ai[i];
838           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
839           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840           while (nz--) {
841             idx = 7*(*vi++);
842             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845             v  += 49;
846           }
847           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850           idiag  -= 49;
851           i2     -= 7;
852         }
853         break;
854       default:
855         for (i=m-1; i>=0; i--) {
856           v  = aa + bs2*ai[i];
857           vi = aj + ai[i];
858           nz = ai[i+1] - ai[i];
859 
860           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
861           /* copy all rows of x that are needed into contiguous space */
862           workt = work;
863           for (j=0; j<nz; j++) {
864             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
865             workt += bs;
866           }
867           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
869 
870           idiag -= bs2;
871           i2    -= bs;
872         }
873         break;
874       }
875       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
876     }
877   }
878   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 
884 /*
885     Special version for direct calls from Fortran (Used in PETSc-fun3d)
886 */
887 #if defined(PETSC_HAVE_FORTRAN_CAPS)
888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890 #define matsetvaluesblocked4_ matsetvaluesblocked4
891 #endif
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "matsetvaluesblocked4_"
895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896 {
897   Mat               A  = *AA;
898   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
899   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900   PetscInt          *ai    =a->i,*ailen=a->ilen;
901   PetscInt          *aj    =a->j,stepval,lastcol = -1;
902   const PetscScalar *value = v;
903   MatScalar         *ap,*aa = a->a,*bap;
904 
905   PetscFunctionBegin;
906   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907   stepval = (n-1)*4;
908   for (k=0; k<m; k++) { /* loop over added rows */
909     row  = im[k];
910     rp   = aj + ai[row];
911     ap   = aa + 16*ai[row];
912     nrow = ailen[row];
913     low  = 0;
914     high = nrow;
915     for (l=0; l<n; l++) { /* loop over added columns */
916       col = in[l];
917       if (col <= lastcol)  low = 0;
918       else                high = nrow;
919       lastcol = col;
920       value   = v + k*(stepval+4 + l)*4;
921       while (high-low > 7) {
922         t = (low+high)/2;
923         if (rp[t] > col) high = t;
924         else             low  = t;
925       }
926       for (i=low; i<high; i++) {
927         if (rp[i] > col) break;
928         if (rp[i] == col) {
929           bap = ap +  16*i;
930           for (ii=0; ii<4; ii++,value+=stepval) {
931             for (jj=ii; jj<16; jj+=4) {
932               bap[jj] += *value++;
933             }
934           }
935           goto noinsert2;
936         }
937       }
938       N = nrow++ - 1;
939       high++; /* added new column index thus must search to one higher than before */
940       /* shift up all the later entries in this row */
941       for (ii=N; ii>=i; ii--) {
942         rp[ii+1] = rp[ii];
943         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944       }
945       if (N >= i) {
946         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947       }
948       rp[i] = col;
949       bap   = ap +  16*i;
950       for (ii=0; ii<4; ii++,value+=stepval) {
951         for (jj=ii; jj<16; jj+=4) {
952           bap[jj] = *value++;
953         }
954       }
955       noinsert2:;
956       low = i;
957     }
958     ailen[row] = nrow;
959   }
960   PetscFunctionReturnVoid();
961 }
962 
963 #if defined(PETSC_HAVE_FORTRAN_CAPS)
964 #define matsetvalues4_ MATSETVALUES4
965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966 #define matsetvalues4_ matsetvalues4
967 #endif
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSetValues4_"
971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972 {
973   Mat         A  = *AA;
974   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976   PetscInt    *ai=a->i,*ailen=a->ilen;
977   PetscInt    *aj=a->j,brow,bcol;
978   PetscInt    ridx,cidx,lastcol = -1;
979   MatScalar   *ap,value,*aa=a->a,*bap;
980 
981   PetscFunctionBegin;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k]; brow = row/4;
984     rp   = aj + ai[brow];
985     ap   = aa + 16*ai[brow];
986     nrow = ailen[brow];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col   = in[l]; bcol = col/4;
991       ridx  = row % 4; cidx = col % 4;
992       value = v[l + k*n];
993       if (col <= lastcol)  low = 0;
994       else                high = nrow;
995       lastcol = col;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for (i=low; i<high; i++) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap   = ap +  16*i + 4*cidx + ridx;
1005           *bap += value;
1006           goto noinsert1;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N>=i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i]                    = bcol;
1020       ap[16*i + 4*cidx + ridx] = value;
1021 noinsert1:;
1022       low = i;
1023     }
1024     ailen[brow] = nrow;
1025   }
1026   PetscFunctionReturnVoid();
1027 }
1028 
1029 /*
1030      Checks for missing diagonals
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*jj = a->j,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !jj) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047   } else {
1048     diag = a->diag;
1049     for (i=0; i<a->mbs; i++) {
1050       if (jj[diag[i]] != i) {
1051         *missing = PETSC_TRUE;
1052         if (d) *d = i;
1053         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054         break;
1055       }
1056     }
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = 0;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176 {
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (!ia) PetscFunctionReturn(0);
1181   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182     ierr = PetscFree(*ia);CHKERRQ(ierr);
1183     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191 {
1192   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198 #endif
1199   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1200   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1201   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1202   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1203   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1204   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1205   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1206   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1207   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1208   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1209   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1210   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1211   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1212 
1213   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1214   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1215   ierr = PetscFree(A->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1234 {
1235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   switch (op) {
1240   case MAT_ROW_ORIENTED:
1241     a->roworiented = flg;
1242     break;
1243   case MAT_KEEP_NONZERO_PATTERN:
1244     a->keepnonzeropattern = flg;
1245     break;
1246   case MAT_NEW_NONZERO_LOCATIONS:
1247     a->nonew = (flg ? 0 : 1);
1248     break;
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     a->nonew = (flg ? -1 : 0);
1251     break;
1252   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1253     a->nonew = (flg ? -2 : 0);
1254     break;
1255   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1256     a->nounused = (flg ? -1 : 0);
1257     break;
1258   case MAT_NEW_DIAGONALS:
1259   case MAT_IGNORE_OFF_PROC_ENTRIES:
1260   case MAT_USE_HASH_TABLE:
1261     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262     break;
1263   case MAT_SPD:
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268     /* These options are handled directly by MatSetOption() */
1269     break;
1270   default:
1271     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1278 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1283   MatScalar      *aa,*aa_i;
1284   PetscScalar    *v_i;
1285 
1286   PetscFunctionBegin;
1287   bs  = A->rmap->bs;
1288   ai  = a->i;
1289   aj  = a->j;
1290   aa  = a->a;
1291   bs2 = a->bs2;
1292 
1293   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1294 
1295   bn  = row/bs;   /* Block number */
1296   bp  = row % bs; /* Block Position */
1297   M   = ai[bn+1] - ai[bn];
1298   *nz = bs*M;
1299 
1300   if (v) {
1301     *v = 0;
1302     if (*nz) {
1303       ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr);
1304       for (i=0; i<M; i++) { /* for each block in the block row */
1305         v_i  = *v + i*bs;
1306         aa_i = aa + bs2*(ai[bn] + i);
1307         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1308       }
1309     }
1310   }
1311 
1312   if (idx) {
1313     *idx = 0;
1314     if (*nz) {
1315       ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr);
1316       for (i=0; i<M; i++) { /* for each block in the block row */
1317         idx_i = *idx + i*bs;
1318         itmp  = bs*aj[ai[bn] + i];
1319         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1320       }
1321     }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1334   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1343 {
1344   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1345   Mat            C;
1346   PetscErrorCode ierr;
1347   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1348   PetscInt       *rows,*cols,bs2=a->bs2;
1349   MatScalar      *array;
1350 
1351   PetscFunctionBegin;
1352   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1353   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1354     ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr);
1355 
1356     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1357     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1358     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1359     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1360     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1361     ierr = PetscFree(col);CHKERRQ(ierr);
1362   } else {
1363     C = *B;
1364   }
1365 
1366   array = a->a;
1367   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1368   for (i=0; i<mbs; i++) {
1369     cols[0] = i*bs;
1370     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1371     len = ai[i+1] - ai[i];
1372     for (j=0; j<len; j++) {
1373       rows[0] = (*aj++)*bs;
1374       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1375       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1376       array += bs2;
1377     }
1378   }
1379   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1380 
1381   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1382   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383 
1384   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1385     *B = C;
1386   } else {
1387     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1394 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1395 {
1396   PetscErrorCode ierr;
1397   Mat            Btrans;
1398 
1399   PetscFunctionBegin;
1400   *f   = PETSC_FALSE;
1401   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1402   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1403   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1409 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1410 {
1411   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1412   PetscErrorCode ierr;
1413   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1414   int            fd;
1415   PetscScalar    *aa;
1416   FILE           *file;
1417 
1418   PetscFunctionBegin;
1419   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1420   ierr        = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr);
1421   col_lens[0] = MAT_FILE_CLASSID;
1422 
1423   col_lens[1] = A->rmap->N;
1424   col_lens[2] = A->cmap->n;
1425   col_lens[3] = a->nz*bs2;
1426 
1427   /* store lengths of each row and write (including header) to file */
1428   count = 0;
1429   for (i=0; i<a->mbs; i++) {
1430     for (j=0; j<bs; j++) {
1431       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1432     }
1433   }
1434   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1435   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1436 
1437   /* store column indices (zero start index) */
1438   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1439   count = 0;
1440   for (i=0; i<a->mbs; i++) {
1441     for (j=0; j<bs; j++) {
1442       for (k=a->i[i]; k<a->i[i+1]; k++) {
1443         for (l=0; l<bs; l++) {
1444           jj[count++] = bs*a->j[k] + l;
1445         }
1446       }
1447     }
1448   }
1449   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1450   ierr = PetscFree(jj);CHKERRQ(ierr);
1451 
1452   /* store nonzero values */
1453   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1454   count = 0;
1455   for (i=0; i<a->mbs; i++) {
1456     for (j=0; j<bs; j++) {
1457       for (k=a->i[i]; k<a->i[i+1]; k++) {
1458         for (l=0; l<bs; l++) {
1459           aa[count++] = a->a[bs2*k + l*bs + j];
1460         }
1461       }
1462     }
1463   }
1464   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1465   ierr = PetscFree(aa);CHKERRQ(ierr);
1466 
1467   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1468   if (file) {
1469     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1470   }
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1476 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1477 {
1478   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1479   PetscErrorCode    ierr;
1480   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1481   PetscViewerFormat format;
1482 
1483   PetscFunctionBegin;
1484   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1485   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1486     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1487   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1488     Mat aij;
1489     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1490     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1491     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1492   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1495     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496     for (i=0; i<a->mbs; i++) {
1497       for (j=0; j<bs; j++) {
1498         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1499         for (k=a->i[i]; k<a->i[i+1]; k++) {
1500           for (l=0; l<bs; l++) {
1501 #if defined(PETSC_USE_COMPLEX)
1502             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1503               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1504                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1505             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1506               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1507                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1508             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1509               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1510             }
1511 #else
1512             if (a->a[bs2*k + l*bs + j] != 0.0) {
1513               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1514             }
1515 #endif
1516           }
1517         }
1518         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1519       }
1520     }
1521     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1522   } else {
1523     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1524     for (i=0; i<a->mbs; i++) {
1525       for (j=0; j<bs; j++) {
1526         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1527         for (k=a->i[i]; k<a->i[i+1]; k++) {
1528           for (l=0; l<bs; l++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1532                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1533             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1535                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             }
1539 #else
1540             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1541 #endif
1542           }
1543         }
1544         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1545       }
1546     }
1547     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1548   }
1549   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 #include <petscdraw.h>
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1556 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1557 {
1558   Mat               A = (Mat) Aa;
1559   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1560   PetscErrorCode    ierr;
1561   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1562   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1563   MatScalar         *aa;
1564   PetscViewer       viewer;
1565   PetscViewerFormat format;
1566 
1567   PetscFunctionBegin;
1568   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1569   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1570 
1571   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1572 
1573   /* loop over matrix elements drawing boxes */
1574 
1575   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1576     color = PETSC_DRAW_BLUE;
1577     for (i=0,row=0; i<mbs; i++,row+=bs) {
1578       for (j=a->i[i]; j<a->i[i+1]; j++) {
1579         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1580         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1581         aa  = a->a + j*bs2;
1582         for (k=0; k<bs; k++) {
1583           for (l=0; l<bs; l++) {
1584             if (PetscRealPart(*aa++) >=  0.) continue;
1585             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1586           }
1587         }
1588       }
1589     }
1590     color = PETSC_DRAW_CYAN;
1591     for (i=0,row=0; i<mbs; i++,row+=bs) {
1592       for (j=a->i[i]; j<a->i[i+1]; j++) {
1593         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1594         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1595         aa  = a->a + j*bs2;
1596         for (k=0; k<bs; k++) {
1597           for (l=0; l<bs; l++) {
1598             if (PetscRealPart(*aa++) != 0.) continue;
1599             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1600           }
1601         }
1602       }
1603     }
1604     color = PETSC_DRAW_RED;
1605     for (i=0,row=0; i<mbs; i++,row+=bs) {
1606       for (j=a->i[i]; j<a->i[i+1]; j++) {
1607         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1608         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1609         aa  = a->a + j*bs2;
1610         for (k=0; k<bs; k++) {
1611           for (l=0; l<bs; l++) {
1612             if (PetscRealPart(*aa++) <= 0.) continue;
1613             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1614           }
1615         }
1616       }
1617     }
1618   } else {
1619     /* use contour shading to indicate magnitude of values */
1620     /* first determine max of all nonzero values */
1621     PetscDraw popup;
1622     PetscReal scale,maxv = 0.0;
1623 
1624     for (i=0; i<a->nz*a->bs2; i++) {
1625       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1626     }
1627     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1628     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1629     if (popup) {
1630       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1631     }
1632     for (i=0,row=0; i<mbs; i++,row+=bs) {
1633       for (j=a->i[i]; j<a->i[i+1]; j++) {
1634         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1635         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1636         aa  = a->a + j*bs2;
1637         for (k=0; k<bs; k++) {
1638           for (l=0; l<bs; l++) {
1639             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1640             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1641           }
1642         }
1643       }
1644     }
1645   }
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1651 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1652 {
1653   PetscErrorCode ierr;
1654   PetscReal      xl,yl,xr,yr,w,h;
1655   PetscDraw      draw;
1656   PetscBool      isnull;
1657 
1658   PetscFunctionBegin;
1659   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1660   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1661 
1662   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1663   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1664   xr  += w;    yr += h;  xl = -w;     yl = -h;
1665   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1666   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1667   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatView_SeqBAIJ"
1673 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1674 {
1675   PetscErrorCode ierr;
1676   PetscBool      iascii,isbinary,isdraw;
1677 
1678   PetscFunctionBegin;
1679   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1680   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1681   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1682   if (iascii) {
1683     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1684   } else if (isbinary) {
1685     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1686   } else if (isdraw) {
1687     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1688   } else {
1689     Mat B;
1690     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1691     ierr = MatView(B,viewer);CHKERRQ(ierr);
1692     ierr = MatDestroy(&B);CHKERRQ(ierr);
1693   }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1700 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1701 {
1702   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1703   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1704   PetscInt    *ai = a->i,*ailen = a->ilen;
1705   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1706   MatScalar   *ap,*aa = a->a;
1707 
1708   PetscFunctionBegin;
1709   for (k=0; k<m; k++) { /* loop over rows */
1710     row = im[k]; brow = row/bs;
1711     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1712     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1713     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1714     nrow = ailen[brow];
1715     for (l=0; l<n; l++) { /* loop over columns */
1716       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1717       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1718       col  = in[l];
1719       bcol = col/bs;
1720       cidx = col%bs;
1721       ridx = row%bs;
1722       high = nrow;
1723       low  = 0; /* assume unsorted */
1724       while (high-low > 5) {
1725         t = (low+high)/2;
1726         if (rp[t] > bcol) high = t;
1727         else             low  = t;
1728       }
1729       for (i=low; i<high; i++) {
1730         if (rp[i] > bcol) break;
1731         if (rp[i] == bcol) {
1732           *v++ = ap[bs2*i+bs*cidx+ridx];
1733           goto finished;
1734         }
1735       }
1736       *v++ = 0.0;
1737 finished:;
1738     }
1739   }
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1745 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1746 {
1747   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1748   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1749   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1750   PetscErrorCode    ierr;
1751   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1752   PetscBool         roworiented=a->roworiented;
1753   const PetscScalar *value     = v;
1754   MatScalar         *ap,*aa = a->a,*bap;
1755 
1756   PetscFunctionBegin;
1757   if (roworiented) {
1758     stepval = (n-1)*bs;
1759   } else {
1760     stepval = (m-1)*bs;
1761   }
1762   for (k=0; k<m; k++) { /* loop over added rows */
1763     row = im[k];
1764     if (row < 0) continue;
1765 #if defined(PETSC_USE_DEBUG)
1766     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1767 #endif
1768     rp   = aj + ai[row];
1769     ap   = aa + bs2*ai[row];
1770     rmax = imax[row];
1771     nrow = ailen[row];
1772     low  = 0;
1773     high = nrow;
1774     for (l=0; l<n; l++) { /* loop over added columns */
1775       if (in[l] < 0) continue;
1776 #if defined(PETSC_USE_DEBUG)
1777       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1778 #endif
1779       col = in[l];
1780       if (roworiented) {
1781         value = v + (k*(stepval+bs) + l)*bs;
1782       } else {
1783         value = v + (l*(stepval+bs) + k)*bs;
1784       }
1785       if (col <= lastcol) low = 0;
1786       else high = nrow;
1787       lastcol = col;
1788       while (high-low > 7) {
1789         t = (low+high)/2;
1790         if (rp[t] > col) high = t;
1791         else             low  = t;
1792       }
1793       for (i=low; i<high; i++) {
1794         if (rp[i] > col) break;
1795         if (rp[i] == col) {
1796           bap = ap +  bs2*i;
1797           if (roworiented) {
1798             if (is == ADD_VALUES) {
1799               for (ii=0; ii<bs; ii++,value+=stepval) {
1800                 for (jj=ii; jj<bs2; jj+=bs) {
1801                   bap[jj] += *value++;
1802                 }
1803               }
1804             } else {
1805               for (ii=0; ii<bs; ii++,value+=stepval) {
1806                 for (jj=ii; jj<bs2; jj+=bs) {
1807                   bap[jj] = *value++;
1808                 }
1809               }
1810             }
1811           } else {
1812             if (is == ADD_VALUES) {
1813               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1814                 for (jj=0; jj<bs; jj++) {
1815                   bap[jj] += value[jj];
1816                 }
1817                 bap += bs;
1818               }
1819             } else {
1820               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1821                 for (jj=0; jj<bs; jj++) {
1822                   bap[jj]  = value[jj];
1823                 }
1824                 bap += bs;
1825               }
1826             }
1827           }
1828           goto noinsert2;
1829         }
1830       }
1831       if (nonew == 1) goto noinsert2;
1832       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1833       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1834       N = nrow++ - 1; high++;
1835       /* shift up all the later entries in this row */
1836       for (ii=N; ii>=i; ii--) {
1837         rp[ii+1] = rp[ii];
1838         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1839       }
1840       if (N >= i) {
1841         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1842       }
1843       rp[i] = col;
1844       bap   = ap +  bs2*i;
1845       if (roworiented) {
1846         for (ii=0; ii<bs; ii++,value+=stepval) {
1847           for (jj=ii; jj<bs2; jj+=bs) {
1848             bap[jj] = *value++;
1849           }
1850         }
1851       } else {
1852         for (ii=0; ii<bs; ii++,value+=stepval) {
1853           for (jj=0; jj<bs; jj++) {
1854             *bap++ = *value++;
1855           }
1856         }
1857       }
1858 noinsert2:;
1859       low = i;
1860     }
1861     ailen[row] = nrow;
1862   }
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1868 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1869 {
1870   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1871   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1872   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1873   PetscErrorCode ierr;
1874   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1875   MatScalar      *aa  = a->a,*ap;
1876   PetscReal      ratio=0.6;
1877 
1878   PetscFunctionBegin;
1879   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1880 
1881   if (m) rmax = ailen[0];
1882   for (i=1; i<mbs; i++) {
1883     /* move each row back by the amount of empty slots (fshift) before it*/
1884     fshift += imax[i-1] - ailen[i-1];
1885     rmax    = PetscMax(rmax,ailen[i]);
1886     if (fshift) {
1887       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1888       N  = ailen[i];
1889       for (j=0; j<N; j++) {
1890         ip[j-fshift] = ip[j];
1891 
1892         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1893       }
1894     }
1895     ai[i] = ai[i-1] + ailen[i-1];
1896   }
1897   if (mbs) {
1898     fshift += imax[mbs-1] - ailen[mbs-1];
1899     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1900   }
1901 
1902   /* reset ilen and imax for each row */
1903   a->nonzerorowcnt = 0;
1904   for (i=0; i<mbs; i++) {
1905     ailen[i] = imax[i] = ai[i+1] - ai[i];
1906     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1907   }
1908   a->nz = ai[mbs];
1909 
1910   /* diagonals may have moved, so kill the diagonal pointers */
1911   a->idiagvalid = PETSC_FALSE;
1912   if (fshift && a->diag) {
1913     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1914     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1915     a->diag = 0;
1916   }
1917   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1918   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1919   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1920   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1921 
1922   A->info.mallocs    += a->reallocs;
1923   a->reallocs         = 0;
1924   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1925 
1926   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 /*
1931    This function returns an array of flags which indicate the locations of contiguous
1932    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1933    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1934    Assume: sizes should be long enough to hold all the values.
1935 */
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1938 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1939 {
1940   PetscInt  i,j,k,row;
1941   PetscBool flg;
1942 
1943   PetscFunctionBegin;
1944   for (i=0,j=0; i<n; j++) {
1945     row = idx[i];
1946     if (row%bs!=0) { /* Not the begining of a block */
1947       sizes[j] = 1;
1948       i++;
1949     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1950       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1951       i++;
1952     } else { /* Begining of the block, so check if the complete block exists */
1953       flg = PETSC_TRUE;
1954       for (k=1; k<bs; k++) {
1955         if (row+k != idx[i+k]) { /* break in the block */
1956           flg = PETSC_FALSE;
1957           break;
1958         }
1959       }
1960       if (flg) { /* No break in the bs */
1961         sizes[j] = bs;
1962         i       += bs;
1963       } else {
1964         sizes[j] = 1;
1965         i++;
1966       }
1967     }
1968   }
1969   *bs_max = j;
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1975 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1976 {
1977   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1978   PetscErrorCode    ierr;
1979   PetscInt          i,j,k,count,*rows;
1980   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1981   PetscScalar       zero = 0.0;
1982   MatScalar         *aa;
1983   const PetscScalar *xx;
1984   PetscScalar       *bb;
1985 
1986   PetscFunctionBegin;
1987   /* fix right hand side if needed */
1988   if (x && b) {
1989     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1990     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1991     for (i=0; i<is_n; i++) {
1992       bb[is_idx[i]] = diag*xx[is_idx[i]];
1993     }
1994     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1995     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1996   }
1997 
1998   /* Make a copy of the IS and  sort it */
1999   /* allocate memory for rows,sizes */
2000   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2001 
2002   /* copy IS values to rows, and sort them */
2003   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2004   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2005 
2006   if (baij->keepnonzeropattern) {
2007     for (i=0; i<is_n; i++) sizes[i] = 1;
2008     bs_max          = is_n;
2009   } else {
2010     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2011     A->nonzerostate++;
2012   }
2013 
2014   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2015     row = rows[j];
2016     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2017     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2018     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2019     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2020       if (diag != (PetscScalar)0.0) {
2021         if (baij->ilen[row/bs] > 0) {
2022           baij->ilen[row/bs]       = 1;
2023           baij->j[baij->i[row/bs]] = row/bs;
2024 
2025           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2026         }
2027         /* Now insert all the diagonal values for this bs */
2028         for (k=0; k<bs; k++) {
2029           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2030         }
2031       } else { /* (diag == 0.0) */
2032         baij->ilen[row/bs] = 0;
2033       } /* end (diag == 0.0) */
2034     } else { /* (sizes[i] != bs) */
2035 #if defined(PETSC_USE_DEBUG)
2036       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2037 #endif
2038       for (k=0; k<count; k++) {
2039         aa[0] =  zero;
2040         aa   += bs;
2041       }
2042       if (diag != (PetscScalar)0.0) {
2043         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2044       }
2045     }
2046   }
2047 
2048   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2049   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2055 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2056 {
2057   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2058   PetscErrorCode    ierr;
2059   PetscInt          i,j,k,count;
2060   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2061   PetscScalar       zero = 0.0;
2062   MatScalar         *aa;
2063   const PetscScalar *xx;
2064   PetscScalar       *bb;
2065   PetscBool         *zeroed,vecs = PETSC_FALSE;
2066 
2067   PetscFunctionBegin;
2068   /* fix right hand side if needed */
2069   if (x && b) {
2070     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2071     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2072     vecs = PETSC_TRUE;
2073   }
2074 
2075   /* zero the columns */
2076   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2077   for (i=0; i<is_n; i++) {
2078     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2079     zeroed[is_idx[i]] = PETSC_TRUE;
2080   }
2081   for (i=0; i<A->rmap->N; i++) {
2082     if (!zeroed[i]) {
2083       row = i/bs;
2084       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2085         for (k=0; k<bs; k++) {
2086           col = bs*baij->j[j] + k;
2087           if (zeroed[col]) {
2088             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2089             if (vecs) bb[i] -= aa[0]*xx[col];
2090             aa[0] = 0.0;
2091           }
2092         }
2093       }
2094     } else if (vecs) bb[i] = diag*xx[i];
2095   }
2096   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2097   if (vecs) {
2098     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2099     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2100   }
2101 
2102   /* zero the rows */
2103   for (i=0; i<is_n; i++) {
2104     row   = is_idx[i];
2105     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2106     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2107     for (k=0; k<count; k++) {
2108       aa[0] =  zero;
2109       aa   += bs;
2110     }
2111     if (diag != (PetscScalar)0.0) {
2112       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2113     }
2114   }
2115   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2121 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2122 {
2123   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2124   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2125   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2126   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2127   PetscErrorCode ierr;
2128   PetscInt       ridx,cidx,bs2=a->bs2;
2129   PetscBool      roworiented=a->roworiented;
2130   MatScalar      *ap,value,*aa=a->a,*bap;
2131 
2132   PetscFunctionBegin;
2133   if (v) PetscValidScalarPointer(v,6);
2134   for (k=0; k<m; k++) { /* loop over added rows */
2135     row  = im[k];
2136     brow = row/bs;
2137     if (row < 0) continue;
2138 #if defined(PETSC_USE_DEBUG)
2139     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2140 #endif
2141     rp   = aj + ai[brow];
2142     ap   = aa + bs2*ai[brow];
2143     rmax = imax[brow];
2144     nrow = ailen[brow];
2145     low  = 0;
2146     high = nrow;
2147     for (l=0; l<n; l++) { /* loop over added columns */
2148       if (in[l] < 0) continue;
2149 #if defined(PETSC_USE_DEBUG)
2150       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2151 #endif
2152       col  = in[l]; bcol = col/bs;
2153       ridx = row % bs; cidx = col % bs;
2154       if (roworiented) {
2155         value = v[l + k*n];
2156       } else {
2157         value = v[k + l*m];
2158       }
2159       if (col <= lastcol) low = 0; else high = nrow;
2160       lastcol = col;
2161       while (high-low > 7) {
2162         t = (low+high)/2;
2163         if (rp[t] > bcol) high = t;
2164         else              low  = t;
2165       }
2166       for (i=low; i<high; i++) {
2167         if (rp[i] > bcol) break;
2168         if (rp[i] == bcol) {
2169           bap = ap +  bs2*i + bs*cidx + ridx;
2170           if (is == ADD_VALUES) *bap += value;
2171           else                  *bap  = value;
2172           goto noinsert1;
2173         }
2174       }
2175       if (nonew == 1) goto noinsert1;
2176       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2177       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2178       N = nrow++ - 1; high++;
2179       /* shift up all the later entries in this row */
2180       for (ii=N; ii>=i; ii--) {
2181         rp[ii+1] = rp[ii];
2182         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2183       }
2184       if (N>=i) {
2185         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2186       }
2187       rp[i]                      = bcol;
2188       ap[bs2*i + bs*cidx + ridx] = value;
2189       a->nz++;
2190       A->nonzerostate++;
2191 noinsert1:;
2192       low = i;
2193     }
2194     ailen[brow] = nrow;
2195   }
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2201 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2202 {
2203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2204   Mat            outA;
2205   PetscErrorCode ierr;
2206   PetscBool      row_identity,col_identity;
2207 
2208   PetscFunctionBegin;
2209   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2210   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2211   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2212   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2213 
2214   outA            = inA;
2215   inA->factortype = MAT_FACTOR_LU;
2216 
2217   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2218 
2219   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2220   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2221   a->row = row;
2222   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2223   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2224   a->col = col;
2225 
2226   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2227   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2228   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2229   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2230 
2231   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2232   if (!a->solve_work) {
2233     ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr);
2234     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2235   }
2236   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2242 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2243 {
2244   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2245   PetscInt    i,nz,mbs;
2246 
2247   PetscFunctionBegin;
2248   nz  = baij->maxnz;
2249   mbs = baij->mbs;
2250   for (i=0; i<nz; i++) {
2251     baij->j[i] = indices[i];
2252   }
2253   baij->nz = nz;
2254   for (i=0; i<mbs; i++) {
2255     baij->ilen[i] = baij->imax[i];
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2262 /*@
2263     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2264        in the matrix.
2265 
2266   Input Parameters:
2267 +  mat - the SeqBAIJ matrix
2268 -  indices - the column indices
2269 
2270   Level: advanced
2271 
2272   Notes:
2273     This can be called if you have precomputed the nonzero structure of the
2274   matrix and want to provide it to the matrix object to improve the performance
2275   of the MatSetValues() operation.
2276 
2277     You MUST have set the correct numbers of nonzeros per row in the call to
2278   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2279 
2280     MUST be called before any calls to MatSetValues();
2281 
2282 @*/
2283 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2284 {
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2289   PetscValidPointer(indices,2);
2290   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2296 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2297 {
2298   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2299   PetscErrorCode ierr;
2300   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2301   PetscReal      atmp;
2302   PetscScalar    *x,zero = 0.0;
2303   MatScalar      *aa;
2304   PetscInt       ncols,brow,krow,kcol;
2305 
2306   PetscFunctionBegin;
2307   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2308   bs  = A->rmap->bs;
2309   aa  = a->a;
2310   ai  = a->i;
2311   aj  = a->j;
2312   mbs = a->mbs;
2313 
2314   ierr = VecSet(v,zero);CHKERRQ(ierr);
2315   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2316   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2317   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2318   for (i=0; i<mbs; i++) {
2319     ncols = ai[1] - ai[0]; ai++;
2320     brow  = bs*i;
2321     for (j=0; j<ncols; j++) {
2322       for (kcol=0; kcol<bs; kcol++) {
2323         for (krow=0; krow<bs; krow++) {
2324           atmp = PetscAbsScalar(*aa);aa++;
2325           row  = brow + krow;   /* row index */
2326           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2327         }
2328       }
2329       aj++;
2330     }
2331   }
2332   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatCopy_SeqBAIJ"
2338 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2339 {
2340   PetscErrorCode ierr;
2341 
2342   PetscFunctionBegin;
2343   /* If the two matrices have the same copy implementation, use fast copy. */
2344   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2345     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2346     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2347     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2348 
2349     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2350     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2351     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2352   } else {
2353     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2354   }
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2360 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2361 {
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2371 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2372 {
2373   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2374 
2375   PetscFunctionBegin;
2376   *array = a->a;
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2382 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2383 {
2384   PetscFunctionBegin;
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2390 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2391 {
2392   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2393   PetscErrorCode ierr;
2394   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2395   PetscBLASInt   one=1;
2396 
2397   PetscFunctionBegin;
2398   if (str == SAME_NONZERO_PATTERN) {
2399     PetscScalar  alpha = a;
2400     PetscBLASInt bnz;
2401     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2402     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2403   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2404     if (y->xtoy && y->XtoY != X) {
2405       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2406       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2407     }
2408     if (!y->xtoy) { /* get xtoy */
2409       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2410       y->XtoY = X;
2411       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2412     }
2413     for (i=0; i<x->nz; i++) {
2414       j = 0;
2415       while (j < bs2) {
2416         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2417         j++;
2418       }
2419     }
2420     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(double)((PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2421   } else {
2422     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2423   }
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 #undef __FUNCT__
2428 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2429 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2430 {
2431   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2432   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2433   MatScalar   *aa = a->a;
2434 
2435   PetscFunctionBegin;
2436   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2442 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2443 {
2444   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2445   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2446   MatScalar   *aa = a->a;
2447 
2448   PetscFunctionBegin;
2449   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2455 /*
2456     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2457 */
2458 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2459 {
2460   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2461   PetscErrorCode ierr;
2462   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2463   PetscInt       nz = a->i[m],row,*jj,mr,col;
2464 
2465   PetscFunctionBegin;
2466   *nn = n;
2467   if (!ia) PetscFunctionReturn(0);
2468   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2469   else {
2470     ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2471     ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2472     ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2473     jj   = a->j;
2474     for (i=0; i<nz; i++) {
2475       collengths[jj[i]]++;
2476     }
2477     cia[0] = oshift;
2478     for (i=0; i<n; i++) {
2479       cia[i+1] = cia[i] + collengths[i];
2480     }
2481     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2482     jj   = a->j;
2483     for (row=0; row<m; row++) {
2484       mr = a->i[row+1] - a->i[row];
2485       for (i=0; i<mr; i++) {
2486         col = *jj++;
2487 
2488         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2489       }
2490     }
2491     ierr = PetscFree(collengths);CHKERRQ(ierr);
2492     *ia  = cia; *ja = cja;
2493   }
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 #undef __FUNCT__
2498 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2499 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2500 {
2501   PetscErrorCode ierr;
2502 
2503   PetscFunctionBegin;
2504   if (!ia) PetscFunctionReturn(0);
2505   ierr = PetscFree(*ia);CHKERRQ(ierr);
2506   ierr = PetscFree(*ja);CHKERRQ(ierr);
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 /*
2511  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2512  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2513  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2514  */
2515 #undef __FUNCT__
2516 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2517 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2518 {
2519   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2520   PetscErrorCode ierr;
2521   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2522   PetscInt       nz = a->i[m],row,*jj,mr,col;
2523   PetscInt       *cspidx;
2524 
2525   PetscFunctionBegin;
2526   *nn = n;
2527   if (!ia) PetscFunctionReturn(0);
2528 
2529   ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2530   ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2531   ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2532   ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr);
2533   jj   = a->j;
2534   for (i=0; i<nz; i++) {
2535     collengths[jj[i]]++;
2536   }
2537   cia[0] = oshift;
2538   for (i=0; i<n; i++) {
2539     cia[i+1] = cia[i] + collengths[i];
2540   }
2541   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2542   jj   = a->j;
2543   for (row=0; row<m; row++) {
2544     mr = a->i[row+1] - a->i[row];
2545     for (i=0; i<mr; i++) {
2546       col = *jj++;
2547       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2548       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2549     }
2550   }
2551   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2552   *ia    = cia; *ja = cja;
2553   *spidx = cspidx;
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2559 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2560 {
2561   PetscErrorCode ierr;
2562 
2563   PetscFunctionBegin;
2564   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2565   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 /* -------------------------------------------------------------------*/
2570 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2571                                        MatGetRow_SeqBAIJ,
2572                                        MatRestoreRow_SeqBAIJ,
2573                                        MatMult_SeqBAIJ_N,
2574                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2575                                        MatMultTranspose_SeqBAIJ,
2576                                        MatMultTransposeAdd_SeqBAIJ,
2577                                        0,
2578                                        0,
2579                                        0,
2580                                /* 10*/ 0,
2581                                        MatLUFactor_SeqBAIJ,
2582                                        0,
2583                                        0,
2584                                        MatTranspose_SeqBAIJ,
2585                                /* 15*/ MatGetInfo_SeqBAIJ,
2586                                        MatEqual_SeqBAIJ,
2587                                        MatGetDiagonal_SeqBAIJ,
2588                                        MatDiagonalScale_SeqBAIJ,
2589                                        MatNorm_SeqBAIJ,
2590                                /* 20*/ 0,
2591                                        MatAssemblyEnd_SeqBAIJ,
2592                                        MatSetOption_SeqBAIJ,
2593                                        MatZeroEntries_SeqBAIJ,
2594                                /* 24*/ MatZeroRows_SeqBAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                /* 29*/ MatSetUp_SeqBAIJ,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                /* 34*/ MatDuplicate_SeqBAIJ,
2605                                        0,
2606                                        0,
2607                                        MatILUFactor_SeqBAIJ,
2608                                        0,
2609                                /* 39*/ MatAXPY_SeqBAIJ,
2610                                        MatGetSubMatrices_SeqBAIJ,
2611                                        MatIncreaseOverlap_SeqBAIJ,
2612                                        MatGetValues_SeqBAIJ,
2613                                        MatCopy_SeqBAIJ,
2614                                /* 44*/ 0,
2615                                        MatScale_SeqBAIJ,
2616                                        0,
2617                                        0,
2618                                        MatZeroRowsColumns_SeqBAIJ,
2619                                /* 49*/ 0,
2620                                        MatGetRowIJ_SeqBAIJ,
2621                                        MatRestoreRowIJ_SeqBAIJ,
2622                                        MatGetColumnIJ_SeqBAIJ,
2623                                        MatRestoreColumnIJ_SeqBAIJ,
2624                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        MatSetValuesBlocked_SeqBAIJ,
2629                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2630                                        MatDestroy_SeqBAIJ,
2631                                        MatView_SeqBAIJ,
2632                                        0,
2633                                        0,
2634                                /* 64*/ 0,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2640                                        0,
2641                                        MatConvert_Basic,
2642                                        0,
2643                                        0,
2644                                /* 74*/ 0,
2645                                        MatFDColoringApply_BAIJ,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                /* 79*/ 0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        MatLoad_SeqBAIJ,
2654                                /* 84*/ 0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                /* 89*/ 0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                /* 94*/ 0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                /* 99*/ 0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                /*104*/ 0,
2675                                        MatRealPart_SeqBAIJ,
2676                                        MatImaginaryPart_SeqBAIJ,
2677                                        0,
2678                                        0,
2679                                /*109*/ 0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatMissingDiagonal_SeqBAIJ,
2684                                /*114*/ 0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                /*119*/ 0,
2690                                        0,
2691                                        MatMultHermitianTranspose_SeqBAIJ,
2692                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2693                                        0,
2694                                /*124*/ 0,
2695                                        0,
2696                                        MatInvertBlockDiagonal_SeqBAIJ,
2697                                        0,
2698                                        0,
2699                                /*129*/ 0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                /*134*/ 0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                /*139*/ 0,
2710                                        0,
2711                                        0,
2712                                        MatFDColoringSetUp_SeqXAIJ
2713 };
2714 
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2717 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2718 {
2719   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2720   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2721   PetscErrorCode ierr;
2722 
2723   PetscFunctionBegin;
2724   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2725 
2726   /* allocate space for values if not already there */
2727   if (!aij->saved_values) {
2728     ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr);
2729     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2730   }
2731 
2732   /* copy values over */
2733   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 #undef __FUNCT__
2738 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2739 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2740 {
2741   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2742   PetscErrorCode ierr;
2743   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2744 
2745   PetscFunctionBegin;
2746   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2747   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2748 
2749   /* copy values over */
2750   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2755 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2756 
2757 #undef __FUNCT__
2758 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2759 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2760 {
2761   Mat_SeqBAIJ    *b;
2762   PetscErrorCode ierr;
2763   PetscInt       i,mbs,nbs,bs2;
2764   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2765 
2766   PetscFunctionBegin;
2767   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2768   if (nz == MAT_SKIP_ALLOCATION) {
2769     skipallocation = PETSC_TRUE;
2770     nz             = 0;
2771   }
2772 
2773   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2774   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2775   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2776   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2777 
2778   B->preallocated = PETSC_TRUE;
2779 
2780   mbs = B->rmap->n/bs;
2781   nbs = B->cmap->n/bs;
2782   bs2 = bs*bs;
2783 
2784   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2785 
2786   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2787   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2788   if (nnz) {
2789     for (i=0; i<mbs; i++) {
2790       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2791       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2792     }
2793   }
2794 
2795   b    = (Mat_SeqBAIJ*)B->data;
2796   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2797   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2798   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2799 
2800   if (!flg) {
2801     switch (bs) {
2802     case 1:
2803       B->ops->mult    = MatMult_SeqBAIJ_1;
2804       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2805       break;
2806     case 2:
2807       B->ops->mult    = MatMult_SeqBAIJ_2;
2808       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2809       break;
2810     case 3:
2811       B->ops->mult    = MatMult_SeqBAIJ_3;
2812       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2813       break;
2814     case 4:
2815       B->ops->mult    = MatMult_SeqBAIJ_4;
2816       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2817       break;
2818     case 5:
2819       B->ops->mult    = MatMult_SeqBAIJ_5;
2820       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2821       break;
2822     case 6:
2823       B->ops->mult    = MatMult_SeqBAIJ_6;
2824       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2825       break;
2826     case 7:
2827       B->ops->mult    = MatMult_SeqBAIJ_7;
2828       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2829       break;
2830     case 15:
2831       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2832       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2833       break;
2834     default:
2835       B->ops->mult    = MatMult_SeqBAIJ_N;
2836       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2837       break;
2838     }
2839   }
2840   B->ops->sor = MatSOR_SeqBAIJ;
2841   b->mbs = mbs;
2842   b->nbs = nbs;
2843   if (!skipallocation) {
2844     if (!b->imax) {
2845       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2846       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2847 
2848       b->free_imax_ilen = PETSC_TRUE;
2849     }
2850     /* b->ilen will count nonzeros in each block row so far. */
2851     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2852     if (!nnz) {
2853       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2854       else if (nz < 0) nz = 1;
2855       for (i=0; i<mbs; i++) b->imax[i] = nz;
2856       nz = nz*mbs;
2857     } else {
2858       nz = 0;
2859       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2860     }
2861 
2862     /* allocate the matrix space */
2863     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2864     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2865     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2866     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2867     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2868 
2869     b->singlemalloc = PETSC_TRUE;
2870     b->i[0]         = 0;
2871     for (i=1; i<mbs+1; i++) {
2872       b->i[i] = b->i[i-1] + b->imax[i-1];
2873     }
2874     b->free_a  = PETSC_TRUE;
2875     b->free_ij = PETSC_TRUE;
2876 #if defined(PETSC_THREADCOMM_ACTIVE)
2877     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2878 #endif
2879   } else {
2880     b->free_a  = PETSC_FALSE;
2881     b->free_ij = PETSC_FALSE;
2882   }
2883 
2884   b->bs2              = bs2;
2885   b->mbs              = mbs;
2886   b->nz               = 0;
2887   b->maxnz            = nz;
2888   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2889   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2895 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2896 {
2897   PetscInt       i,m,nz,nz_max=0,*nnz;
2898   PetscScalar    *values=0;
2899   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2900   PetscErrorCode ierr;
2901 
2902   PetscFunctionBegin;
2903   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2904   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2905   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2906   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2907   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2908   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2909   m    = B->rmap->n/bs;
2910 
2911   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2912   ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr);
2913   for (i=0; i<m; i++) {
2914     nz = ii[i+1]- ii[i];
2915     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2916     nz_max = PetscMax(nz_max, nz);
2917     nnz[i] = nz;
2918   }
2919   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2920   ierr = PetscFree(nnz);CHKERRQ(ierr);
2921 
2922   values = (PetscScalar*)V;
2923   if (!values) {
2924     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2925   }
2926   for (i=0; i<m; i++) {
2927     PetscInt          ncols  = ii[i+1] - ii[i];
2928     const PetscInt    *icols = jj + ii[i];
2929     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2930     if (!roworiented) {
2931       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2932     } else {
2933       PetscInt j;
2934       for (j=0; j<ncols; j++) {
2935         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2936         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2937       }
2938     }
2939   }
2940   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2941   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2942   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2943   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2948 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
2949 #if defined(PETSC_HAVE_MUMPS)
2950 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2951 #endif
2952 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
2953 
2954 /*MC
2955    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2956    block sparse compressed row format.
2957 
2958    Options Database Keys:
2959 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2960 
2961   Level: beginner
2962 
2963 .seealso: MatCreateSeqBAIJ()
2964 M*/
2965 
2966 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatCreate_SeqBAIJ"
2970 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2971 {
2972   PetscErrorCode ierr;
2973   PetscMPIInt    size;
2974   Mat_SeqBAIJ    *b;
2975 
2976   PetscFunctionBegin;
2977   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2978   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2979 
2980   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2981   B->data = (void*)b;
2982   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2983 
2984   b->row          = 0;
2985   b->col          = 0;
2986   b->icol         = 0;
2987   b->reallocs     = 0;
2988   b->saved_values = 0;
2989 
2990   b->roworiented        = PETSC_TRUE;
2991   b->nonew              = 0;
2992   b->diag               = 0;
2993   b->solve_work         = 0;
2994   b->mult_work          = 0;
2995   B->spptr              = 0;
2996   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
2997   b->keepnonzeropattern = PETSC_FALSE;
2998   b->xtoy               = 0;
2999   b->XtoY               = 0;
3000 
3001   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3002   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3004 #if defined(PETSC_HAVE_MUMPS)
3005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3006 #endif
3007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3014   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3015   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3017   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 #undef __FUNCT__
3022 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3023 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3024 {
3025   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3026   PetscErrorCode ierr;
3027   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3028 
3029   PetscFunctionBegin;
3030   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3031 
3032   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3033     c->imax           = a->imax;
3034     c->ilen           = a->ilen;
3035     c->free_imax_ilen = PETSC_FALSE;
3036   } else {
3037     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3038     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3039     for (i=0; i<mbs; i++) {
3040       c->imax[i] = a->imax[i];
3041       c->ilen[i] = a->ilen[i];
3042     }
3043     c->free_imax_ilen = PETSC_TRUE;
3044   }
3045 
3046   /* allocate the matrix space */
3047   if (mallocmatspace) {
3048     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3049       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3050       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3051 
3052       c->i            = a->i;
3053       c->j            = a->j;
3054       c->singlemalloc = PETSC_FALSE;
3055       c->free_a       = PETSC_TRUE;
3056       c->free_ij      = PETSC_FALSE;
3057       c->parent       = A;
3058       C->preallocated = PETSC_TRUE;
3059       C->assembled    = PETSC_TRUE;
3060 
3061       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3062       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3063       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3064     } else {
3065       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3066       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3067 
3068       c->singlemalloc = PETSC_TRUE;
3069       c->free_a       = PETSC_TRUE;
3070       c->free_ij      = PETSC_TRUE;
3071 
3072       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3073       if (mbs > 0) {
3074         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3075         if (cpvalues == MAT_COPY_VALUES) {
3076           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3077         } else {
3078           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3079         }
3080       }
3081       C->preallocated = PETSC_TRUE;
3082       C->assembled    = PETSC_TRUE;
3083     }
3084   }
3085 
3086   c->roworiented = a->roworiented;
3087   c->nonew       = a->nonew;
3088 
3089   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3090   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3091 
3092   c->bs2         = a->bs2;
3093   c->mbs         = a->mbs;
3094   c->nbs         = a->nbs;
3095 
3096   if (a->diag) {
3097     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3098       c->diag      = a->diag;
3099       c->free_diag = PETSC_FALSE;
3100     } else {
3101       ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr);
3102       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3103       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3104       c->free_diag = PETSC_TRUE;
3105     }
3106   } else c->diag = 0;
3107 
3108   c->nz         = a->nz;
3109   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3110   c->solve_work = 0;
3111   c->mult_work  = 0;
3112 
3113   c->compressedrow.use   = a->compressedrow.use;
3114   c->compressedrow.nrows = a->compressedrow.nrows;
3115   if (a->compressedrow.use) {
3116     i    = a->compressedrow.nrows;
3117     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3118     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3119     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3120     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3121   } else {
3122     c->compressedrow.use    = PETSC_FALSE;
3123     c->compressedrow.i      = NULL;
3124     c->compressedrow.rindex = NULL;
3125   }
3126   C->nonzerostate = A->nonzerostate;
3127 
3128   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3129   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 #undef __FUNCT__
3134 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3135 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3136 {
3137   PetscErrorCode ierr;
3138 
3139   PetscFunctionBegin;
3140   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3141   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3142   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3143   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3144   PetscFunctionReturn(0);
3145 }
3146 
3147 #undef __FUNCT__
3148 #define __FUNCT__ "MatLoad_SeqBAIJ"
3149 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3150 {
3151   Mat_SeqBAIJ    *a;
3152   PetscErrorCode ierr;
3153   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3154   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3155   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3156   PetscInt       *masked,nmask,tmp,bs2,ishift;
3157   PetscMPIInt    size;
3158   int            fd;
3159   PetscScalar    *aa;
3160   MPI_Comm       comm;
3161 
3162   PetscFunctionBegin;
3163   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3164   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3165   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3166   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3167   bs2  = bs*bs;
3168 
3169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3170   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3171   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3172   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3173   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3174   M = header[1]; N = header[2]; nz = header[3];
3175 
3176   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3177   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3178 
3179   /*
3180      This code adds extra rows to make sure the number of rows is
3181     divisible by the blocksize
3182   */
3183   mbs        = M/bs;
3184   extra_rows = bs - M + bs*(mbs);
3185   if (extra_rows == bs) extra_rows = 0;
3186   else mbs++;
3187   if (extra_rows) {
3188     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3189   }
3190 
3191   /* Set global sizes if not already set */
3192   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3193     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3194   } else { /* Check if the matrix global sizes are correct */
3195     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3196     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3197       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3198     }
3199     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3200   }
3201 
3202   /* read in row lengths */
3203   ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
3204   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3205   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3206 
3207   /* read in column indices */
3208   ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr);
3209   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3210   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3211 
3212   /* loop over row lengths determining block row lengths */
3213   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3214   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3215   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3216   rowcount = 0;
3217   nzcount  = 0;
3218   for (i=0; i<mbs; i++) {
3219     nmask = 0;
3220     for (j=0; j<bs; j++) {
3221       kmax = rowlengths[rowcount];
3222       for (k=0; k<kmax; k++) {
3223         tmp = jj[nzcount++]/bs;
3224         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3225       }
3226       rowcount++;
3227     }
3228     browlengths[i] += nmask;
3229     /* zero out the mask elements we set */
3230     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3231   }
3232 
3233   /* Do preallocation  */
3234   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3235   a    = (Mat_SeqBAIJ*)newmat->data;
3236 
3237   /* set matrix "i" values */
3238   a->i[0] = 0;
3239   for (i=1; i<= mbs; i++) {
3240     a->i[i]      = a->i[i-1] + browlengths[i-1];
3241     a->ilen[i-1] = browlengths[i-1];
3242   }
3243   a->nz = 0;
3244   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3245 
3246   /* read in nonzero values */
3247   ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr);
3248   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3249   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3250 
3251   /* set "a" and "j" values into matrix */
3252   nzcount = 0; jcount = 0;
3253   for (i=0; i<mbs; i++) {
3254     nzcountb = nzcount;
3255     nmask    = 0;
3256     for (j=0; j<bs; j++) {
3257       kmax = rowlengths[i*bs+j];
3258       for (k=0; k<kmax; k++) {
3259         tmp = jj[nzcount++]/bs;
3260         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3261       }
3262     }
3263     /* sort the masked values */
3264     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3265 
3266     /* set "j" values into matrix */
3267     maskcount = 1;
3268     for (j=0; j<nmask; j++) {
3269       a->j[jcount++]  = masked[j];
3270       mask[masked[j]] = maskcount++;
3271     }
3272     /* set "a" values into matrix */
3273     ishift = bs2*a->i[i];
3274     for (j=0; j<bs; j++) {
3275       kmax = rowlengths[i*bs+j];
3276       for (k=0; k<kmax; k++) {
3277         tmp       = jj[nzcountb]/bs;
3278         block     = mask[tmp] - 1;
3279         point     = jj[nzcountb] - bs*tmp;
3280         idx       = ishift + bs2*block + j + bs*point;
3281         a->a[idx] = (MatScalar)aa[nzcountb++];
3282       }
3283     }
3284     /* zero out the mask elements we set */
3285     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3286   }
3287   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3288 
3289   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3290   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3291   ierr = PetscFree(aa);CHKERRQ(ierr);
3292   ierr = PetscFree(jj);CHKERRQ(ierr);
3293   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3294 
3295   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3296   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3297   PetscFunctionReturn(0);
3298 }
3299 
3300 #undef __FUNCT__
3301 #define __FUNCT__ "MatCreateSeqBAIJ"
3302 /*@C
3303    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3304    compressed row) format.  For good matrix assembly performance the
3305    user should preallocate the matrix storage by setting the parameter nz
3306    (or the array nnz).  By setting these parameters accurately, performance
3307    during matrix assembly can be increased by more than a factor of 50.
3308 
3309    Collective on MPI_Comm
3310 
3311    Input Parameters:
3312 +  comm - MPI communicator, set to PETSC_COMM_SELF
3313 .  bs - size of block
3314 .  m - number of rows
3315 .  n - number of columns
3316 .  nz - number of nonzero blocks  per block row (same for all rows)
3317 -  nnz - array containing the number of nonzero blocks in the various block rows
3318          (possibly different for each block row) or NULL
3319 
3320    Output Parameter:
3321 .  A - the matrix
3322 
3323    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3324    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3325    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3326 
3327    Options Database Keys:
3328 .   -mat_no_unroll - uses code that does not unroll the loops in the
3329                      block calculations (much slower)
3330 .    -mat_block_size - size of the blocks to use
3331 
3332    Level: intermediate
3333 
3334    Notes:
3335    The number of rows and columns must be divisible by blocksize.
3336 
3337    If the nnz parameter is given then the nz parameter is ignored
3338 
3339    A nonzero block is any block that as 1 or more nonzeros in it
3340 
3341    The block AIJ format is fully compatible with standard Fortran 77
3342    storage.  That is, the stored row and column indices can begin at
3343    either one (as in Fortran) or zero.  See the users' manual for details.
3344 
3345    Specify the preallocated storage with either nz or nnz (not both).
3346    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3347    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3348    matrices.
3349 
3350 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3351 @*/
3352 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3353 {
3354   PetscErrorCode ierr;
3355 
3356   PetscFunctionBegin;
3357   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3358   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3359   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3360   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3361   PetscFunctionReturn(0);
3362 }
3363 
3364 #undef __FUNCT__
3365 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3366 /*@C
3367    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3368    per row in the matrix. For good matrix assembly performance the
3369    user should preallocate the matrix storage by setting the parameter nz
3370    (or the array nnz).  By setting these parameters accurately, performance
3371    during matrix assembly can be increased by more than a factor of 50.
3372 
3373    Collective on MPI_Comm
3374 
3375    Input Parameters:
3376 +  A - the matrix
3377 .  bs - size of block
3378 .  nz - number of block nonzeros per block row (same for all rows)
3379 -  nnz - array containing the number of block nonzeros in the various block rows
3380          (possibly different for each block row) or NULL
3381 
3382    Options Database Keys:
3383 .   -mat_no_unroll - uses code that does not unroll the loops in the
3384                      block calculations (much slower)
3385 .    -mat_block_size - size of the blocks to use
3386 
3387    Level: intermediate
3388 
3389    Notes:
3390    If the nnz parameter is given then the nz parameter is ignored
3391 
3392    You can call MatGetInfo() to get information on how effective the preallocation was;
3393    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3394    You can also run with the option -info and look for messages with the string
3395    malloc in them to see if additional memory allocation was needed.
3396 
3397    The block AIJ format is fully compatible with standard Fortran 77
3398    storage.  That is, the stored row and column indices can begin at
3399    either one (as in Fortran) or zero.  See the users' manual for details.
3400 
3401    Specify the preallocated storage with either nz or nnz (not both).
3402    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3403    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3404 
3405 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3406 @*/
3407 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3408 {
3409   PetscErrorCode ierr;
3410 
3411   PetscFunctionBegin;
3412   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3413   PetscValidType(B,1);
3414   PetscValidLogicalCollectiveInt(B,bs,2);
3415   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 #undef __FUNCT__
3420 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3421 /*@C
3422    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3423    (the default sequential PETSc format).
3424 
3425    Collective on MPI_Comm
3426 
3427    Input Parameters:
3428 +  A - the matrix
3429 .  i - the indices into j for the start of each local row (starts with zero)
3430 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3431 -  v - optional values in the matrix
3432 
3433    Level: developer
3434 
3435    Notes:
3436    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3437    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3438    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3439    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3440    block column and the second index is over columns within a block.
3441 
3442 .keywords: matrix, aij, compressed row, sparse
3443 
3444 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3445 @*/
3446 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3447 {
3448   PetscErrorCode ierr;
3449 
3450   PetscFunctionBegin;
3451   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3452   PetscValidType(B,1);
3453   PetscValidLogicalCollectiveInt(B,bs,2);
3454   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3455   PetscFunctionReturn(0);
3456 }
3457 
3458 
3459 #undef __FUNCT__
3460 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3461 /*@
3462      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3463 
3464      Collective on MPI_Comm
3465 
3466    Input Parameters:
3467 +  comm - must be an MPI communicator of size 1
3468 .  bs - size of block
3469 .  m - number of rows
3470 .  n - number of columns
3471 .  i - row indices
3472 .  j - column indices
3473 -  a - matrix values
3474 
3475    Output Parameter:
3476 .  mat - the matrix
3477 
3478    Level: advanced
3479 
3480    Notes:
3481        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3482     once the matrix is destroyed
3483 
3484        You cannot set new nonzero locations into this matrix, that will generate an error.
3485 
3486        The i and j indices are 0 based
3487 
3488        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3489 
3490       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3491       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3492       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3493       with column-major ordering within blocks.
3494 
3495 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3496 
3497 @*/
3498 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3499 {
3500   PetscErrorCode ierr;
3501   PetscInt       ii;
3502   Mat_SeqBAIJ    *baij;
3503 
3504   PetscFunctionBegin;
3505   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3506   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3507 
3508   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3509   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3510   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3511   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3512   baij = (Mat_SeqBAIJ*)(*mat)->data;
3513   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3514   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3515 
3516   baij->i = i;
3517   baij->j = j;
3518   baij->a = a;
3519 
3520   baij->singlemalloc = PETSC_FALSE;
3521   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3522   baij->free_a       = PETSC_FALSE;
3523   baij->free_ij      = PETSC_FALSE;
3524 
3525   for (ii=0; ii<m; ii++) {
3526     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3527 #if defined(PETSC_USE_DEBUG)
3528     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3529 #endif
3530   }
3531 #if defined(PETSC_USE_DEBUG)
3532   for (ii=0; ii<baij->i[m]; ii++) {
3533     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3534     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3535   }
3536 #endif
3537 
3538   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3539   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3540   PetscFunctionReturn(0);
3541 }
3542