xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 580bdb303e1ee3b1222b2042810b4c26340259c6)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912 
913   PetscFunctionBegin;
914   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915   stepval = (n-1)*4;
916   for (k=0; k<m; k++) { /* loop over added rows */
917     row  = im[k];
918     rp   = aj + ai[row];
919     ap   = aa + 16*ai[row];
920     nrow = ailen[row];
921     low  = 0;
922     high = nrow;
923     for (l=0; l<n; l++) { /* loop over added columns */
924       col = in[l];
925       if (col <= lastcol)  low = 0;
926       else                high = nrow;
927       lastcol = col;
928       value   = v + k*(stepval+4 + l)*4;
929       while (high-low > 7) {
930         t = (low+high)/2;
931         if (rp[t] > col) high = t;
932         else             low  = t;
933       }
934       for (i=low; i<high; i++) {
935         if (rp[i] > col) break;
936         if (rp[i] == col) {
937           bap = ap +  16*i;
938           for (ii=0; ii<4; ii++,value+=stepval) {
939             for (jj=ii; jj<16; jj+=4) {
940               bap[jj] += *value++;
941             }
942           }
943           goto noinsert2;
944         }
945       }
946       N = nrow++ - 1;
947       high++; /* added new column index thus must search to one higher than before */
948       /* shift up all the later entries in this row */
949       for (ii=N; ii>=i; ii--) {
950         rp[ii+1] = rp[ii];
951         PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);
952       }
953       if (N >= i) {
954         PetscArrayzero(ap+16*i,16);
955       }
956       rp[i] = col;
957       bap   = ap +  16*i;
958       for (ii=0; ii<4; ii++,value+=stepval) {
959         for (jj=ii; jj<16; jj+=4) {
960           bap[jj] = *value++;
961         }
962       }
963       noinsert2:;
964       low = i;
965     }
966     ailen[row] = nrow;
967   }
968   PetscFunctionReturnVoid();
969 }
970 
971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
972 #define matsetvalues4_ MATSETVALUES4
973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974 #define matsetvalues4_ matsetvalues4
975 #endif
976 
977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978 {
979   Mat         A  = *AA;
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
982   PetscInt    *ai=a->i,*ailen=a->ilen;
983   PetscInt    *aj=a->j,brow,bcol;
984   PetscInt    ridx,cidx,lastcol = -1;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986 
987   PetscFunctionBegin;
988   for (k=0; k<m; k++) { /* loop over added rows */
989     row  = im[k]; brow = row/4;
990     rp   = aj + ai[brow];
991     ap   = aa + 16*ai[brow];
992     nrow = ailen[brow];
993     low  = 0;
994     high = nrow;
995     for (l=0; l<n; l++) { /* loop over added columns */
996       col   = in[l]; bcol = col/4;
997       ridx  = row % 4; cidx = col % 4;
998       value = v[l + k*n];
999       if (col <= lastcol)  low = 0;
1000       else                high = nrow;
1001       lastcol = col;
1002       while (high-low > 7) {
1003         t = (low+high)/2;
1004         if (rp[t] > bcol) high = t;
1005         else              low  = t;
1006       }
1007       for (i=low; i<high; i++) {
1008         if (rp[i] > bcol) break;
1009         if (rp[i] == bcol) {
1010           bap   = ap +  16*i + 4*cidx + ridx;
1011           *bap += value;
1012           goto noinsert1;
1013         }
1014       }
1015       N = nrow++ - 1;
1016       high++; /* added new column thus must search to one higher than before */
1017       /* shift up all the later entries in this row */
1018       PetscArraymove(rp+i+1,rp+i,N-i+1);
1019       PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));
1020       PetscArrayzero(ap+16*i,16);
1021       rp[i]                    = bcol;
1022       ap[16*i + 4*cidx + ridx] = value;
1023 noinsert1:;
1024       low = i;
1025     }
1026     ailen[brow] = nrow;
1027   }
1028   PetscFunctionReturnVoid();
1029 }
1030 
1031 /*
1032      Checks for missing diagonals
1033 */
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*ii = a->i,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !ii) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1047   } else {
1048     diag = a->diag;
1049     for (i=0; i<a->mbs; i++) {
1050       if (diag[i] >= ii[i+1]) {
1051         *missing = PETSC_TRUE;
1052         if (d) *d = i;
1053         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1054         break;
1055       }
1056     }
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1062 {
1063   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1064   PetscErrorCode ierr;
1065   PetscInt       i,j,m = a->mbs;
1066 
1067   PetscFunctionBegin;
1068   if (!a->diag) {
1069     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1070     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1071     a->free_diag = PETSC_TRUE;
1072   }
1073   for (i=0; i<m; i++) {
1074     a->diag[i] = a->i[i+1];
1075     for (j=a->i[i]; j<a->i[i+1]; j++) {
1076       if (a->j[j] == i) {
1077         a->diag[i] = j;
1078         break;
1079       }
1080     }
1081   }
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 
1086 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1087 {
1088   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1089   PetscErrorCode ierr;
1090   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1091   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1092 
1093   PetscFunctionBegin;
1094   *nn = n;
1095   if (!ia) PetscFunctionReturn(0);
1096   if (symmetric) {
1097     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1098     nz   = tia[n];
1099   } else {
1100     tia = a->i; tja = a->j;
1101   }
1102 
1103   if (!blockcompressed && bs > 1) {
1104     (*nn) *= bs;
1105     /* malloc & create the natural set of indices */
1106     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1107     if (n) {
1108       (*ia)[0] = oshift;
1109       for (j=1; j<bs; j++) {
1110         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1111       }
1112     }
1113 
1114     for (i=1; i<n; i++) {
1115       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1116       for (j=1; j<bs; j++) {
1117         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1118       }
1119     }
1120     if (n) {
1121       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1122     }
1123 
1124     if (inja) {
1125       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1126       cnt = 0;
1127       for (i=0; i<n; i++) {
1128         for (j=0; j<bs; j++) {
1129           for (k=tia[i]; k<tia[i+1]; k++) {
1130             for (l=0; l<bs; l++) {
1131               (*ja)[cnt++] = bs*tja[k] + l;
1132             }
1133           }
1134         }
1135       }
1136     }
1137 
1138     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1139       ierr = PetscFree(tia);CHKERRQ(ierr);
1140       ierr = PetscFree(tja);CHKERRQ(ierr);
1141     }
1142   } else if (oshift == 1) {
1143     if (symmetric) {
1144       nz = tia[A->rmap->n/bs];
1145       /*  add 1 to i and j indices */
1146       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1147       *ia = tia;
1148       if (ja) {
1149         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1150         *ja = tja;
1151       }
1152     } else {
1153       nz = a->i[A->rmap->n/bs];
1154       /* malloc space and  add 1 to i and j indices */
1155       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1156       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1157       if (ja) {
1158         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1159         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1160       }
1161     }
1162   } else {
1163     *ia = tia;
1164     if (ja) *ja = tja;
1165   }
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1170 {
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   if (!ia) PetscFunctionReturn(0);
1175   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1176     ierr = PetscFree(*ia);CHKERRQ(ierr);
1177     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1183 {
1184   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1185   PetscErrorCode ierr;
1186 
1187   PetscFunctionBegin;
1188 #if defined(PETSC_USE_LOG)
1189   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1190 #endif
1191   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1192   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1193   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1194   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1195   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1196   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1197   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1198   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1199   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1200   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1201   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1202   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1203   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1204 
1205   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1206   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1207   ierr = PetscFree(A->data);CHKERRQ(ierr);
1208 
1209   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1210   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1211   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1220 #if defined(PETSC_HAVE_HYPRE)
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1222 #endif
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1229 {
1230   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   switch (op) {
1235   case MAT_ROW_ORIENTED:
1236     a->roworiented = flg;
1237     break;
1238   case MAT_KEEP_NONZERO_PATTERN:
1239     a->keepnonzeropattern = flg;
1240     break;
1241   case MAT_NEW_NONZERO_LOCATIONS:
1242     a->nonew = (flg ? 0 : 1);
1243     break;
1244   case MAT_NEW_NONZERO_LOCATION_ERR:
1245     a->nonew = (flg ? -1 : 0);
1246     break;
1247   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1248     a->nonew = (flg ? -2 : 0);
1249     break;
1250   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1251     a->nounused = (flg ? -1 : 0);
1252     break;
1253   case MAT_NEW_DIAGONALS:
1254   case MAT_IGNORE_OFF_PROC_ENTRIES:
1255   case MAT_USE_HASH_TABLE:
1256     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1257     break;
1258   case MAT_SPD:
1259   case MAT_SYMMETRIC:
1260   case MAT_STRUCTURALLY_SYMMETRIC:
1261   case MAT_HERMITIAN:
1262   case MAT_SYMMETRY_ETERNAL:
1263   case MAT_SUBMAT_SINGLEIS:
1264   case MAT_STRUCTURE_ONLY:
1265     /* These options are handled directly by MatSetOption() */
1266     break;
1267   default:
1268     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1269   }
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1274 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1275 {
1276   PetscErrorCode ierr;
1277   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1278   MatScalar      *aa_i;
1279   PetscScalar    *v_i;
1280 
1281   PetscFunctionBegin;
1282   bs  = A->rmap->bs;
1283   bs2 = bs*bs;
1284   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1285 
1286   bn  = row/bs;   /* Block number */
1287   bp  = row % bs; /* Block Position */
1288   M   = ai[bn+1] - ai[bn];
1289   *nz = bs*M;
1290 
1291   if (v) {
1292     *v = 0;
1293     if (*nz) {
1294       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1295       for (i=0; i<M; i++) { /* for each block in the block row */
1296         v_i  = *v + i*bs;
1297         aa_i = aa + bs2*(ai[bn] + i);
1298         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1299       }
1300     }
1301   }
1302 
1303   if (idx) {
1304     *idx = 0;
1305     if (*nz) {
1306       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1307       for (i=0; i<M; i++) { /* for each block in the block row */
1308         idx_i = *idx + i*bs;
1309         itmp  = bs*aj[ai[bn] + i];
1310         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1311       }
1312     }
1313   }
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1318 {
1319   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1328 {
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1333   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1338 {
1339   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1340   Mat            C;
1341   PetscErrorCode ierr;
1342   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1343   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1344   MatScalar      *ata,*aa=a->a;
1345 
1346   PetscFunctionBegin;
1347   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1348   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1349     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1350 
1351     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1352     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1353     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1354     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1355 
1356     at  = (Mat_SeqBAIJ*)C->data;
1357     ati = at->i;
1358     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1359   } else {
1360     C = *B;
1361     at = (Mat_SeqBAIJ*)C->data;
1362     ati = at->i;
1363   }
1364 
1365   atj = at->j;
1366   ata = at->a;
1367 
1368   /* Copy ati into atfill so we have locations of the next free space in atj */
1369   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1370 
1371   /* Walk through A row-wise and mark nonzero entries of A^T. */
1372   for (i=0; i<mbs; i++) {
1373     anzj = ai[i+1] - ai[i];
1374     for (j=0; j<anzj; j++) {
1375       atj[atfill[*aj]] = i;
1376       for (kr=0; kr<bs; kr++) {
1377         for (k=0; k<bs; k++) {
1378           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1379         }
1380       }
1381       atfill[*aj++] += 1;
1382     }
1383   }
1384   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1385   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1386 
1387   /* Clean up temporary space and complete requests. */
1388   ierr = PetscFree(atfill);CHKERRQ(ierr);
1389 
1390   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1391     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1392     *B = C;
1393   } else {
1394     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1400 {
1401   PetscErrorCode ierr;
1402   Mat            Btrans;
1403 
1404   PetscFunctionBegin;
1405   *f   = PETSC_FALSE;
1406   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1407   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1408   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1413 {
1414   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1415   PetscErrorCode ierr;
1416   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1417   int            fd;
1418   PetscScalar    *aa;
1419   FILE           *file;
1420 
1421   PetscFunctionBegin;
1422   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1423   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1424   col_lens[0] = MAT_FILE_CLASSID;
1425 
1426   col_lens[1] = A->rmap->N;
1427   col_lens[2] = A->cmap->n;
1428   col_lens[3] = a->nz*bs2;
1429 
1430   /* store lengths of each row and write (including header) to file */
1431   count = 0;
1432   for (i=0; i<a->mbs; i++) {
1433     for (j=0; j<bs; j++) {
1434       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1435     }
1436   }
1437   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1438   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1439 
1440   /* store column indices (zero start index) */
1441   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1442   count = 0;
1443   for (i=0; i<a->mbs; i++) {
1444     for (j=0; j<bs; j++) {
1445       for (k=a->i[i]; k<a->i[i+1]; k++) {
1446         for (l=0; l<bs; l++) {
1447           jj[count++] = bs*a->j[k] + l;
1448         }
1449       }
1450     }
1451   }
1452   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1453   ierr = PetscFree(jj);CHKERRQ(ierr);
1454 
1455   /* store nonzero values */
1456   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1457   count = 0;
1458   for (i=0; i<a->mbs; i++) {
1459     for (j=0; j<bs; j++) {
1460       for (k=a->i[i]; k<a->i[i+1]; k++) {
1461         for (l=0; l<bs; l++) {
1462           aa[count++] = a->a[bs2*k + l*bs + j];
1463         }
1464       }
1465     }
1466   }
1467   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1468   ierr = PetscFree(aa);CHKERRQ(ierr);
1469 
1470   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1471   if (file) {
1472     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1478 {
1479   PetscErrorCode ierr;
1480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1481   PetscInt       i,bs = A->rmap->bs,k;
1482 
1483   PetscFunctionBegin;
1484   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1485   for (i=0; i<a->mbs; i++) {
1486     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1487     for (k=a->i[i]; k<a->i[i+1]; k++) {
1488       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1489     }
1490     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1491   }
1492   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1497 {
1498   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1499   PetscErrorCode    ierr;
1500   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1501   PetscViewerFormat format;
1502 
1503   PetscFunctionBegin;
1504   if (A->structure_only) {
1505     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1506     PetscFunctionReturn(0);
1507   }
1508 
1509   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1510   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1511     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1512   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1513     const char *matname;
1514     Mat        aij;
1515     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1516     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1517     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1518     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1519     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1520   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1521       PetscFunctionReturn(0);
1522   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1523     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1524     for (i=0; i<a->mbs; i++) {
1525       for (j=0; j<bs; j++) {
1526         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1527         for (k=a->i[i]; k<a->i[i+1]; k++) {
1528           for (l=0; l<bs; l++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1532                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1533             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1535                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             }
1539 #else
1540             if (a->a[bs2*k + l*bs + j] != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1542             }
1543 #endif
1544           }
1545         }
1546         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1547       }
1548     }
1549     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1550   } else {
1551     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1552     for (i=0; i<a->mbs; i++) {
1553       for (j=0; j<bs; j++) {
1554         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1555         for (k=a->i[i]; k<a->i[i+1]; k++) {
1556           for (l=0; l<bs; l++) {
1557 #if defined(PETSC_USE_COMPLEX)
1558             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1559               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1560                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1561             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1563                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             } else {
1565               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             }
1567 #else
1568             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1569 #endif
1570           }
1571         }
1572         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1573       }
1574     }
1575     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1576   }
1577   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #include <petscdraw.h>
1582 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1583 {
1584   Mat               A = (Mat) Aa;
1585   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1586   PetscErrorCode    ierr;
1587   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1588   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1589   MatScalar         *aa;
1590   PetscViewer       viewer;
1591   PetscViewerFormat format;
1592 
1593   PetscFunctionBegin;
1594   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1595   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1596   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1597 
1598   /* loop over matrix elements drawing boxes */
1599 
1600   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1601     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1602     /* Blue for negative, Cyan for zero and  Red for positive */
1603     color = PETSC_DRAW_BLUE;
1604     for (i=0,row=0; i<mbs; i++,row+=bs) {
1605       for (j=a->i[i]; j<a->i[i+1]; j++) {
1606         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1607         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1608         aa  = a->a + j*bs2;
1609         for (k=0; k<bs; k++) {
1610           for (l=0; l<bs; l++) {
1611             if (PetscRealPart(*aa++) >=  0.) continue;
1612             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1613           }
1614         }
1615       }
1616     }
1617     color = PETSC_DRAW_CYAN;
1618     for (i=0,row=0; i<mbs; i++,row+=bs) {
1619       for (j=a->i[i]; j<a->i[i+1]; j++) {
1620         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1621         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1622         aa  = a->a + j*bs2;
1623         for (k=0; k<bs; k++) {
1624           for (l=0; l<bs; l++) {
1625             if (PetscRealPart(*aa++) != 0.) continue;
1626             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1627           }
1628         }
1629       }
1630     }
1631     color = PETSC_DRAW_RED;
1632     for (i=0,row=0; i<mbs; i++,row+=bs) {
1633       for (j=a->i[i]; j<a->i[i+1]; j++) {
1634         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1635         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1636         aa  = a->a + j*bs2;
1637         for (k=0; k<bs; k++) {
1638           for (l=0; l<bs; l++) {
1639             if (PetscRealPart(*aa++) <= 0.) continue;
1640             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1641           }
1642         }
1643       }
1644     }
1645     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1646   } else {
1647     /* use contour shading to indicate magnitude of values */
1648     /* first determine max of all nonzero values */
1649     PetscReal minv = 0.0, maxv = 0.0;
1650     PetscDraw popup;
1651 
1652     for (i=0; i<a->nz*a->bs2; i++) {
1653       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1654     }
1655     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1656     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1657     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1658 
1659     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1660     for (i=0,row=0; i<mbs; i++,row+=bs) {
1661       for (j=a->i[i]; j<a->i[i+1]; j++) {
1662         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1663         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1664         aa  = a->a + j*bs2;
1665         for (k=0; k<bs; k++) {
1666           for (l=0; l<bs; l++) {
1667             MatScalar v = *aa++;
1668             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1669             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1670           }
1671         }
1672       }
1673     }
1674     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1680 {
1681   PetscErrorCode ierr;
1682   PetscReal      xl,yl,xr,yr,w,h;
1683   PetscDraw      draw;
1684   PetscBool      isnull;
1685 
1686   PetscFunctionBegin;
1687   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1688   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1689   if (isnull) PetscFunctionReturn(0);
1690 
1691   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1692   xr  += w;          yr += h;        xl = -w;     yl = -h;
1693   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1694   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1695   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1696   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1697   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1702 {
1703   PetscErrorCode ierr;
1704   PetscBool      iascii,isbinary,isdraw;
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1708   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1709   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1710   if (iascii) {
1711     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1712   } else if (isbinary) {
1713     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1714   } else if (isdraw) {
1715     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1716   } else {
1717     Mat B;
1718     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1719     ierr = MatView(B,viewer);CHKERRQ(ierr);
1720     ierr = MatDestroy(&B);CHKERRQ(ierr);
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 
1726 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1727 {
1728   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1729   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1730   PetscInt    *ai = a->i,*ailen = a->ilen;
1731   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1732   MatScalar   *ap,*aa = a->a;
1733 
1734   PetscFunctionBegin;
1735   for (k=0; k<m; k++) { /* loop over rows */
1736     row = im[k]; brow = row/bs;
1737     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1738     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1739     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1740     nrow = ailen[brow];
1741     for (l=0; l<n; l++) { /* loop over columns */
1742       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1743       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1744       col  = in[l];
1745       bcol = col/bs;
1746       cidx = col%bs;
1747       ridx = row%bs;
1748       high = nrow;
1749       low  = 0; /* assume unsorted */
1750       while (high-low > 5) {
1751         t = (low+high)/2;
1752         if (rp[t] > bcol) high = t;
1753         else             low  = t;
1754       }
1755       for (i=low; i<high; i++) {
1756         if (rp[i] > bcol) break;
1757         if (rp[i] == bcol) {
1758           *v++ = ap[bs2*i+bs*cidx+ridx];
1759           goto finished;
1760         }
1761       }
1762       *v++ = 0.0;
1763 finished:;
1764     }
1765   }
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1770 {
1771   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1772   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1773   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1774   PetscErrorCode    ierr;
1775   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1776   PetscBool         roworiented=a->roworiented;
1777   const PetscScalar *value     = v;
1778   MatScalar         *ap=NULL,*aa = a->a,*bap;
1779 
1780   PetscFunctionBegin;
1781   if (roworiented) {
1782     stepval = (n-1)*bs;
1783   } else {
1784     stepval = (m-1)*bs;
1785   }
1786   for (k=0; k<m; k++) { /* loop over added rows */
1787     row = im[k];
1788     if (row < 0) continue;
1789 #if defined(PETSC_USE_DEBUG)
1790     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1791 #endif
1792     rp   = aj + ai[row];
1793     if (!A->structure_only) ap = aa + bs2*ai[row];
1794     rmax = imax[row];
1795     nrow = ailen[row];
1796     low  = 0;
1797     high = nrow;
1798     for (l=0; l<n; l++) { /* loop over added columns */
1799       if (in[l] < 0) continue;
1800 #if defined(PETSC_USE_DEBUG)
1801       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1802 #endif
1803       col = in[l];
1804       if (!A->structure_only) {
1805         if (roworiented) {
1806           value = v + (k*(stepval+bs) + l)*bs;
1807         } else {
1808           value = v + (l*(stepval+bs) + k)*bs;
1809         }
1810       }
1811       if (col <= lastcol) low = 0;
1812       else high = nrow;
1813       lastcol = col;
1814       while (high-low > 7) {
1815         t = (low+high)/2;
1816         if (rp[t] > col) high = t;
1817         else             low  = t;
1818       }
1819       for (i=low; i<high; i++) {
1820         if (rp[i] > col) break;
1821         if (rp[i] == col) {
1822           if (A->structure_only) goto noinsert2;
1823           bap = ap +  bs2*i;
1824           if (roworiented) {
1825             if (is == ADD_VALUES) {
1826               for (ii=0; ii<bs; ii++,value+=stepval) {
1827                 for (jj=ii; jj<bs2; jj+=bs) {
1828                   bap[jj] += *value++;
1829                 }
1830               }
1831             } else {
1832               for (ii=0; ii<bs; ii++,value+=stepval) {
1833                 for (jj=ii; jj<bs2; jj+=bs) {
1834                   bap[jj] = *value++;
1835                 }
1836               }
1837             }
1838           } else {
1839             if (is == ADD_VALUES) {
1840               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1841                 for (jj=0; jj<bs; jj++) {
1842                   bap[jj] += value[jj];
1843                 }
1844                 bap += bs;
1845               }
1846             } else {
1847               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1848                 for (jj=0; jj<bs; jj++) {
1849                   bap[jj]  = value[jj];
1850                 }
1851                 bap += bs;
1852               }
1853             }
1854           }
1855           goto noinsert2;
1856         }
1857       }
1858       if (nonew == 1) goto noinsert2;
1859       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1860       if (A->structure_only) {
1861         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1862       } else {
1863         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1864       }
1865       N = nrow++ - 1; high++;
1866       /* shift up all the later entries in this row */
1867       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1868       rp[i] = col;
1869       if (!A->structure_only) {
1870         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1871         bap   = ap +  bs2*i;
1872         if (roworiented) {
1873           for (ii=0; ii<bs; ii++,value+=stepval) {
1874             for (jj=ii; jj<bs2; jj+=bs) {
1875               bap[jj] = *value++;
1876             }
1877           }
1878         } else {
1879           for (ii=0; ii<bs; ii++,value+=stepval) {
1880             for (jj=0; jj<bs; jj++) {
1881               *bap++ = *value++;
1882             }
1883           }
1884         }
1885       }
1886 noinsert2:;
1887       low = i;
1888     }
1889     ailen[row] = nrow;
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1895 {
1896   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1897   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1898   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1899   PetscErrorCode ierr;
1900   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1901   MatScalar      *aa  = a->a,*ap;
1902   PetscReal      ratio=0.6;
1903 
1904   PetscFunctionBegin;
1905   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1906 
1907   if (m) rmax = ailen[0];
1908   for (i=1; i<mbs; i++) {
1909     /* move each row back by the amount of empty slots (fshift) before it*/
1910     fshift += imax[i-1] - ailen[i-1];
1911     rmax    = PetscMax(rmax,ailen[i]);
1912     if (fshift) {
1913       ip = aj + ai[i];
1914       ap = aa + bs2*ai[i];
1915       N  = ailen[i];
1916       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1917       if (!A->structure_only) {
1918         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1919       }
1920     }
1921     ai[i] = ai[i-1] + ailen[i-1];
1922   }
1923   if (mbs) {
1924     fshift += imax[mbs-1] - ailen[mbs-1];
1925     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1926   }
1927 
1928   /* reset ilen and imax for each row */
1929   a->nonzerorowcnt = 0;
1930   if (A->structure_only) {
1931     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1932   } else { /* !A->structure_only */
1933     for (i=0; i<mbs; i++) {
1934       ailen[i] = imax[i] = ai[i+1] - ai[i];
1935       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1936     }
1937   }
1938   a->nz = ai[mbs];
1939 
1940   /* diagonals may have moved, so kill the diagonal pointers */
1941   a->idiagvalid = PETSC_FALSE;
1942   if (fshift && a->diag) {
1943     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1944     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1945     a->diag = 0;
1946   }
1947   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1949   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1951 
1952   A->info.mallocs    += a->reallocs;
1953   a->reallocs         = 0;
1954   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1955   a->rmax             = rmax;
1956 
1957   if (!A->structure_only) {
1958     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1959   }
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 /*
1964    This function returns an array of flags which indicate the locations of contiguous
1965    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1966    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1967    Assume: sizes should be long enough to hold all the values.
1968 */
1969 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1970 {
1971   PetscInt  i,j,k,row;
1972   PetscBool flg;
1973 
1974   PetscFunctionBegin;
1975   for (i=0,j=0; i<n; j++) {
1976     row = idx[i];
1977     if (row%bs!=0) { /* Not the begining of a block */
1978       sizes[j] = 1;
1979       i++;
1980     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1981       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1982       i++;
1983     } else { /* Begining of the block, so check if the complete block exists */
1984       flg = PETSC_TRUE;
1985       for (k=1; k<bs; k++) {
1986         if (row+k != idx[i+k]) { /* break in the block */
1987           flg = PETSC_FALSE;
1988           break;
1989         }
1990       }
1991       if (flg) { /* No break in the bs */
1992         sizes[j] = bs;
1993         i       += bs;
1994       } else {
1995         sizes[j] = 1;
1996         i++;
1997       }
1998     }
1999   }
2000   *bs_max = j;
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2005 {
2006   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2007   PetscErrorCode    ierr;
2008   PetscInt          i,j,k,count,*rows;
2009   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2010   PetscScalar       zero = 0.0;
2011   MatScalar         *aa;
2012   const PetscScalar *xx;
2013   PetscScalar       *bb;
2014 
2015   PetscFunctionBegin;
2016   /* fix right hand side if needed */
2017   if (x && b) {
2018     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2019     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2020     for (i=0; i<is_n; i++) {
2021       bb[is_idx[i]] = diag*xx[is_idx[i]];
2022     }
2023     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2024     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2025   }
2026 
2027   /* Make a copy of the IS and  sort it */
2028   /* allocate memory for rows,sizes */
2029   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2030 
2031   /* copy IS values to rows, and sort them */
2032   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2033   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2034 
2035   if (baij->keepnonzeropattern) {
2036     for (i=0; i<is_n; i++) sizes[i] = 1;
2037     bs_max          = is_n;
2038   } else {
2039     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2040     A->nonzerostate++;
2041   }
2042 
2043   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2044     row = rows[j];
2045     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2046     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2047     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2048     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2049       if (diag != (PetscScalar)0.0) {
2050         if (baij->ilen[row/bs] > 0) {
2051           baij->ilen[row/bs]       = 1;
2052           baij->j[baij->i[row/bs]] = row/bs;
2053 
2054           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2055         }
2056         /* Now insert all the diagonal values for this bs */
2057         for (k=0; k<bs; k++) {
2058           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2059         }
2060       } else { /* (diag == 0.0) */
2061         baij->ilen[row/bs] = 0;
2062       } /* end (diag == 0.0) */
2063     } else { /* (sizes[i] != bs) */
2064 #if defined(PETSC_USE_DEBUG)
2065       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2066 #endif
2067       for (k=0; k<count; k++) {
2068         aa[0] =  zero;
2069         aa   += bs;
2070       }
2071       if (diag != (PetscScalar)0.0) {
2072         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2073       }
2074     }
2075   }
2076 
2077   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2083 {
2084   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2085   PetscErrorCode    ierr;
2086   PetscInt          i,j,k,count;
2087   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2088   PetscScalar       zero = 0.0;
2089   MatScalar         *aa;
2090   const PetscScalar *xx;
2091   PetscScalar       *bb;
2092   PetscBool         *zeroed,vecs = PETSC_FALSE;
2093 
2094   PetscFunctionBegin;
2095   /* fix right hand side if needed */
2096   if (x && b) {
2097     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2098     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2099     vecs = PETSC_TRUE;
2100   }
2101 
2102   /* zero the columns */
2103   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2104   for (i=0; i<is_n; i++) {
2105     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2106     zeroed[is_idx[i]] = PETSC_TRUE;
2107   }
2108   for (i=0; i<A->rmap->N; i++) {
2109     if (!zeroed[i]) {
2110       row = i/bs;
2111       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2112         for (k=0; k<bs; k++) {
2113           col = bs*baij->j[j] + k;
2114           if (zeroed[col]) {
2115             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2116             if (vecs) bb[i] -= aa[0]*xx[col];
2117             aa[0] = 0.0;
2118           }
2119         }
2120       }
2121     } else if (vecs) bb[i] = diag*xx[i];
2122   }
2123   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2124   if (vecs) {
2125     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2126     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2127   }
2128 
2129   /* zero the rows */
2130   for (i=0; i<is_n; i++) {
2131     row   = is_idx[i];
2132     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2133     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2134     for (k=0; k<count; k++) {
2135       aa[0] =  zero;
2136       aa   += bs;
2137     }
2138     if (diag != (PetscScalar)0.0) {
2139       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2140     }
2141   }
2142   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2147 {
2148   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2149   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2150   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2151   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2152   PetscErrorCode ierr;
2153   PetscInt       ridx,cidx,bs2=a->bs2;
2154   PetscBool      roworiented=a->roworiented;
2155   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2156 
2157   PetscFunctionBegin;
2158   for (k=0; k<m; k++) { /* loop over added rows */
2159     row  = im[k];
2160     brow = row/bs;
2161     if (row < 0) continue;
2162 #if defined(PETSC_USE_DEBUG)
2163     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2164 #endif
2165     rp   = aj + ai[brow];
2166     if (!A->structure_only) ap = aa + bs2*ai[brow];
2167     rmax = imax[brow];
2168     nrow = ailen[brow];
2169     low  = 0;
2170     high = nrow;
2171     for (l=0; l<n; l++) { /* loop over added columns */
2172       if (in[l] < 0) continue;
2173 #if defined(PETSC_USE_DEBUG)
2174       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2175 #endif
2176       col  = in[l]; bcol = col/bs;
2177       ridx = row % bs; cidx = col % bs;
2178       if (!A->structure_only) {
2179         if (roworiented) {
2180           value = v[l + k*n];
2181         } else {
2182           value = v[k + l*m];
2183         }
2184       }
2185       if (col <= lastcol) low = 0; else high = nrow;
2186       lastcol = col;
2187       while (high-low > 7) {
2188         t = (low+high)/2;
2189         if (rp[t] > bcol) high = t;
2190         else              low  = t;
2191       }
2192       for (i=low; i<high; i++) {
2193         if (rp[i] > bcol) break;
2194         if (rp[i] == bcol) {
2195           bap = ap +  bs2*i + bs*cidx + ridx;
2196           if (!A->structure_only) {
2197             if (is == ADD_VALUES) *bap += value;
2198             else                  *bap  = value;
2199           }
2200           goto noinsert1;
2201         }
2202       }
2203       if (nonew == 1) goto noinsert1;
2204       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2205       if (A->structure_only) {
2206         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2207       } else {
2208         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2209       }
2210       N = nrow++ - 1; high++;
2211       /* shift up all the later entries in this row */
2212       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2213       rp[i] = bcol;
2214       if (!A->structure_only) {
2215         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2216         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2217         ap[bs2*i + bs*cidx + ridx] = value;
2218       }
2219       a->nz++;
2220       A->nonzerostate++;
2221 noinsert1:;
2222       low = i;
2223     }
2224     ailen[brow] = nrow;
2225   }
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2230 {
2231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2232   Mat            outA;
2233   PetscErrorCode ierr;
2234   PetscBool      row_identity,col_identity;
2235 
2236   PetscFunctionBegin;
2237   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2238   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2239   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2240   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2241 
2242   outA            = inA;
2243   inA->factortype = MAT_FACTOR_LU;
2244   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2245   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2246 
2247   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2248 
2249   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2250   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2251   a->row = row;
2252   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2253   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2254   a->col = col;
2255 
2256   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2257   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2258   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2259   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2260 
2261   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2262   if (!a->solve_work) {
2263     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2264     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2265   }
2266   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2271 {
2272   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2273   PetscInt    i,nz,mbs;
2274 
2275   PetscFunctionBegin;
2276   nz  = baij->maxnz;
2277   mbs = baij->mbs;
2278   for (i=0; i<nz; i++) {
2279     baij->j[i] = indices[i];
2280   }
2281   baij->nz = nz;
2282   for (i=0; i<mbs; i++) {
2283     baij->ilen[i] = baij->imax[i];
2284   }
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 /*@
2289     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2290        in the matrix.
2291 
2292   Input Parameters:
2293 +  mat - the SeqBAIJ matrix
2294 -  indices - the column indices
2295 
2296   Level: advanced
2297 
2298   Notes:
2299     This can be called if you have precomputed the nonzero structure of the
2300   matrix and want to provide it to the matrix object to improve the performance
2301   of the MatSetValues() operation.
2302 
2303     You MUST have set the correct numbers of nonzeros per row in the call to
2304   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2305 
2306     MUST be called before any calls to MatSetValues();
2307 
2308 @*/
2309 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2310 {
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2315   PetscValidPointer(indices,2);
2316   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2321 {
2322   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2323   PetscErrorCode ierr;
2324   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2325   PetscReal      atmp;
2326   PetscScalar    *x,zero = 0.0;
2327   MatScalar      *aa;
2328   PetscInt       ncols,brow,krow,kcol;
2329 
2330   PetscFunctionBegin;
2331   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2332   bs  = A->rmap->bs;
2333   aa  = a->a;
2334   ai  = a->i;
2335   aj  = a->j;
2336   mbs = a->mbs;
2337 
2338   ierr = VecSet(v,zero);CHKERRQ(ierr);
2339   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2340   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2341   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2342   for (i=0; i<mbs; i++) {
2343     ncols = ai[1] - ai[0]; ai++;
2344     brow  = bs*i;
2345     for (j=0; j<ncols; j++) {
2346       for (kcol=0; kcol<bs; kcol++) {
2347         for (krow=0; krow<bs; krow++) {
2348           atmp = PetscAbsScalar(*aa);aa++;
2349           row  = brow + krow;   /* row index */
2350           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2351         }
2352       }
2353       aj++;
2354     }
2355   }
2356   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2361 {
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   /* If the two matrices have the same copy implementation, use fast copy. */
2366   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2367     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2368     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2369     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2370 
2371     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2372     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2373     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2374     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2375   } else {
2376     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2377   }
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2382 {
2383   PetscErrorCode ierr;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2391 {
2392   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2393 
2394   PetscFunctionBegin;
2395   *array = a->a;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2400 {
2401   PetscFunctionBegin;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2406 {
2407   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2408   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2409   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2410   PetscErrorCode ierr;
2411 
2412   PetscFunctionBegin;
2413   /* Set the number of nonzeros in the new matrix */
2414   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2419 {
2420   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2421   PetscErrorCode ierr;
2422   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2423   PetscBLASInt   one=1;
2424 
2425   PetscFunctionBegin;
2426   if (str == SAME_NONZERO_PATTERN) {
2427     PetscScalar  alpha = a;
2428     PetscBLASInt bnz;
2429     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2430     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2431     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2432   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2433     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2434   } else {
2435     Mat      B;
2436     PetscInt *nnz;
2437     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2438     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2439     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2440     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2441     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2442     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2443     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2444     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2445     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2446     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2447     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2448     ierr = PetscFree(nnz);CHKERRQ(ierr);
2449   }
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2454 {
2455   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2456   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2457   MatScalar   *aa = a->a;
2458 
2459   PetscFunctionBegin;
2460   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2465 {
2466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2467   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2468   MatScalar   *aa = a->a;
2469 
2470   PetscFunctionBegin;
2471   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 /*
2476     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2477 */
2478 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2479 {
2480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2481   PetscErrorCode ierr;
2482   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2483   PetscInt       nz = a->i[m],row,*jj,mr,col;
2484 
2485   PetscFunctionBegin;
2486   *nn = n;
2487   if (!ia) PetscFunctionReturn(0);
2488   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2489   else {
2490     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2491     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2492     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2493     jj   = a->j;
2494     for (i=0; i<nz; i++) {
2495       collengths[jj[i]]++;
2496     }
2497     cia[0] = oshift;
2498     for (i=0; i<n; i++) {
2499       cia[i+1] = cia[i] + collengths[i];
2500     }
2501     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2502     jj   = a->j;
2503     for (row=0; row<m; row++) {
2504       mr = a->i[row+1] - a->i[row];
2505       for (i=0; i<mr; i++) {
2506         col = *jj++;
2507 
2508         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2509       }
2510     }
2511     ierr = PetscFree(collengths);CHKERRQ(ierr);
2512     *ia  = cia; *ja = cja;
2513   }
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2518 {
2519   PetscErrorCode ierr;
2520 
2521   PetscFunctionBegin;
2522   if (!ia) PetscFunctionReturn(0);
2523   ierr = PetscFree(*ia);CHKERRQ(ierr);
2524   ierr = PetscFree(*ja);CHKERRQ(ierr);
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 /*
2529  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2530  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2531  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2532  */
2533 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2534 {
2535   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2536   PetscErrorCode ierr;
2537   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2538   PetscInt       nz = a->i[m],row,*jj,mr,col;
2539   PetscInt       *cspidx;
2540 
2541   PetscFunctionBegin;
2542   *nn = n;
2543   if (!ia) PetscFunctionReturn(0);
2544 
2545   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2546   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2547   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2548   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2549   jj   = a->j;
2550   for (i=0; i<nz; i++) {
2551     collengths[jj[i]]++;
2552   }
2553   cia[0] = oshift;
2554   for (i=0; i<n; i++) {
2555     cia[i+1] = cia[i] + collengths[i];
2556   }
2557   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2558   jj   = a->j;
2559   for (row=0; row<m; row++) {
2560     mr = a->i[row+1] - a->i[row];
2561     for (i=0; i<mr; i++) {
2562       col = *jj++;
2563       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2564       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2565     }
2566   }
2567   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2568   *ia    = cia; *ja = cja;
2569   *spidx = cspidx;
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2574 {
2575   PetscErrorCode ierr;
2576 
2577   PetscFunctionBegin;
2578   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2579   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2584 {
2585   PetscErrorCode ierr;
2586   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2587 
2588   PetscFunctionBegin;
2589   if (!Y->preallocated || !aij->nz) {
2590     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2591   }
2592   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 /* -------------------------------------------------------------------*/
2597 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2598                                        MatGetRow_SeqBAIJ,
2599                                        MatRestoreRow_SeqBAIJ,
2600                                        MatMult_SeqBAIJ_N,
2601                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2602                                        MatMultTranspose_SeqBAIJ,
2603                                        MatMultTransposeAdd_SeqBAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                /* 10*/ 0,
2608                                        MatLUFactor_SeqBAIJ,
2609                                        0,
2610                                        0,
2611                                        MatTranspose_SeqBAIJ,
2612                                /* 15*/ MatGetInfo_SeqBAIJ,
2613                                        MatEqual_SeqBAIJ,
2614                                        MatGetDiagonal_SeqBAIJ,
2615                                        MatDiagonalScale_SeqBAIJ,
2616                                        MatNorm_SeqBAIJ,
2617                                /* 20*/ 0,
2618                                        MatAssemblyEnd_SeqBAIJ,
2619                                        MatSetOption_SeqBAIJ,
2620                                        MatZeroEntries_SeqBAIJ,
2621                                /* 24*/ MatZeroRows_SeqBAIJ,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                /* 29*/ MatSetUp_SeqBAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                /* 34*/ MatDuplicate_SeqBAIJ,
2632                                        0,
2633                                        0,
2634                                        MatILUFactor_SeqBAIJ,
2635                                        0,
2636                                /* 39*/ MatAXPY_SeqBAIJ,
2637                                        MatCreateSubMatrices_SeqBAIJ,
2638                                        MatIncreaseOverlap_SeqBAIJ,
2639                                        MatGetValues_SeqBAIJ,
2640                                        MatCopy_SeqBAIJ,
2641                                /* 44*/ 0,
2642                                        MatScale_SeqBAIJ,
2643                                        MatShift_SeqBAIJ,
2644                                        0,
2645                                        MatZeroRowsColumns_SeqBAIJ,
2646                                /* 49*/ 0,
2647                                        MatGetRowIJ_SeqBAIJ,
2648                                        MatRestoreRowIJ_SeqBAIJ,
2649                                        MatGetColumnIJ_SeqBAIJ,
2650                                        MatRestoreColumnIJ_SeqBAIJ,
2651                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        MatSetValuesBlocked_SeqBAIJ,
2656                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2657                                        MatDestroy_SeqBAIJ,
2658                                        MatView_SeqBAIJ,
2659                                        0,
2660                                        0,
2661                                /* 64*/ 0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2667                                        0,
2668                                        MatConvert_Basic,
2669                                        0,
2670                                        0,
2671                                /* 74*/ 0,
2672                                        MatFDColoringApply_BAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                /* 79*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        MatLoad_SeqBAIJ,
2681                                /* 84*/ 0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                /* 89*/ 0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                /* 94*/ 0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                /* 99*/ 0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                /*104*/ 0,
2702                                        MatRealPart_SeqBAIJ,
2703                                        MatImaginaryPart_SeqBAIJ,
2704                                        0,
2705                                        0,
2706                                /*109*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        MatMissingDiagonal_SeqBAIJ,
2711                                /*114*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                /*119*/ 0,
2717                                        0,
2718                                        MatMultHermitianTranspose_SeqBAIJ,
2719                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2720                                        0,
2721                                /*124*/ 0,
2722                                        0,
2723                                        MatInvertBlockDiagonal_SeqBAIJ,
2724                                        0,
2725                                        0,
2726                                /*129*/ 0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                /*134*/ 0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                /*139*/ MatSetBlockSizes_Default,
2737                                        0,
2738                                        0,
2739                                        MatFDColoringSetUp_SeqXAIJ,
2740                                        0,
2741                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2742                                        MatDestroySubMatrices_SeqBAIJ
2743 };
2744 
2745 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2746 {
2747   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2748   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2749   PetscErrorCode ierr;
2750 
2751   PetscFunctionBegin;
2752   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2753 
2754   /* allocate space for values if not already there */
2755   if (!aij->saved_values) {
2756     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2757     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2758   }
2759 
2760   /* copy values over */
2761   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2766 {
2767   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2768   PetscErrorCode ierr;
2769   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2770 
2771   PetscFunctionBegin;
2772   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2773   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2774 
2775   /* copy values over */
2776   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2782 
2783 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2784 {
2785   Mat_SeqBAIJ    *b;
2786   PetscErrorCode ierr;
2787   PetscInt       i,mbs,nbs,bs2;
2788   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2789 
2790   PetscFunctionBegin;
2791   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2792   if (nz == MAT_SKIP_ALLOCATION) {
2793     skipallocation = PETSC_TRUE;
2794     nz             = 0;
2795   }
2796 
2797   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2798   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2799   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2800   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2801 
2802   B->preallocated = PETSC_TRUE;
2803 
2804   mbs = B->rmap->n/bs;
2805   nbs = B->cmap->n/bs;
2806   bs2 = bs*bs;
2807 
2808   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2809 
2810   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2811   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2812   if (nnz) {
2813     for (i=0; i<mbs; i++) {
2814       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2815       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2816     }
2817   }
2818 
2819   b    = (Mat_SeqBAIJ*)B->data;
2820   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2821   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2822   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2823 
2824   if (!flg) {
2825     switch (bs) {
2826     case 1:
2827       B->ops->mult    = MatMult_SeqBAIJ_1;
2828       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2829       break;
2830     case 2:
2831       B->ops->mult    = MatMult_SeqBAIJ_2;
2832       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2833       break;
2834     case 3:
2835       B->ops->mult    = MatMult_SeqBAIJ_3;
2836       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2837       break;
2838     case 4:
2839       B->ops->mult    = MatMult_SeqBAIJ_4;
2840       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2841       break;
2842     case 5:
2843       B->ops->mult    = MatMult_SeqBAIJ_5;
2844       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2845       break;
2846     case 6:
2847       B->ops->mult    = MatMult_SeqBAIJ_6;
2848       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2849       break;
2850     case 7:
2851       B->ops->mult    = MatMult_SeqBAIJ_7;
2852       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2853       break;
2854     case 9:
2855 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2856       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2858 #else
2859       B->ops->mult    = MatMult_SeqBAIJ_N;
2860       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2861 #endif
2862       break;
2863     case 11:
2864       B->ops->mult    = MatMult_SeqBAIJ_11;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2866       break;
2867     case 15:
2868       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2870       break;
2871     default:
2872       B->ops->mult    = MatMult_SeqBAIJ_N;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2874       break;
2875     }
2876   }
2877   B->ops->sor = MatSOR_SeqBAIJ;
2878   b->mbs = mbs;
2879   b->nbs = nbs;
2880   if (!skipallocation) {
2881     if (!b->imax) {
2882       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2883       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2884 
2885       b->free_imax_ilen = PETSC_TRUE;
2886     }
2887     /* b->ilen will count nonzeros in each block row so far. */
2888     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2889     if (!nnz) {
2890       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2891       else if (nz < 0) nz = 1;
2892       nz = PetscMin(nz,nbs);
2893       for (i=0; i<mbs; i++) b->imax[i] = nz;
2894       nz = nz*mbs;
2895     } else {
2896       nz = 0;
2897       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2898     }
2899 
2900     /* allocate the matrix space */
2901     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2902     if (B->structure_only) {
2903       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2904       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2905       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2906     } else {
2907       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2908       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2909       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2910     }
2911     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2912 
2913     if (B->structure_only) {
2914       b->singlemalloc = PETSC_FALSE;
2915       b->free_a       = PETSC_FALSE;
2916     } else {
2917       b->singlemalloc = PETSC_TRUE;
2918       b->free_a       = PETSC_TRUE;
2919     }
2920     b->free_ij = PETSC_TRUE;
2921 
2922     b->i[0] = 0;
2923     for (i=1; i<mbs+1; i++) {
2924       b->i[i] = b->i[i-1] + b->imax[i-1];
2925     }
2926 
2927   } else {
2928     b->free_a  = PETSC_FALSE;
2929     b->free_ij = PETSC_FALSE;
2930   }
2931 
2932   b->bs2              = bs2;
2933   b->mbs              = mbs;
2934   b->nz               = 0;
2935   b->maxnz            = nz;
2936   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2937   B->was_assembled    = PETSC_FALSE;
2938   B->assembled        = PETSC_FALSE;
2939   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2944 {
2945   PetscInt       i,m,nz,nz_max=0,*nnz;
2946   PetscScalar    *values=0;
2947   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2948   PetscErrorCode ierr;
2949 
2950   PetscFunctionBegin;
2951   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2952   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2953   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2954   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2955   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2956   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2957   m    = B->rmap->n/bs;
2958 
2959   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2960   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2961   for (i=0; i<m; i++) {
2962     nz = ii[i+1]- ii[i];
2963     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2964     nz_max = PetscMax(nz_max, nz);
2965     nnz[i] = nz;
2966   }
2967   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2968   ierr = PetscFree(nnz);CHKERRQ(ierr);
2969 
2970   values = (PetscScalar*)V;
2971   if (!values) {
2972     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2973   }
2974   for (i=0; i<m; i++) {
2975     PetscInt          ncols  = ii[i+1] - ii[i];
2976     const PetscInt    *icols = jj + ii[i];
2977     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2978     if (!roworiented) {
2979       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2980     } else {
2981       PetscInt j;
2982       for (j=0; j<ncols; j++) {
2983         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2984         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2985       }
2986     }
2987   }
2988   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2989   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2990   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2991   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2992   PetscFunctionReturn(0);
2993 }
2994 
2995 /*MC
2996    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2997    block sparse compressed row format.
2998 
2999    Options Database Keys:
3000 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3001 
3002   Level: beginner
3003 
3004 .seealso: MatCreateSeqBAIJ()
3005 M*/
3006 
3007 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3008 
3009 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3010 {
3011   PetscErrorCode ierr;
3012   PetscMPIInt    size;
3013   Mat_SeqBAIJ    *b;
3014 
3015   PetscFunctionBegin;
3016   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3017   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3018 
3019   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3020   B->data = (void*)b;
3021   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3022 
3023   b->row          = 0;
3024   b->col          = 0;
3025   b->icol         = 0;
3026   b->reallocs     = 0;
3027   b->saved_values = 0;
3028 
3029   b->roworiented        = PETSC_TRUE;
3030   b->nonew              = 0;
3031   b->diag               = 0;
3032   B->spptr              = 0;
3033   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3034   b->keepnonzeropattern = PETSC_FALSE;
3035 
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3045 #if defined(PETSC_HAVE_HYPRE)
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3047 #endif
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3055 {
3056   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3057   PetscErrorCode ierr;
3058   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3059 
3060   PetscFunctionBegin;
3061   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3062 
3063   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3064     c->imax           = a->imax;
3065     c->ilen           = a->ilen;
3066     c->free_imax_ilen = PETSC_FALSE;
3067   } else {
3068     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3069     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3070     for (i=0; i<mbs; i++) {
3071       c->imax[i] = a->imax[i];
3072       c->ilen[i] = a->ilen[i];
3073     }
3074     c->free_imax_ilen = PETSC_TRUE;
3075   }
3076 
3077   /* allocate the matrix space */
3078   if (mallocmatspace) {
3079     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3080       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3081       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3082 
3083       c->i            = a->i;
3084       c->j            = a->j;
3085       c->singlemalloc = PETSC_FALSE;
3086       c->free_a       = PETSC_TRUE;
3087       c->free_ij      = PETSC_FALSE;
3088       c->parent       = A;
3089       C->preallocated = PETSC_TRUE;
3090       C->assembled    = PETSC_TRUE;
3091 
3092       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3093       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3094       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3095     } else {
3096       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3097       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3098 
3099       c->singlemalloc = PETSC_TRUE;
3100       c->free_a       = PETSC_TRUE;
3101       c->free_ij      = PETSC_TRUE;
3102 
3103       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3104       if (mbs > 0) {
3105         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3106         if (cpvalues == MAT_COPY_VALUES) {
3107           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3108         } else {
3109           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3110         }
3111       }
3112       C->preallocated = PETSC_TRUE;
3113       C->assembled    = PETSC_TRUE;
3114     }
3115   }
3116 
3117   c->roworiented = a->roworiented;
3118   c->nonew       = a->nonew;
3119 
3120   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3121   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3122 
3123   c->bs2         = a->bs2;
3124   c->mbs         = a->mbs;
3125   c->nbs         = a->nbs;
3126 
3127   if (a->diag) {
3128     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3129       c->diag      = a->diag;
3130       c->free_diag = PETSC_FALSE;
3131     } else {
3132       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3133       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3134       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3135       c->free_diag = PETSC_TRUE;
3136     }
3137   } else c->diag = 0;
3138 
3139   c->nz         = a->nz;
3140   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3141   c->solve_work = NULL;
3142   c->mult_work  = NULL;
3143   c->sor_workt  = NULL;
3144   c->sor_work   = NULL;
3145 
3146   c->compressedrow.use   = a->compressedrow.use;
3147   c->compressedrow.nrows = a->compressedrow.nrows;
3148   if (a->compressedrow.use) {
3149     i    = a->compressedrow.nrows;
3150     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3151     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3152     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3153     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3154   } else {
3155     c->compressedrow.use    = PETSC_FALSE;
3156     c->compressedrow.i      = NULL;
3157     c->compressedrow.rindex = NULL;
3158   }
3159   C->nonzerostate = A->nonzerostate;
3160 
3161   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164 
3165 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3166 {
3167   PetscErrorCode ierr;
3168 
3169   PetscFunctionBegin;
3170   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3171   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3172   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3173   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3174   PetscFunctionReturn(0);
3175 }
3176 
3177 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3178 {
3179   Mat_SeqBAIJ    *a;
3180   PetscErrorCode ierr;
3181   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3182   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3183   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3184   PetscInt       *masked,nmask,tmp,bs2,ishift;
3185   PetscMPIInt    size;
3186   int            fd;
3187   PetscScalar    *aa;
3188   MPI_Comm       comm;
3189   PetscBool      isbinary;
3190 
3191   PetscFunctionBegin;
3192   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3193   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3194 
3195   /* force binary viewer to load .info file if it has not yet done so */
3196   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3197   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3198   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3199   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3200   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3201   if (bs < 0) bs = 1;
3202   bs2  = bs*bs;
3203 
3204   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3205   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3206   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3207   ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3208   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3209   M = header[1]; N = header[2]; nz = header[3];
3210 
3211   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3212   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3213 
3214   /*
3215      This code adds extra rows to make sure the number of rows is
3216     divisible by the blocksize
3217   */
3218   mbs        = M/bs;
3219   extra_rows = bs - M + bs*(mbs);
3220   if (extra_rows == bs) extra_rows = 0;
3221   else mbs++;
3222   if (extra_rows) {
3223     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3224   }
3225 
3226   /* Set global sizes if not already set */
3227   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3228     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3229   } else { /* Check if the matrix global sizes are correct */
3230     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3231     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3232       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3233     }
3234     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3235   }
3236 
3237   /* read in row lengths */
3238   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3239   ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
3240   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3241 
3242   /* read in column indices */
3243   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3244   ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3245   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3246 
3247   /* loop over row lengths determining block row lengths */
3248   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3249   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3250   ierr     = PetscArrayzero(mask,mbs);CHKERRQ(ierr);
3251   rowcount = 0;
3252   nzcount  = 0;
3253   for (i=0; i<mbs; i++) {
3254     nmask = 0;
3255     for (j=0; j<bs; j++) {
3256       kmax = rowlengths[rowcount];
3257       for (k=0; k<kmax; k++) {
3258         tmp = jj[nzcount++]/bs;
3259         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3260       }
3261       rowcount++;
3262     }
3263     browlengths[i] += nmask;
3264     /* zero out the mask elements we set */
3265     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3266   }
3267 
3268   /* Do preallocation  */
3269   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3270   a    = (Mat_SeqBAIJ*)newmat->data;
3271 
3272   /* set matrix "i" values */
3273   a->i[0] = 0;
3274   for (i=1; i<= mbs; i++) {
3275     a->i[i]      = a->i[i-1] + browlengths[i-1];
3276     a->ilen[i-1] = browlengths[i-1];
3277   }
3278   a->nz = 0;
3279   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3280 
3281   /* read in nonzero values */
3282   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3283   ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3284   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3285 
3286   /* set "a" and "j" values into matrix */
3287   nzcount = 0; jcount = 0;
3288   for (i=0; i<mbs; i++) {
3289     nzcountb = nzcount;
3290     nmask    = 0;
3291     for (j=0; j<bs; j++) {
3292       kmax = rowlengths[i*bs+j];
3293       for (k=0; k<kmax; k++) {
3294         tmp = jj[nzcount++]/bs;
3295         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3296       }
3297     }
3298     /* sort the masked values */
3299     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3300 
3301     /* set "j" values into matrix */
3302     maskcount = 1;
3303     for (j=0; j<nmask; j++) {
3304       a->j[jcount++]  = masked[j];
3305       mask[masked[j]] = maskcount++;
3306     }
3307     /* set "a" values into matrix */
3308     ishift = bs2*a->i[i];
3309     for (j=0; j<bs; j++) {
3310       kmax = rowlengths[i*bs+j];
3311       for (k=0; k<kmax; k++) {
3312         tmp       = jj[nzcountb]/bs;
3313         block     = mask[tmp] - 1;
3314         point     = jj[nzcountb] - bs*tmp;
3315         idx       = ishift + bs2*block + j + bs*point;
3316         a->a[idx] = (MatScalar)aa[nzcountb++];
3317       }
3318     }
3319     /* zero out the mask elements we set */
3320     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3321   }
3322   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3323 
3324   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3325   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3326   ierr = PetscFree(aa);CHKERRQ(ierr);
3327   ierr = PetscFree(jj);CHKERRQ(ierr);
3328   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3329 
3330   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3331   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3332   PetscFunctionReturn(0);
3333 }
3334 
3335 /*@C
3336    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3337    compressed row) format.  For good matrix assembly performance the
3338    user should preallocate the matrix storage by setting the parameter nz
3339    (or the array nnz).  By setting these parameters accurately, performance
3340    during matrix assembly can be increased by more than a factor of 50.
3341 
3342    Collective
3343 
3344    Input Parameters:
3345 +  comm - MPI communicator, set to PETSC_COMM_SELF
3346 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3347           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3348 .  m - number of rows
3349 .  n - number of columns
3350 .  nz - number of nonzero blocks  per block row (same for all rows)
3351 -  nnz - array containing the number of nonzero blocks in the various block rows
3352          (possibly different for each block row) or NULL
3353 
3354    Output Parameter:
3355 .  A - the matrix
3356 
3357    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3358    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3359    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3360 
3361    Options Database Keys:
3362 .   -mat_no_unroll - uses code that does not unroll the loops in the
3363                      block calculations (much slower)
3364 .    -mat_block_size - size of the blocks to use
3365 
3366    Level: intermediate
3367 
3368    Notes:
3369    The number of rows and columns must be divisible by blocksize.
3370 
3371    If the nnz parameter is given then the nz parameter is ignored
3372 
3373    A nonzero block is any block that as 1 or more nonzeros in it
3374 
3375    The block AIJ format is fully compatible with standard Fortran 77
3376    storage.  That is, the stored row and column indices can begin at
3377    either one (as in Fortran) or zero.  See the users' manual for details.
3378 
3379    Specify the preallocated storage with either nz or nnz (not both).
3380    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3381    allocation.  See Users-Manual: ch_mat for details.
3382    matrices.
3383 
3384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3385 @*/
3386 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3387 {
3388   PetscErrorCode ierr;
3389 
3390   PetscFunctionBegin;
3391   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3392   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3393   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3394   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3395   PetscFunctionReturn(0);
3396 }
3397 
3398 /*@C
3399    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3400    per row in the matrix. For good matrix assembly performance the
3401    user should preallocate the matrix storage by setting the parameter nz
3402    (or the array nnz).  By setting these parameters accurately, performance
3403    during matrix assembly can be increased by more than a factor of 50.
3404 
3405    Collective
3406 
3407    Input Parameters:
3408 +  B - the matrix
3409 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3410           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3411 .  nz - number of block nonzeros per block row (same for all rows)
3412 -  nnz - array containing the number of block nonzeros in the various block rows
3413          (possibly different for each block row) or NULL
3414 
3415    Options Database Keys:
3416 .   -mat_no_unroll - uses code that does not unroll the loops in the
3417                      block calculations (much slower)
3418 .   -mat_block_size - size of the blocks to use
3419 
3420    Level: intermediate
3421 
3422    Notes:
3423    If the nnz parameter is given then the nz parameter is ignored
3424 
3425    You can call MatGetInfo() to get information on how effective the preallocation was;
3426    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3427    You can also run with the option -info and look for messages with the string
3428    malloc in them to see if additional memory allocation was needed.
3429 
3430    The block AIJ format is fully compatible with standard Fortran 77
3431    storage.  That is, the stored row and column indices can begin at
3432    either one (as in Fortran) or zero.  See the users' manual for details.
3433 
3434    Specify the preallocated storage with either nz or nnz (not both).
3435    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3436    allocation.  See Users-Manual: ch_mat for details.
3437 
3438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3439 @*/
3440 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3441 {
3442   PetscErrorCode ierr;
3443 
3444   PetscFunctionBegin;
3445   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3446   PetscValidType(B,1);
3447   PetscValidLogicalCollectiveInt(B,bs,2);
3448   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3449   PetscFunctionReturn(0);
3450 }
3451 
3452 /*@C
3453    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3454    (the default sequential PETSc format).
3455 
3456    Collective
3457 
3458    Input Parameters:
3459 +  B - the matrix
3460 .  i - the indices into j for the start of each local row (starts with zero)
3461 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3462 -  v - optional values in the matrix
3463 
3464    Level: developer
3465 
3466    Notes:
3467    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3468    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3469    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3470    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3471    block column and the second index is over columns within a block.
3472 
3473 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3474 @*/
3475 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3476 {
3477   PetscErrorCode ierr;
3478 
3479   PetscFunctionBegin;
3480   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3481   PetscValidType(B,1);
3482   PetscValidLogicalCollectiveInt(B,bs,2);
3483   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3484   PetscFunctionReturn(0);
3485 }
3486 
3487 
3488 /*@
3489      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3490 
3491      Collective
3492 
3493    Input Parameters:
3494 +  comm - must be an MPI communicator of size 1
3495 .  bs - size of block
3496 .  m - number of rows
3497 .  n - number of columns
3498 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3499 .  j - column indices
3500 -  a - matrix values
3501 
3502    Output Parameter:
3503 .  mat - the matrix
3504 
3505    Level: advanced
3506 
3507    Notes:
3508        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3509     once the matrix is destroyed
3510 
3511        You cannot set new nonzero locations into this matrix, that will generate an error.
3512 
3513        The i and j indices are 0 based
3514 
3515        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3516 
3517       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3518       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3519       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3520       with column-major ordering within blocks.
3521 
3522 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3523 
3524 @*/
3525 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3526 {
3527   PetscErrorCode ierr;
3528   PetscInt       ii;
3529   Mat_SeqBAIJ    *baij;
3530 
3531   PetscFunctionBegin;
3532   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3533   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3534 
3535   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3536   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3537   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3538   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3539   baij = (Mat_SeqBAIJ*)(*mat)->data;
3540   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3541   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3542 
3543   baij->i = i;
3544   baij->j = j;
3545   baij->a = a;
3546 
3547   baij->singlemalloc = PETSC_FALSE;
3548   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3549   baij->free_a       = PETSC_FALSE;
3550   baij->free_ij      = PETSC_FALSE;
3551 
3552   for (ii=0; ii<m; ii++) {
3553     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3554 #if defined(PETSC_USE_DEBUG)
3555     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3556 #endif
3557   }
3558 #if defined(PETSC_USE_DEBUG)
3559   for (ii=0; ii<baij->i[m]; ii++) {
3560     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3561     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3562   }
3563 #endif
3564 
3565   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3566   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3567   PetscFunctionReturn(0);
3568 }
3569 
3570 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3571 {
3572   PetscErrorCode ierr;
3573   PetscMPIInt    size;
3574 
3575   PetscFunctionBegin;
3576   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3577   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3578     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3579   } else {
3580     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3581   }
3582   PetscFunctionReturn(0);
3583 }
3584