xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 1230317d47e50dc0bffca96e8ee76ec386405dd1)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*);
17 #endif
18 
19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
20 {
21   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
22   PetscErrorCode ierr;
23   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
24   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
25   PetscReal      shift = 0.0;
26   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
27 
28   PetscFunctionBegin;
29   allowzeropivot = PetscNot(A->erroriffailure);
30 
31   if (a->idiagvalid) {
32     if (values) *values = a->idiag;
33     PetscFunctionReturn(0);
34   }
35   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
36   diag_offset = a->diag;
37   if (!a->idiag) {
38     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
39     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
40   }
41   diag  = a->idiag;
42   if (values) *values = a->idiag;
43   /* factor and invert each block */
44   switch (bs) {
45   case 1:
46     for (i=0; i<mbs; i++) {
47       odiag    = v + 1*diag_offset[i];
48       diag[0]  = odiag[0];
49 
50       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
51         if (allowzeropivot) {
52           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
53           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
54           A->factorerror_zeropivot_row   = i;
55           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
56         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
57       }
58 
59       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
60       diag    += 1;
61     }
62     break;
63   case 2:
64     for (i=0; i<mbs; i++) {
65       odiag    = v + 4*diag_offset[i];
66       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
67       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
69       diag    += 4;
70     }
71     break;
72   case 3:
73     for (i=0; i<mbs; i++) {
74       odiag    = v + 9*diag_offset[i];
75       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
76       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
77       diag[8]  = odiag[8];
78       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
79       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80       diag    += 9;
81     }
82     break;
83   case 4:
84     for (i=0; i<mbs; i++) {
85       odiag  = v + 16*diag_offset[i];
86       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
87       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
88       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
89       diag  += 16;
90     }
91     break;
92   case 5:
93     for (i=0; i<mbs; i++) {
94       odiag  = v + 25*diag_offset[i];
95       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
97       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
98       diag  += 25;
99     }
100     break;
101   case 6:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 36*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
106       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
107       diag  += 36;
108     }
109     break;
110   case 7:
111     for (i=0; i<mbs; i++) {
112       odiag  = v + 49*diag_offset[i];
113       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
115       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116       diag  += 49;
117     }
118     break;
119   default:
120     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
121     for (i=0; i<mbs; i++) {
122       odiag  = v + bs2*diag_offset[i];
123       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
124       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
125       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126       diag  += bs2;
127     }
128     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
129   }
130   a->idiagvalid = PETSC_TRUE;
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
135 {
136   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
137   PetscScalar       *x,*work,*w,*workt,*t;
138   const MatScalar   *v,*aa = a->a, *idiag;
139   const PetscScalar *b,*xb;
140   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
141   PetscErrorCode    ierr;
142   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
143   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
144 
145   PetscFunctionBegin;
146   its = its*lits;
147   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
148   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
149   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
150   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
151   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
152 
153   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
154 
155   if (!m) PetscFunctionReturn(0);
156   diag  = a->diag;
157   idiag = a->idiag;
158   k    = PetscMax(A->rmap->n,A->cmap->n);
159   if (!a->mult_work) {
160     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
161   }
162   if (!a->sor_workt) {
163     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
164   }
165   if (!a->sor_work) {
166     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
167   }
168   work = a->mult_work;
169   t    = a->sor_workt;
170   w    = a->sor_work;
171 
172   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
173   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
174 
175   if (flag & SOR_ZERO_INITIAL_GUESS) {
176     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
177       switch (bs) {
178       case 1:
179         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
180         t[0] = b[0];
181         i2     = 1;
182         idiag += 1;
183         for (i=1; i<m; i++) {
184           v  = aa + ai[i];
185           vi = aj + ai[i];
186           nz = diag[i] - ai[i];
187           s[0] = b[i2];
188           for (j=0; j<nz; j++) {
189             xw[0] = x[vi[j]];
190             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
191           }
192           t[i2] = s[0];
193           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
194           x[i2]  = xw[0];
195           idiag += 1;
196           i2    += 1;
197         }
198         break;
199       case 2:
200         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
201         t[0] = b[0]; t[1] = b[1];
202         i2     = 2;
203         idiag += 4;
204         for (i=1; i<m; i++) {
205           v  = aa + 4*ai[i];
206           vi = aj + ai[i];
207           nz = diag[i] - ai[i];
208           s[0] = b[i2]; s[1] = b[i2+1];
209           for (j=0; j<nz; j++) {
210             idx = 2*vi[j];
211             it  = 4*j;
212             xw[0] = x[idx]; xw[1] = x[1+idx];
213             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
214           }
215           t[i2] = s[0]; t[i2+1] = s[1];
216           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
217           x[i2]   = xw[0]; x[i2+1] = xw[1];
218           idiag  += 4;
219           i2     += 2;
220         }
221         break;
222       case 3:
223         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
224         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
225         i2     = 3;
226         idiag += 9;
227         for (i=1; i<m; i++) {
228           v  = aa + 9*ai[i];
229           vi = aj + ai[i];
230           nz = diag[i] - ai[i];
231           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
232           while (nz--) {
233             idx = 3*(*vi++);
234             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
235             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
236             v  += 9;
237           }
238           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
239           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
240           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
241           idiag  += 9;
242           i2     += 3;
243         }
244         break;
245       case 4:
246         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
247         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
248         i2     = 4;
249         idiag += 16;
250         for (i=1; i<m; i++) {
251           v  = aa + 16*ai[i];
252           vi = aj + ai[i];
253           nz = diag[i] - ai[i];
254           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
255           while (nz--) {
256             idx = 4*(*vi++);
257             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
258             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
259             v  += 16;
260           }
261           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
262           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
263           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
264           idiag  += 16;
265           i2     += 4;
266         }
267         break;
268       case 5:
269         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
270         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
271         i2     = 5;
272         idiag += 25;
273         for (i=1; i<m; i++) {
274           v  = aa + 25*ai[i];
275           vi = aj + ai[i];
276           nz = diag[i] - ai[i];
277           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
278           while (nz--) {
279             idx = 5*(*vi++);
280             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
281             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
282             v  += 25;
283           }
284           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
285           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
286           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
287           idiag  += 25;
288           i2     += 5;
289         }
290         break;
291       case 6:
292         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
293         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
294         i2     = 6;
295         idiag += 36;
296         for (i=1; i<m; i++) {
297           v  = aa + 36*ai[i];
298           vi = aj + ai[i];
299           nz = diag[i] - ai[i];
300           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
301           while (nz--) {
302             idx = 6*(*vi++);
303             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
304             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
305             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
306             v  += 36;
307           }
308           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
309           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
310           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
311           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
312           idiag  += 36;
313           i2     += 6;
314         }
315         break;
316       case 7:
317         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
318         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
319         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
320         i2     = 7;
321         idiag += 49;
322         for (i=1; i<m; i++) {
323           v  = aa + 49*ai[i];
324           vi = aj + ai[i];
325           nz = diag[i] - ai[i];
326           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
327           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
328           while (nz--) {
329             idx = 7*(*vi++);
330             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
331             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
332             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
333             v  += 49;
334           }
335           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
336           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
337           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
338           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
339           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
340           idiag  += 49;
341           i2     += 7;
342         }
343         break;
344       default:
345         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
346         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
347         i2     = bs;
348         idiag += bs2;
349         for (i=1; i<m; i++) {
350           v  = aa + bs2*ai[i];
351           vi = aj + ai[i];
352           nz = diag[i] - ai[i];
353 
354           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355           /* copy all rows of x that are needed into contiguous space */
356           workt = work;
357           for (j=0; j<nz; j++) {
358             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
359             workt += bs;
360           }
361           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
362           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
363           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
364 
365           idiag += bs2;
366           i2    += bs;
367         }
368         break;
369       }
370       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
371       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
372       xb = t;
373     }
374     else xb = b;
375     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
376       idiag = a->idiag+bs2*(a->mbs-1);
377       i2 = bs * (m-1);
378       switch (bs) {
379       case 1:
380         s[0]  = xb[i2];
381         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
382         x[i2] = xw[0];
383         i2   -= 1;
384         for (i=m-2; i>=0; i--) {
385           v  = aa + (diag[i]+1);
386           vi = aj + diag[i] + 1;
387           nz = ai[i+1] - diag[i] - 1;
388           s[0] = xb[i2];
389           for (j=0; j<nz; j++) {
390             xw[0] = x[vi[j]];
391             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
392           }
393           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
394           x[i2]  = xw[0];
395           idiag -= 1;
396           i2    -= 1;
397         }
398         break;
399       case 2:
400         s[0]  = xb[i2]; s[1] = xb[i2+1];
401         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
402         x[i2] = xw[0]; x[i2+1] = xw[1];
403         i2    -= 2;
404         idiag -= 4;
405         for (i=m-2; i>=0; i--) {
406           v  = aa + 4*(diag[i] + 1);
407           vi = aj + diag[i] + 1;
408           nz = ai[i+1] - diag[i] - 1;
409           s[0] = xb[i2]; s[1] = xb[i2+1];
410           for (j=0; j<nz; j++) {
411             idx = 2*vi[j];
412             it  = 4*j;
413             xw[0] = x[idx]; xw[1] = x[1+idx];
414             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
415           }
416           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
417           x[i2]   = xw[0]; x[i2+1] = xw[1];
418           idiag  -= 4;
419           i2     -= 2;
420         }
421         break;
422       case 3:
423         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
424         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
425         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
426         i2    -= 3;
427         idiag -= 9;
428         for (i=m-2; i>=0; i--) {
429           v  = aa + 9*(diag[i]+1);
430           vi = aj + diag[i] + 1;
431           nz = ai[i+1] - diag[i] - 1;
432           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
433           while (nz--) {
434             idx = 3*(*vi++);
435             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
436             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
437             v  += 9;
438           }
439           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
440           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
441           idiag  -= 9;
442           i2     -= 3;
443         }
444         break;
445       case 4:
446         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
447         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
448         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
449         i2    -= 4;
450         idiag -= 16;
451         for (i=m-2; i>=0; i--) {
452           v  = aa + 16*(diag[i]+1);
453           vi = aj + diag[i] + 1;
454           nz = ai[i+1] - diag[i] - 1;
455           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
456           while (nz--) {
457             idx = 4*(*vi++);
458             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
459             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
460             v  += 16;
461           }
462           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
463           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
464           idiag  -= 16;
465           i2     -= 4;
466         }
467         break;
468       case 5:
469         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
470         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
471         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
472         i2    -= 5;
473         idiag -= 25;
474         for (i=m-2; i>=0; i--) {
475           v  = aa + 25*(diag[i]+1);
476           vi = aj + diag[i] + 1;
477           nz = ai[i+1] - diag[i] - 1;
478           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
479           while (nz--) {
480             idx = 5*(*vi++);
481             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
482             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
483             v  += 25;
484           }
485           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
486           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
487           idiag  -= 25;
488           i2     -= 5;
489         }
490         break;
491       case 6:
492         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
493         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
494         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
495         i2    -= 6;
496         idiag -= 36;
497         for (i=m-2; i>=0; i--) {
498           v  = aa + 36*(diag[i]+1);
499           vi = aj + diag[i] + 1;
500           nz = ai[i+1] - diag[i] - 1;
501           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
502           while (nz--) {
503             idx = 6*(*vi++);
504             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
505             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
506             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
507             v  += 36;
508           }
509           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511           idiag  -= 36;
512           i2     -= 6;
513         }
514         break;
515       case 7:
516         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
517         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
518         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
519         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
520         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
521         i2    -= 7;
522         idiag -= 49;
523         for (i=m-2; i>=0; i--) {
524           v  = aa + 49*(diag[i]+1);
525           vi = aj + diag[i] + 1;
526           nz = ai[i+1] - diag[i] - 1;
527           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
528           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
529           while (nz--) {
530             idx = 7*(*vi++);
531             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
532             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
533             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
534             v  += 49;
535           }
536           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
537           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
538           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
539           idiag  -= 49;
540           i2     -= 7;
541         }
542         break;
543       default:
544         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
545         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
546         i2    -= bs;
547         idiag -= bs2;
548         for (i=m-2; i>=0; i--) {
549           v  = aa + bs2*(diag[i]+1);
550           vi = aj + diag[i] + 1;
551           nz = ai[i+1] - diag[i] - 1;
552 
553           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
554           /* copy all rows of x that are needed into contiguous space */
555           workt = work;
556           for (j=0; j<nz; j++) {
557             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
558             workt += bs;
559           }
560           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
561           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562 
563           idiag -= bs2;
564           i2    -= bs;
565         }
566         break;
567       }
568       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
569     }
570     its--;
571   }
572   while (its--) {
573     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
574       idiag = a->idiag;
575       i2 = 0;
576       switch (bs) {
577       case 1:
578         for (i=0; i<m; i++) {
579           v  = aa + ai[i];
580           vi = aj + ai[i];
581           nz = ai[i+1] - ai[i];
582           s[0] = b[i2];
583           for (j=0; j<nz; j++) {
584             xw[0] = x[vi[j]];
585             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
586           }
587           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
588           x[i2] += xw[0];
589           idiag += 1;
590           i2    += 1;
591         }
592         break;
593       case 2:
594         for (i=0; i<m; i++) {
595           v  = aa + 4*ai[i];
596           vi = aj + ai[i];
597           nz = ai[i+1] - ai[i];
598           s[0] = b[i2]; s[1] = b[i2+1];
599           for (j=0; j<nz; j++) {
600             idx = 2*vi[j];
601             it  = 4*j;
602             xw[0] = x[idx]; xw[1] = x[1+idx];
603             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
604           }
605           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
606           x[i2]  += xw[0]; x[i2+1] += xw[1];
607           idiag  += 4;
608           i2     += 2;
609         }
610         break;
611       case 3:
612         for (i=0; i<m; i++) {
613           v  = aa + 9*ai[i];
614           vi = aj + ai[i];
615           nz = ai[i+1] - ai[i];
616           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
617           while (nz--) {
618             idx = 3*(*vi++);
619             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
620             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
621             v  += 9;
622           }
623           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
624           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
625           idiag  += 9;
626           i2     += 3;
627         }
628         break;
629       case 4:
630         for (i=0; i<m; i++) {
631           v  = aa + 16*ai[i];
632           vi = aj + ai[i];
633           nz = ai[i+1] - ai[i];
634           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
635           while (nz--) {
636             idx = 4*(*vi++);
637             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
638             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
639             v  += 16;
640           }
641           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
642           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
643           idiag  += 16;
644           i2     += 4;
645         }
646         break;
647       case 5:
648         for (i=0; i<m; i++) {
649           v  = aa + 25*ai[i];
650           vi = aj + ai[i];
651           nz = ai[i+1] - ai[i];
652           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
653           while (nz--) {
654             idx = 5*(*vi++);
655             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
656             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
657             v  += 25;
658           }
659           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
660           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
661           idiag  += 25;
662           i2     += 5;
663         }
664         break;
665       case 6:
666         for (i=0; i<m; i++) {
667           v  = aa + 36*ai[i];
668           vi = aj + ai[i];
669           nz = ai[i+1] - ai[i];
670           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
671           while (nz--) {
672             idx = 6*(*vi++);
673             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
674             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
675             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
676             v  += 36;
677           }
678           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
679           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
680           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
681           idiag  += 36;
682           i2     += 6;
683         }
684         break;
685       case 7:
686         for (i=0; i<m; i++) {
687           v  = aa + 49*ai[i];
688           vi = aj + ai[i];
689           nz = ai[i+1] - ai[i];
690           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
691           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
692           while (nz--) {
693             idx = 7*(*vi++);
694             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
695             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
696             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
697             v  += 49;
698           }
699           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
700           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
701           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
702           idiag  += 49;
703           i2     += 7;
704         }
705         break;
706       default:
707         for (i=0; i<m; i++) {
708           v  = aa + bs2*ai[i];
709           vi = aj + ai[i];
710           nz = ai[i+1] - ai[i];
711 
712           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
713           /* copy all rows of x that are needed into contiguous space */
714           workt = work;
715           for (j=0; j<nz; j++) {
716             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
717             workt += bs;
718           }
719           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
720           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
721 
722           idiag += bs2;
723           i2    += bs;
724         }
725         break;
726       }
727       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
728     }
729     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
730       idiag = a->idiag+bs2*(a->mbs-1);
731       i2 = bs * (m-1);
732       switch (bs) {
733       case 1:
734         for (i=m-1; i>=0; i--) {
735           v  = aa + ai[i];
736           vi = aj + ai[i];
737           nz = ai[i+1] - ai[i];
738           s[0] = b[i2];
739           for (j=0; j<nz; j++) {
740             xw[0] = x[vi[j]];
741             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
742           }
743           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
744           x[i2] += xw[0];
745           idiag -= 1;
746           i2    -= 1;
747         }
748         break;
749       case 2:
750         for (i=m-1; i>=0; i--) {
751           v  = aa + 4*ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2]; s[1] = b[i2+1];
755           for (j=0; j<nz; j++) {
756             idx = 2*vi[j];
757             it  = 4*j;
758             xw[0] = x[idx]; xw[1] = x[1+idx];
759             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
760           }
761           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
762           x[i2]  += xw[0]; x[i2+1] += xw[1];
763           idiag  -= 4;
764           i2     -= 2;
765         }
766         break;
767       case 3:
768         for (i=m-1; i>=0; i--) {
769           v  = aa + 9*ai[i];
770           vi = aj + ai[i];
771           nz = ai[i+1] - ai[i];
772           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
773           while (nz--) {
774             idx = 3*(*vi++);
775             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
776             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
777             v  += 9;
778           }
779           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
780           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
781           idiag  -= 9;
782           i2     -= 3;
783         }
784         break;
785       case 4:
786         for (i=m-1; i>=0; i--) {
787           v  = aa + 16*ai[i];
788           vi = aj + ai[i];
789           nz = ai[i+1] - ai[i];
790           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
791           while (nz--) {
792             idx = 4*(*vi++);
793             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
794             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
795             v  += 16;
796           }
797           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
798           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
799           idiag  -= 16;
800           i2     -= 4;
801         }
802         break;
803       case 5:
804         for (i=m-1; i>=0; i--) {
805           v  = aa + 25*ai[i];
806           vi = aj + ai[i];
807           nz = ai[i+1] - ai[i];
808           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
809           while (nz--) {
810             idx = 5*(*vi++);
811             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
812             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
813             v  += 25;
814           }
815           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
816           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
817           idiag  -= 25;
818           i2     -= 5;
819         }
820         break;
821       case 6:
822         for (i=m-1; i>=0; i--) {
823           v  = aa + 36*ai[i];
824           vi = aj + ai[i];
825           nz = ai[i+1] - ai[i];
826           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
827           while (nz--) {
828             idx = 6*(*vi++);
829             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
830             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
831             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
832             v  += 36;
833           }
834           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
835           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
836           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
837           idiag  -= 36;
838           i2     -= 6;
839         }
840         break;
841       case 7:
842         for (i=m-1; i>=0; i--) {
843           v  = aa + 49*ai[i];
844           vi = aj + ai[i];
845           nz = ai[i+1] - ai[i];
846           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
847           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
848           while (nz--) {
849             idx = 7*(*vi++);
850             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
851             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
852             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
853             v  += 49;
854           }
855           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
856           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
857           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
858           idiag  -= 49;
859           i2     -= 7;
860         }
861         break;
862       default:
863         for (i=m-1; i>=0; i--) {
864           v  = aa + bs2*ai[i];
865           vi = aj + ai[i];
866           nz = ai[i+1] - ai[i];
867 
868           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
869           /* copy all rows of x that are needed into contiguous space */
870           workt = work;
871           for (j=0; j<nz; j++) {
872             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
873             workt += bs;
874           }
875           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
876           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
877 
878           idiag -= bs2;
879           i2    -= bs;
880         }
881         break;
882       }
883       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
884     }
885   }
886   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
887   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
888   PetscFunctionReturn(0);
889 }
890 
891 
892 /*
893     Special version for direct calls from Fortran (Used in PETSc-fun3d)
894 */
895 #if defined(PETSC_HAVE_FORTRAN_CAPS)
896 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
897 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
898 #define matsetvaluesblocked4_ matsetvaluesblocked4
899 #endif
900 
901 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
902 {
903   Mat               A  = *AA;
904   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
905   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
906   PetscInt          *ai    =a->i,*ailen=a->ilen;
907   PetscInt          *aj    =a->j,stepval,lastcol = -1;
908   const PetscScalar *value = v;
909   MatScalar         *ap,*aa = a->a,*bap;
910 
911   PetscFunctionBegin;
912   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
913   stepval = (n-1)*4;
914   for (k=0; k<m; k++) { /* loop over added rows */
915     row  = im[k];
916     rp   = aj + ai[row];
917     ap   = aa + 16*ai[row];
918     nrow = ailen[row];
919     low  = 0;
920     high = nrow;
921     for (l=0; l<n; l++) { /* loop over added columns */
922       col = in[l];
923       if (col <= lastcol)  low = 0;
924       else                high = nrow;
925       lastcol = col;
926       value   = v + k*(stepval+4 + l)*4;
927       while (high-low > 7) {
928         t = (low+high)/2;
929         if (rp[t] > col) high = t;
930         else             low  = t;
931       }
932       for (i=low; i<high; i++) {
933         if (rp[i] > col) break;
934         if (rp[i] == col) {
935           bap = ap +  16*i;
936           for (ii=0; ii<4; ii++,value+=stepval) {
937             for (jj=ii; jj<16; jj+=4) {
938               bap[jj] += *value++;
939             }
940           }
941           goto noinsert2;
942         }
943       }
944       N = nrow++ - 1;
945       high++; /* added new column index thus must search to one higher than before */
946       /* shift up all the later entries in this row */
947       for (ii=N; ii>=i; ii--) {
948         rp[ii+1] = rp[ii];
949         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
950       }
951       if (N >= i) {
952         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
953       }
954       rp[i] = col;
955       bap   = ap +  16*i;
956       for (ii=0; ii<4; ii++,value+=stepval) {
957         for (jj=ii; jj<16; jj+=4) {
958           bap[jj] = *value++;
959         }
960       }
961       noinsert2:;
962       low = i;
963     }
964     ailen[row] = nrow;
965   }
966   PetscFunctionReturnVoid();
967 }
968 
969 #if defined(PETSC_HAVE_FORTRAN_CAPS)
970 #define matsetvalues4_ MATSETVALUES4
971 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
972 #define matsetvalues4_ matsetvalues4
973 #endif
974 
975 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
976 {
977   Mat         A  = *AA;
978   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
979   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
980   PetscInt    *ai=a->i,*ailen=a->ilen;
981   PetscInt    *aj=a->j,brow,bcol;
982   PetscInt    ridx,cidx,lastcol = -1;
983   MatScalar   *ap,value,*aa=a->a,*bap;
984 
985   PetscFunctionBegin;
986   for (k=0; k<m; k++) { /* loop over added rows */
987     row  = im[k]; brow = row/4;
988     rp   = aj + ai[brow];
989     ap   = aa + 16*ai[brow];
990     nrow = ailen[brow];
991     low  = 0;
992     high = nrow;
993     for (l=0; l<n; l++) { /* loop over added columns */
994       col   = in[l]; bcol = col/4;
995       ridx  = row % 4; cidx = col % 4;
996       value = v[l + k*n];
997       if (col <= lastcol)  low = 0;
998       else                high = nrow;
999       lastcol = col;
1000       while (high-low > 7) {
1001         t = (low+high)/2;
1002         if (rp[t] > bcol) high = t;
1003         else              low  = t;
1004       }
1005       for (i=low; i<high; i++) {
1006         if (rp[i] > bcol) break;
1007         if (rp[i] == bcol) {
1008           bap   = ap +  16*i + 4*cidx + ridx;
1009           *bap += value;
1010           goto noinsert1;
1011         }
1012       }
1013       N = nrow++ - 1;
1014       high++; /* added new column thus must search to one higher than before */
1015       /* shift up all the later entries in this row */
1016       for (ii=N; ii>=i; ii--) {
1017         rp[ii+1] = rp[ii];
1018         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1019       }
1020       if (N>=i) {
1021         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1022       }
1023       rp[i]                    = bcol;
1024       ap[16*i + 4*cidx + ridx] = value;
1025 noinsert1:;
1026       low = i;
1027     }
1028     ailen[brow] = nrow;
1029   }
1030   PetscFunctionReturnVoid();
1031 }
1032 
1033 /*
1034      Checks for missing diagonals
1035 */
1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1037 {
1038   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040   PetscInt       *diag,*ii = a->i,i;
1041 
1042   PetscFunctionBegin;
1043   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1044   *missing = PETSC_FALSE;
1045   if (A->rmap->n > 0 && !ii) {
1046     *missing = PETSC_TRUE;
1047     if (d) *d = 0;
1048     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1049   } else {
1050     diag = a->diag;
1051     for (i=0; i<a->mbs; i++) {
1052       if (diag[i] >= ii[i+1]) {
1053         *missing = PETSC_TRUE;
1054         if (d) *d = i;
1055         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1056         break;
1057       }
1058     }
1059   }
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1089 {
1090   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1091   PetscErrorCode ierr;
1092   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1093   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1094 
1095   PetscFunctionBegin;
1096   *nn = n;
1097   if (!ia) PetscFunctionReturn(0);
1098   if (symmetric) {
1099     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1100     nz   = tia[n];
1101   } else {
1102     tia = a->i; tja = a->j;
1103   }
1104 
1105   if (!blockcompressed && bs > 1) {
1106     (*nn) *= bs;
1107     /* malloc & create the natural set of indices */
1108     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1109     if (n) {
1110       (*ia)[0] = oshift;
1111       for (j=1; j<bs; j++) {
1112         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1113       }
1114     }
1115 
1116     for (i=1; i<n; i++) {
1117       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1118       for (j=1; j<bs; j++) {
1119         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1120       }
1121     }
1122     if (n) {
1123       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1124     }
1125 
1126     if (inja) {
1127       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1128       cnt = 0;
1129       for (i=0; i<n; i++) {
1130         for (j=0; j<bs; j++) {
1131           for (k=tia[i]; k<tia[i+1]; k++) {
1132             for (l=0; l<bs; l++) {
1133               (*ja)[cnt++] = bs*tja[k] + l;
1134             }
1135           }
1136         }
1137       }
1138     }
1139 
1140     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1141       ierr = PetscFree(tia);CHKERRQ(ierr);
1142       ierr = PetscFree(tja);CHKERRQ(ierr);
1143     }
1144   } else if (oshift == 1) {
1145     if (symmetric) {
1146       nz = tia[A->rmap->n/bs];
1147       /*  add 1 to i and j indices */
1148       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1149       *ia = tia;
1150       if (ja) {
1151         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1152         *ja = tja;
1153       }
1154     } else {
1155       nz = a->i[A->rmap->n/bs];
1156       /* malloc space and  add 1 to i and j indices */
1157       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1158       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1159       if (ja) {
1160         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1161         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1162       }
1163     }
1164   } else {
1165     *ia = tia;
1166     if (ja) *ja = tja;
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1172 {
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   if (!ia) PetscFunctionReturn(0);
1177   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1178     ierr = PetscFree(*ia);CHKERRQ(ierr);
1179     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1180   }
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1185 {
1186   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190 #if defined(PETSC_USE_LOG)
1191   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1192 #endif
1193   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1194   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1195   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1196   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1197   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1198   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1200   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1201   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1202   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1203   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1204   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1205   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1206 
1207   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1208   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1209   ierr = PetscFree(A->data);CHKERRQ(ierr);
1210 
1211   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1222 #if defined(PETSC_HAVE_HYPRE)
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1224 #endif
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1229 {
1230   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   switch (op) {
1235   case MAT_ROW_ORIENTED:
1236     a->roworiented = flg;
1237     break;
1238   case MAT_KEEP_NONZERO_PATTERN:
1239     a->keepnonzeropattern = flg;
1240     break;
1241   case MAT_NEW_NONZERO_LOCATIONS:
1242     a->nonew = (flg ? 0 : 1);
1243     break;
1244   case MAT_NEW_NONZERO_LOCATION_ERR:
1245     a->nonew = (flg ? -1 : 0);
1246     break;
1247   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1248     a->nonew = (flg ? -2 : 0);
1249     break;
1250   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1251     a->nounused = (flg ? -1 : 0);
1252     break;
1253   case MAT_NEW_DIAGONALS:
1254   case MAT_IGNORE_OFF_PROC_ENTRIES:
1255   case MAT_USE_HASH_TABLE:
1256     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1257     break;
1258   case MAT_SPD:
1259   case MAT_SYMMETRIC:
1260   case MAT_STRUCTURALLY_SYMMETRIC:
1261   case MAT_HERMITIAN:
1262   case MAT_SYMMETRY_ETERNAL:
1263   case MAT_SUBMAT_SINGLEIS:
1264   case MAT_STRUCTURE_ONLY:
1265     /* These options are handled directly by MatSetOption() */
1266     break;
1267   default:
1268     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1269   }
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1274 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1275 {
1276   PetscErrorCode ierr;
1277   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1278   MatScalar      *aa_i;
1279   PetscScalar    *v_i;
1280 
1281   PetscFunctionBegin;
1282   bs  = A->rmap->bs;
1283   bs2 = bs*bs;
1284   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1285 
1286   bn  = row/bs;   /* Block number */
1287   bp  = row % bs; /* Block Position */
1288   M   = ai[bn+1] - ai[bn];
1289   *nz = bs*M;
1290 
1291   if (v) {
1292     *v = 0;
1293     if (*nz) {
1294       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1295       for (i=0; i<M; i++) { /* for each block in the block row */
1296         v_i  = *v + i*bs;
1297         aa_i = aa + bs2*(ai[bn] + i);
1298         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1299       }
1300     }
1301   }
1302 
1303   if (idx) {
1304     *idx = 0;
1305     if (*nz) {
1306       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1307       for (i=0; i<M; i++) { /* for each block in the block row */
1308         idx_i = *idx + i*bs;
1309         itmp  = bs*aj[ai[bn] + i];
1310         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1311       }
1312     }
1313   }
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1318 {
1319   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1328 {
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1333   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1338 {
1339   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1340   Mat            C;
1341   PetscErrorCode ierr;
1342   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1343   PetscInt       *rows,*cols,bs2=a->bs2;
1344   MatScalar      *array;
1345 
1346   PetscFunctionBegin;
1347   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1348     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1349 
1350     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1351     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1352     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1353     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1354     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1355     ierr = PetscFree(col);CHKERRQ(ierr);
1356   } else {
1357     C = *B;
1358   }
1359 
1360   array = a->a;
1361   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1362   for (i=0; i<mbs; i++) {
1363     cols[0] = i*bs;
1364     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1365     len = ai[i+1] - ai[i];
1366     for (j=0; j<len; j++) {
1367       rows[0] = (*aj++)*bs;
1368       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1369       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1370       array += bs2;
1371     }
1372   }
1373   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1374 
1375   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377 
1378   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1379     *B = C;
1380   } else {
1381     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1382   }
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1387 {
1388   PetscErrorCode ierr;
1389   Mat            Btrans;
1390 
1391   PetscFunctionBegin;
1392   *f   = PETSC_FALSE;
1393   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1394   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1395   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1400 {
1401   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1402   PetscErrorCode ierr;
1403   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1404   int            fd;
1405   PetscScalar    *aa;
1406   FILE           *file;
1407 
1408   PetscFunctionBegin;
1409   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1410   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1411   col_lens[0] = MAT_FILE_CLASSID;
1412 
1413   col_lens[1] = A->rmap->N;
1414   col_lens[2] = A->cmap->n;
1415   col_lens[3] = a->nz*bs2;
1416 
1417   /* store lengths of each row and write (including header) to file */
1418   count = 0;
1419   for (i=0; i<a->mbs; i++) {
1420     for (j=0; j<bs; j++) {
1421       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1422     }
1423   }
1424   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1425   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1426 
1427   /* store column indices (zero start index) */
1428   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1429   count = 0;
1430   for (i=0; i<a->mbs; i++) {
1431     for (j=0; j<bs; j++) {
1432       for (k=a->i[i]; k<a->i[i+1]; k++) {
1433         for (l=0; l<bs; l++) {
1434           jj[count++] = bs*a->j[k] + l;
1435         }
1436       }
1437     }
1438   }
1439   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1440   ierr = PetscFree(jj);CHKERRQ(ierr);
1441 
1442   /* store nonzero values */
1443   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1444   count = 0;
1445   for (i=0; i<a->mbs; i++) {
1446     for (j=0; j<bs; j++) {
1447       for (k=a->i[i]; k<a->i[i+1]; k++) {
1448         for (l=0; l<bs; l++) {
1449           aa[count++] = a->a[bs2*k + l*bs + j];
1450         }
1451       }
1452     }
1453   }
1454   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1455   ierr = PetscFree(aa);CHKERRQ(ierr);
1456 
1457   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1458   if (file) {
1459     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1460   }
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1465 {
1466   PetscErrorCode ierr;
1467   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1468   PetscInt       i,bs = A->rmap->bs,k;
1469 
1470   PetscFunctionBegin;
1471   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1472   for (i=0; i<a->mbs; i++) {
1473     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1474     for (k=a->i[i]; k<a->i[i+1]; k++) {
1475       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1476     }
1477     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1478   }
1479   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1484 {
1485   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1486   PetscErrorCode    ierr;
1487   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1488   PetscViewerFormat format;
1489 
1490   PetscFunctionBegin;
1491   if (A->structure_only) {
1492     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1493     PetscFunctionReturn(0);
1494   }
1495 
1496   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1497   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1498     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1499   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1500     const char *matname;
1501     Mat        aij;
1502     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1503     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1504     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1505     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1507   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1508       PetscFunctionReturn(0);
1509   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1510     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1511     for (i=0; i<a->mbs; i++) {
1512       for (j=0; j<bs; j++) {
1513         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1514         for (k=a->i[i]; k<a->i[i+1]; k++) {
1515           for (l=0; l<bs; l++) {
1516 #if defined(PETSC_USE_COMPLEX)
1517             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1518               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1519                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1520             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1521               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1522                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1523             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1524               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1525             }
1526 #else
1527             if (a->a[bs2*k + l*bs + j] != 0.0) {
1528               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1529             }
1530 #endif
1531           }
1532         }
1533         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1534       }
1535     }
1536     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1537   } else {
1538     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1539     for (i=0; i<a->mbs; i++) {
1540       for (j=0; j<bs; j++) {
1541         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1542         for (k=a->i[i]; k<a->i[i+1]; k++) {
1543           for (l=0; l<bs; l++) {
1544 #if defined(PETSC_USE_COMPLEX)
1545             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1547                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1548             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1550                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1551             } else {
1552               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1553             }
1554 #else
1555             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1556 #endif
1557           }
1558         }
1559         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1560       }
1561     }
1562     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1563   }
1564   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #include <petscdraw.h>
1569 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1570 {
1571   Mat               A = (Mat) Aa;
1572   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1573   PetscErrorCode    ierr;
1574   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1575   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1576   MatScalar         *aa;
1577   PetscViewer       viewer;
1578   PetscViewerFormat format;
1579 
1580   PetscFunctionBegin;
1581   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1582   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1583   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1584 
1585   /* loop over matrix elements drawing boxes */
1586 
1587   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1588     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1589     /* Blue for negative, Cyan for zero and  Red for positive */
1590     color = PETSC_DRAW_BLUE;
1591     for (i=0,row=0; i<mbs; i++,row+=bs) {
1592       for (j=a->i[i]; j<a->i[i+1]; j++) {
1593         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1594         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1595         aa  = a->a + j*bs2;
1596         for (k=0; k<bs; k++) {
1597           for (l=0; l<bs; l++) {
1598             if (PetscRealPart(*aa++) >=  0.) continue;
1599             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1600           }
1601         }
1602       }
1603     }
1604     color = PETSC_DRAW_CYAN;
1605     for (i=0,row=0; i<mbs; i++,row+=bs) {
1606       for (j=a->i[i]; j<a->i[i+1]; j++) {
1607         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1608         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1609         aa  = a->a + j*bs2;
1610         for (k=0; k<bs; k++) {
1611           for (l=0; l<bs; l++) {
1612             if (PetscRealPart(*aa++) != 0.) continue;
1613             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1614           }
1615         }
1616       }
1617     }
1618     color = PETSC_DRAW_RED;
1619     for (i=0,row=0; i<mbs; i++,row+=bs) {
1620       for (j=a->i[i]; j<a->i[i+1]; j++) {
1621         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1622         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1623         aa  = a->a + j*bs2;
1624         for (k=0; k<bs; k++) {
1625           for (l=0; l<bs; l++) {
1626             if (PetscRealPart(*aa++) <= 0.) continue;
1627             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1628           }
1629         }
1630       }
1631     }
1632     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1633   } else {
1634     /* use contour shading to indicate magnitude of values */
1635     /* first determine max of all nonzero values */
1636     PetscReal minv = 0.0, maxv = 0.0;
1637     PetscDraw popup;
1638 
1639     for (i=0; i<a->nz*a->bs2; i++) {
1640       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1641     }
1642     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1643     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1644     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1645 
1646     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1647     for (i=0,row=0; i<mbs; i++,row+=bs) {
1648       for (j=a->i[i]; j<a->i[i+1]; j++) {
1649         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1650         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1651         aa  = a->a + j*bs2;
1652         for (k=0; k<bs; k++) {
1653           for (l=0; l<bs; l++) {
1654             MatScalar v = *aa++;
1655             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1656             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1657           }
1658         }
1659       }
1660     }
1661     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1662   }
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1667 {
1668   PetscErrorCode ierr;
1669   PetscReal      xl,yl,xr,yr,w,h;
1670   PetscDraw      draw;
1671   PetscBool      isnull;
1672 
1673   PetscFunctionBegin;
1674   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1675   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1676   if (isnull) PetscFunctionReturn(0);
1677 
1678   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1679   xr  += w;          yr += h;        xl = -w;     yl = -h;
1680   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1681   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1682   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1683   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1684   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1689 {
1690   PetscErrorCode ierr;
1691   PetscBool      iascii,isbinary,isdraw;
1692 
1693   PetscFunctionBegin;
1694   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1696   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1697   if (iascii) {
1698     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1699   } else if (isbinary) {
1700     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1701   } else if (isdraw) {
1702     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1703   } else {
1704     Mat B;
1705     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1706     ierr = MatView(B,viewer);CHKERRQ(ierr);
1707     ierr = MatDestroy(&B);CHKERRQ(ierr);
1708   }
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 
1713 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1714 {
1715   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1716   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1717   PetscInt    *ai = a->i,*ailen = a->ilen;
1718   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1719   MatScalar   *ap,*aa = a->a;
1720 
1721   PetscFunctionBegin;
1722   for (k=0; k<m; k++) { /* loop over rows */
1723     row = im[k]; brow = row/bs;
1724     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1725     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1726     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1727     nrow = ailen[brow];
1728     for (l=0; l<n; l++) { /* loop over columns */
1729       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1730       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1731       col  = in[l];
1732       bcol = col/bs;
1733       cidx = col%bs;
1734       ridx = row%bs;
1735       high = nrow;
1736       low  = 0; /* assume unsorted */
1737       while (high-low > 5) {
1738         t = (low+high)/2;
1739         if (rp[t] > bcol) high = t;
1740         else             low  = t;
1741       }
1742       for (i=low; i<high; i++) {
1743         if (rp[i] > bcol) break;
1744         if (rp[i] == bcol) {
1745           *v++ = ap[bs2*i+bs*cidx+ridx];
1746           goto finished;
1747         }
1748       }
1749       *v++ = 0.0;
1750 finished:;
1751     }
1752   }
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1757 {
1758   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1759   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1760   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1761   PetscErrorCode    ierr;
1762   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1763   PetscBool         roworiented=a->roworiented;
1764   const PetscScalar *value     = v;
1765   MatScalar         *ap=NULL,*aa = a->a,*bap;
1766 
1767   PetscFunctionBegin;
1768   if (roworiented) {
1769     stepval = (n-1)*bs;
1770   } else {
1771     stepval = (m-1)*bs;
1772   }
1773   for (k=0; k<m; k++) { /* loop over added rows */
1774     row = im[k];
1775     if (row < 0) continue;
1776 #if defined(PETSC_USE_DEBUG)
1777     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1778 #endif
1779     rp   = aj + ai[row];
1780     if (!A->structure_only) ap = aa + bs2*ai[row];
1781     rmax = imax[row];
1782     nrow = ailen[row];
1783     low  = 0;
1784     high = nrow;
1785     for (l=0; l<n; l++) { /* loop over added columns */
1786       if (in[l] < 0) continue;
1787 #if defined(PETSC_USE_DEBUG)
1788       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1789 #endif
1790       col = in[l];
1791       if (!A->structure_only) {
1792         if (roworiented) {
1793           value = v + (k*(stepval+bs) + l)*bs;
1794         } else {
1795           value = v + (l*(stepval+bs) + k)*bs;
1796         }
1797       }
1798       if (col <= lastcol) low = 0;
1799       else high = nrow;
1800       lastcol = col;
1801       while (high-low > 7) {
1802         t = (low+high)/2;
1803         if (rp[t] > col) high = t;
1804         else             low  = t;
1805       }
1806       for (i=low; i<high; i++) {
1807         if (rp[i] > col) break;
1808         if (rp[i] == col) {
1809           if (A->structure_only) goto noinsert2;
1810           bap = ap +  bs2*i;
1811           if (roworiented) {
1812             if (is == ADD_VALUES) {
1813               for (ii=0; ii<bs; ii++,value+=stepval) {
1814                 for (jj=ii; jj<bs2; jj+=bs) {
1815                   bap[jj] += *value++;
1816                 }
1817               }
1818             } else {
1819               for (ii=0; ii<bs; ii++,value+=stepval) {
1820                 for (jj=ii; jj<bs2; jj+=bs) {
1821                   bap[jj] = *value++;
1822                 }
1823               }
1824             }
1825           } else {
1826             if (is == ADD_VALUES) {
1827               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1828                 for (jj=0; jj<bs; jj++) {
1829                   bap[jj] += value[jj];
1830                 }
1831                 bap += bs;
1832               }
1833             } else {
1834               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1835                 for (jj=0; jj<bs; jj++) {
1836                   bap[jj]  = value[jj];
1837                 }
1838                 bap += bs;
1839               }
1840             }
1841           }
1842           goto noinsert2;
1843         }
1844       }
1845       if (nonew == 1) goto noinsert2;
1846       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1847       if (A->structure_only) {
1848         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1849       } else {
1850         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1851       }
1852       N = nrow++ - 1; high++;
1853       /* shift up all the later entries in this row */
1854       for (ii=N; ii>=i; ii--) {
1855         rp[ii+1] = rp[ii];
1856         if (!A->structure_only) {
1857           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1858         }
1859       }
1860       if (N >= i && !A->structure_only) {
1861         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1862       }
1863 
1864       rp[i] = col;
1865       if (!A->structure_only) {
1866         bap   = ap +  bs2*i;
1867         if (roworiented) {
1868           for (ii=0; ii<bs; ii++,value+=stepval) {
1869             for (jj=ii; jj<bs2; jj+=bs) {
1870               bap[jj] = *value++;
1871             }
1872           }
1873         } else {
1874           for (ii=0; ii<bs; ii++,value+=stepval) {
1875             for (jj=0; jj<bs; jj++) {
1876               *bap++ = *value++;
1877             }
1878           }
1879         }
1880       }
1881 noinsert2:;
1882       low = i;
1883     }
1884     ailen[row] = nrow;
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1890 {
1891   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1892   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1893   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1894   PetscErrorCode ierr;
1895   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1896   MatScalar      *aa  = a->a,*ap;
1897   PetscReal      ratio=0.6;
1898 
1899   PetscFunctionBegin;
1900   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1901 
1902   if (m) rmax = ailen[0];
1903   for (i=1; i<mbs; i++) {
1904     /* move each row back by the amount of empty slots (fshift) before it*/
1905     fshift += imax[i-1] - ailen[i-1];
1906     rmax    = PetscMax(rmax,ailen[i]);
1907     if (fshift) {
1908       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1909       N  = ailen[i];
1910       for (j=0; j<N; j++) {
1911         ip[j-fshift] = ip[j];
1912         if (!A->structure_only) {
1913           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1914         }
1915       }
1916     }
1917     ai[i] = ai[i-1] + ailen[i-1];
1918   }
1919   if (mbs) {
1920     fshift += imax[mbs-1] - ailen[mbs-1];
1921     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1922   }
1923 
1924   /* reset ilen and imax for each row */
1925   a->nonzerorowcnt = 0;
1926   if (A->structure_only) {
1927     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1928   } else { /* !A->structure_only */
1929     for (i=0; i<mbs; i++) {
1930       ailen[i] = imax[i] = ai[i+1] - ai[i];
1931       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1932     }
1933   }
1934   a->nz = ai[mbs];
1935 
1936   /* diagonals may have moved, so kill the diagonal pointers */
1937   a->idiagvalid = PETSC_FALSE;
1938   if (fshift && a->diag) {
1939     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1940     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1941     a->diag = 0;
1942   }
1943   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1944   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1945   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1946   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1947 
1948   A->info.mallocs    += a->reallocs;
1949   a->reallocs         = 0;
1950   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1951   a->rmax             = rmax;
1952 
1953   if (!A->structure_only) {
1954     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1955   }
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 /*
1960    This function returns an array of flags which indicate the locations of contiguous
1961    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1962    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1963    Assume: sizes should be long enough to hold all the values.
1964 */
1965 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1966 {
1967   PetscInt  i,j,k,row;
1968   PetscBool flg;
1969 
1970   PetscFunctionBegin;
1971   for (i=0,j=0; i<n; j++) {
1972     row = idx[i];
1973     if (row%bs!=0) { /* Not the begining of a block */
1974       sizes[j] = 1;
1975       i++;
1976     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1977       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1978       i++;
1979     } else { /* Begining of the block, so check if the complete block exists */
1980       flg = PETSC_TRUE;
1981       for (k=1; k<bs; k++) {
1982         if (row+k != idx[i+k]) { /* break in the block */
1983           flg = PETSC_FALSE;
1984           break;
1985         }
1986       }
1987       if (flg) { /* No break in the bs */
1988         sizes[j] = bs;
1989         i       += bs;
1990       } else {
1991         sizes[j] = 1;
1992         i++;
1993       }
1994     }
1995   }
1996   *bs_max = j;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2001 {
2002   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2003   PetscErrorCode    ierr;
2004   PetscInt          i,j,k,count,*rows;
2005   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2006   PetscScalar       zero = 0.0;
2007   MatScalar         *aa;
2008   const PetscScalar *xx;
2009   PetscScalar       *bb;
2010 
2011   PetscFunctionBegin;
2012   /* fix right hand side if needed */
2013   if (x && b) {
2014     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2015     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2016     for (i=0; i<is_n; i++) {
2017       bb[is_idx[i]] = diag*xx[is_idx[i]];
2018     }
2019     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2020     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2021   }
2022 
2023   /* Make a copy of the IS and  sort it */
2024   /* allocate memory for rows,sizes */
2025   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2026 
2027   /* copy IS values to rows, and sort them */
2028   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2029   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2030 
2031   if (baij->keepnonzeropattern) {
2032     for (i=0; i<is_n; i++) sizes[i] = 1;
2033     bs_max          = is_n;
2034   } else {
2035     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2036     A->nonzerostate++;
2037   }
2038 
2039   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2040     row = rows[j];
2041     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2042     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2043     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2044     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2045       if (diag != (PetscScalar)0.0) {
2046         if (baij->ilen[row/bs] > 0) {
2047           baij->ilen[row/bs]       = 1;
2048           baij->j[baij->i[row/bs]] = row/bs;
2049 
2050           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2051         }
2052         /* Now insert all the diagonal values for this bs */
2053         for (k=0; k<bs; k++) {
2054           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2055         }
2056       } else { /* (diag == 0.0) */
2057         baij->ilen[row/bs] = 0;
2058       } /* end (diag == 0.0) */
2059     } else { /* (sizes[i] != bs) */
2060 #if defined(PETSC_USE_DEBUG)
2061       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2062 #endif
2063       for (k=0; k<count; k++) {
2064         aa[0] =  zero;
2065         aa   += bs;
2066       }
2067       if (diag != (PetscScalar)0.0) {
2068         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2069       }
2070     }
2071   }
2072 
2073   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2074   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2079 {
2080   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2081   PetscErrorCode    ierr;
2082   PetscInt          i,j,k,count;
2083   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2084   PetscScalar       zero = 0.0;
2085   MatScalar         *aa;
2086   const PetscScalar *xx;
2087   PetscScalar       *bb;
2088   PetscBool         *zeroed,vecs = PETSC_FALSE;
2089 
2090   PetscFunctionBegin;
2091   /* fix right hand side if needed */
2092   if (x && b) {
2093     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2094     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2095     vecs = PETSC_TRUE;
2096   }
2097 
2098   /* zero the columns */
2099   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2100   for (i=0; i<is_n; i++) {
2101     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2102     zeroed[is_idx[i]] = PETSC_TRUE;
2103   }
2104   for (i=0; i<A->rmap->N; i++) {
2105     if (!zeroed[i]) {
2106       row = i/bs;
2107       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2108         for (k=0; k<bs; k++) {
2109           col = bs*baij->j[j] + k;
2110           if (zeroed[col]) {
2111             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2112             if (vecs) bb[i] -= aa[0]*xx[col];
2113             aa[0] = 0.0;
2114           }
2115         }
2116       }
2117     } else if (vecs) bb[i] = diag*xx[i];
2118   }
2119   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2120   if (vecs) {
2121     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2122     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2123   }
2124 
2125   /* zero the rows */
2126   for (i=0; i<is_n; i++) {
2127     row   = is_idx[i];
2128     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2129     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2130     for (k=0; k<count; k++) {
2131       aa[0] =  zero;
2132       aa   += bs;
2133     }
2134     if (diag != (PetscScalar)0.0) {
2135       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2136     }
2137   }
2138   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2143 {
2144   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2145   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2146   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2147   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2148   PetscErrorCode ierr;
2149   PetscInt       ridx,cidx,bs2=a->bs2;
2150   PetscBool      roworiented=a->roworiented;
2151   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2152 
2153   PetscFunctionBegin;
2154   for (k=0; k<m; k++) { /* loop over added rows */
2155     row  = im[k];
2156     brow = row/bs;
2157     if (row < 0) continue;
2158 #if defined(PETSC_USE_DEBUG)
2159     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2160 #endif
2161     rp   = aj + ai[brow];
2162     if (!A->structure_only) ap = aa + bs2*ai[brow];
2163     rmax = imax[brow];
2164     nrow = ailen[brow];
2165     low  = 0;
2166     high = nrow;
2167     for (l=0; l<n; l++) { /* loop over added columns */
2168       if (in[l] < 0) continue;
2169 #if defined(PETSC_USE_DEBUG)
2170       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2171 #endif
2172       col  = in[l]; bcol = col/bs;
2173       ridx = row % bs; cidx = col % bs;
2174       if (!A->structure_only) {
2175         if (roworiented) {
2176           value = v[l + k*n];
2177         } else {
2178           value = v[k + l*m];
2179         }
2180       }
2181       if (col <= lastcol) low = 0; else high = nrow;
2182       lastcol = col;
2183       while (high-low > 7) {
2184         t = (low+high)/2;
2185         if (rp[t] > bcol) high = t;
2186         else              low  = t;
2187       }
2188       for (i=low; i<high; i++) {
2189         if (rp[i] > bcol) break;
2190         if (rp[i] == bcol) {
2191           bap = ap +  bs2*i + bs*cidx + ridx;
2192           if (!A->structure_only) {
2193             if (is == ADD_VALUES) *bap += value;
2194             else                  *bap  = value;
2195           }
2196           goto noinsert1;
2197         }
2198       }
2199       if (nonew == 1) goto noinsert1;
2200       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2201       if (A->structure_only) {
2202         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2203       } else {
2204         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2205       }
2206       N = nrow++ - 1; high++;
2207       /* shift up all the later entries in this row */
2208       for (ii=N; ii>=i; ii--) {
2209         rp[ii+1] = rp[ii];
2210         if (!A->structure_only) {
2211           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2212         }
2213       }
2214       if (N>=i && !A->structure_only) {
2215         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2216       }
2217       rp[i]                      = bcol;
2218       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2219       a->nz++;
2220       A->nonzerostate++;
2221 noinsert1:;
2222       low = i;
2223     }
2224     ailen[brow] = nrow;
2225   }
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2230 {
2231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2232   Mat            outA;
2233   PetscErrorCode ierr;
2234   PetscBool      row_identity,col_identity;
2235 
2236   PetscFunctionBegin;
2237   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2238   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2239   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2240   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2241 
2242   outA            = inA;
2243   inA->factortype = MAT_FACTOR_LU;
2244   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2245   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2246 
2247   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2248 
2249   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2250   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2251   a->row = row;
2252   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2253   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2254   a->col = col;
2255 
2256   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2257   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2258   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2259   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2260 
2261   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2262   if (!a->solve_work) {
2263     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2264     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2265   }
2266   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2271 {
2272   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2273   PetscInt    i,nz,mbs;
2274 
2275   PetscFunctionBegin;
2276   nz  = baij->maxnz;
2277   mbs = baij->mbs;
2278   for (i=0; i<nz; i++) {
2279     baij->j[i] = indices[i];
2280   }
2281   baij->nz = nz;
2282   for (i=0; i<mbs; i++) {
2283     baij->ilen[i] = baij->imax[i];
2284   }
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 /*@
2289     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2290        in the matrix.
2291 
2292   Input Parameters:
2293 +  mat - the SeqBAIJ matrix
2294 -  indices - the column indices
2295 
2296   Level: advanced
2297 
2298   Notes:
2299     This can be called if you have precomputed the nonzero structure of the
2300   matrix and want to provide it to the matrix object to improve the performance
2301   of the MatSetValues() operation.
2302 
2303     You MUST have set the correct numbers of nonzeros per row in the call to
2304   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2305 
2306     MUST be called before any calls to MatSetValues();
2307 
2308 @*/
2309 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2310 {
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2315   PetscValidPointer(indices,2);
2316   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2321 {
2322   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2323   PetscErrorCode ierr;
2324   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2325   PetscReal      atmp;
2326   PetscScalar    *x,zero = 0.0;
2327   MatScalar      *aa;
2328   PetscInt       ncols,brow,krow,kcol;
2329 
2330   PetscFunctionBegin;
2331   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2332   bs  = A->rmap->bs;
2333   aa  = a->a;
2334   ai  = a->i;
2335   aj  = a->j;
2336   mbs = a->mbs;
2337 
2338   ierr = VecSet(v,zero);CHKERRQ(ierr);
2339   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2340   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2341   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2342   for (i=0; i<mbs; i++) {
2343     ncols = ai[1] - ai[0]; ai++;
2344     brow  = bs*i;
2345     for (j=0; j<ncols; j++) {
2346       for (kcol=0; kcol<bs; kcol++) {
2347         for (krow=0; krow<bs; krow++) {
2348           atmp = PetscAbsScalar(*aa);aa++;
2349           row  = brow + krow;   /* row index */
2350           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2351         }
2352       }
2353       aj++;
2354     }
2355   }
2356   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2361 {
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   /* If the two matrices have the same copy implementation, use fast copy. */
2366   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2367     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2368     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2369     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2370 
2371     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2372     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2373     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2374     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2375   } else {
2376     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2377   }
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2382 {
2383   PetscErrorCode ierr;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2391 {
2392   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2393 
2394   PetscFunctionBegin;
2395   *array = a->a;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2400 {
2401   PetscFunctionBegin;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2406 {
2407   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2408   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2409   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2410   PetscErrorCode ierr;
2411 
2412   PetscFunctionBegin;
2413   /* Set the number of nonzeros in the new matrix */
2414   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2419 {
2420   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2421   PetscErrorCode ierr;
2422   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2423   PetscBLASInt   one=1;
2424 
2425   PetscFunctionBegin;
2426   if (str == SAME_NONZERO_PATTERN) {
2427     PetscScalar  alpha = a;
2428     PetscBLASInt bnz;
2429     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2430     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2431     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2432   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2433     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2434   } else {
2435     Mat      B;
2436     PetscInt *nnz;
2437     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2438     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2439     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2440     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2441     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2442     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2443     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2444     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2445     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2446     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2447     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2448     ierr = PetscFree(nnz);CHKERRQ(ierr);
2449   }
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2454 {
2455   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2456   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2457   MatScalar   *aa = a->a;
2458 
2459   PetscFunctionBegin;
2460   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2465 {
2466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2467   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2468   MatScalar   *aa = a->a;
2469 
2470   PetscFunctionBegin;
2471   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 /*
2476     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2477 */
2478 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2479 {
2480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2481   PetscErrorCode ierr;
2482   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2483   PetscInt       nz = a->i[m],row,*jj,mr,col;
2484 
2485   PetscFunctionBegin;
2486   *nn = n;
2487   if (!ia) PetscFunctionReturn(0);
2488   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2489   else {
2490     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2491     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2492     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2493     jj   = a->j;
2494     for (i=0; i<nz; i++) {
2495       collengths[jj[i]]++;
2496     }
2497     cia[0] = oshift;
2498     for (i=0; i<n; i++) {
2499       cia[i+1] = cia[i] + collengths[i];
2500     }
2501     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2502     jj   = a->j;
2503     for (row=0; row<m; row++) {
2504       mr = a->i[row+1] - a->i[row];
2505       for (i=0; i<mr; i++) {
2506         col = *jj++;
2507 
2508         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2509       }
2510     }
2511     ierr = PetscFree(collengths);CHKERRQ(ierr);
2512     *ia  = cia; *ja = cja;
2513   }
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2518 {
2519   PetscErrorCode ierr;
2520 
2521   PetscFunctionBegin;
2522   if (!ia) PetscFunctionReturn(0);
2523   ierr = PetscFree(*ia);CHKERRQ(ierr);
2524   ierr = PetscFree(*ja);CHKERRQ(ierr);
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 /*
2529  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2530  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2531  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2532  */
2533 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2534 {
2535   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2536   PetscErrorCode ierr;
2537   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2538   PetscInt       nz = a->i[m],row,*jj,mr,col;
2539   PetscInt       *cspidx;
2540 
2541   PetscFunctionBegin;
2542   *nn = n;
2543   if (!ia) PetscFunctionReturn(0);
2544 
2545   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2546   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2547   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2548   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2549   jj   = a->j;
2550   for (i=0; i<nz; i++) {
2551     collengths[jj[i]]++;
2552   }
2553   cia[0] = oshift;
2554   for (i=0; i<n; i++) {
2555     cia[i+1] = cia[i] + collengths[i];
2556   }
2557   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2558   jj   = a->j;
2559   for (row=0; row<m; row++) {
2560     mr = a->i[row+1] - a->i[row];
2561     for (i=0; i<mr; i++) {
2562       col = *jj++;
2563       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2564       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2565     }
2566   }
2567   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2568   *ia    = cia; *ja = cja;
2569   *spidx = cspidx;
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2574 {
2575   PetscErrorCode ierr;
2576 
2577   PetscFunctionBegin;
2578   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2579   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2584 {
2585   PetscErrorCode ierr;
2586   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2587 
2588   PetscFunctionBegin;
2589   if (!Y->preallocated || !aij->nz) {
2590     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2591   }
2592   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 /* -------------------------------------------------------------------*/
2597 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2598                                        MatGetRow_SeqBAIJ,
2599                                        MatRestoreRow_SeqBAIJ,
2600                                        MatMult_SeqBAIJ_N,
2601                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2602                                        MatMultTranspose_SeqBAIJ,
2603                                        MatMultTransposeAdd_SeqBAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                /* 10*/ 0,
2608                                        MatLUFactor_SeqBAIJ,
2609                                        0,
2610                                        0,
2611                                        MatTranspose_SeqBAIJ,
2612                                /* 15*/ MatGetInfo_SeqBAIJ,
2613                                        MatEqual_SeqBAIJ,
2614                                        MatGetDiagonal_SeqBAIJ,
2615                                        MatDiagonalScale_SeqBAIJ,
2616                                        MatNorm_SeqBAIJ,
2617                                /* 20*/ 0,
2618                                        MatAssemblyEnd_SeqBAIJ,
2619                                        MatSetOption_SeqBAIJ,
2620                                        MatZeroEntries_SeqBAIJ,
2621                                /* 24*/ MatZeroRows_SeqBAIJ,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                /* 29*/ MatSetUp_SeqBAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                /* 34*/ MatDuplicate_SeqBAIJ,
2632                                        0,
2633                                        0,
2634                                        MatILUFactor_SeqBAIJ,
2635                                        0,
2636                                /* 39*/ MatAXPY_SeqBAIJ,
2637                                        MatCreateSubMatrices_SeqBAIJ,
2638                                        MatIncreaseOverlap_SeqBAIJ,
2639                                        MatGetValues_SeqBAIJ,
2640                                        MatCopy_SeqBAIJ,
2641                                /* 44*/ 0,
2642                                        MatScale_SeqBAIJ,
2643                                        MatShift_SeqBAIJ,
2644                                        0,
2645                                        MatZeroRowsColumns_SeqBAIJ,
2646                                /* 49*/ 0,
2647                                        MatGetRowIJ_SeqBAIJ,
2648                                        MatRestoreRowIJ_SeqBAIJ,
2649                                        MatGetColumnIJ_SeqBAIJ,
2650                                        MatRestoreColumnIJ_SeqBAIJ,
2651                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        MatSetValuesBlocked_SeqBAIJ,
2656                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2657                                        MatDestroy_SeqBAIJ,
2658                                        MatView_SeqBAIJ,
2659                                        0,
2660                                        0,
2661                                /* 64*/ 0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2667                                        0,
2668                                        MatConvert_Basic,
2669                                        0,
2670                                        0,
2671                                /* 74*/ 0,
2672                                        MatFDColoringApply_BAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                /* 79*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        MatLoad_SeqBAIJ,
2681                                /* 84*/ 0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                /* 89*/ 0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                /* 94*/ 0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                /* 99*/ 0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                /*104*/ 0,
2702                                        MatRealPart_SeqBAIJ,
2703                                        MatImaginaryPart_SeqBAIJ,
2704                                        0,
2705                                        0,
2706                                /*109*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        MatMissingDiagonal_SeqBAIJ,
2711                                /*114*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                /*119*/ 0,
2717                                        0,
2718                                        MatMultHermitianTranspose_SeqBAIJ,
2719                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2720                                        0,
2721                                /*124*/ 0,
2722                                        0,
2723                                        MatInvertBlockDiagonal_SeqBAIJ,
2724                                        0,
2725                                        0,
2726                                /*129*/ 0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                /*134*/ 0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                /*139*/ MatSetBlockSizes_Default,
2737                                        0,
2738                                        0,
2739                                        MatFDColoringSetUp_SeqXAIJ,
2740                                        0,
2741                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2742                                        MatDestroySubMatrices_SeqBAIJ
2743 };
2744 
2745 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2746 {
2747   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2748   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2749   PetscErrorCode ierr;
2750 
2751   PetscFunctionBegin;
2752   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2753 
2754   /* allocate space for values if not already there */
2755   if (!aij->saved_values) {
2756     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2757     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2758   }
2759 
2760   /* copy values over */
2761   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2766 {
2767   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2768   PetscErrorCode ierr;
2769   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2770 
2771   PetscFunctionBegin;
2772   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2773   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2774 
2775   /* copy values over */
2776   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2782 
2783 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2784 {
2785   Mat_SeqBAIJ    *b;
2786   PetscErrorCode ierr;
2787   PetscInt       i,mbs,nbs,bs2;
2788   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2789 
2790   PetscFunctionBegin;
2791   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2792   if (nz == MAT_SKIP_ALLOCATION) {
2793     skipallocation = PETSC_TRUE;
2794     nz             = 0;
2795   }
2796 
2797   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2798   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2799   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2800   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2801 
2802   B->preallocated = PETSC_TRUE;
2803 
2804   mbs = B->rmap->n/bs;
2805   nbs = B->cmap->n/bs;
2806   bs2 = bs*bs;
2807 
2808   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2809 
2810   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2811   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2812   if (nnz) {
2813     for (i=0; i<mbs; i++) {
2814       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2815       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2816     }
2817   }
2818 
2819   b    = (Mat_SeqBAIJ*)B->data;
2820   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2821   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2822   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2823 
2824   if (!flg) {
2825     switch (bs) {
2826     case 1:
2827       B->ops->mult    = MatMult_SeqBAIJ_1;
2828       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2829       break;
2830     case 2:
2831       B->ops->mult    = MatMult_SeqBAIJ_2;
2832       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2833       break;
2834     case 3:
2835       B->ops->mult    = MatMult_SeqBAIJ_3;
2836       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2837       break;
2838     case 4:
2839       B->ops->mult    = MatMult_SeqBAIJ_4;
2840       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2841       break;
2842     case 5:
2843       B->ops->mult    = MatMult_SeqBAIJ_5;
2844       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2845       break;
2846     case 6:
2847       B->ops->mult    = MatMult_SeqBAIJ_6;
2848       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2849       break;
2850     case 7:
2851       B->ops->mult    = MatMult_SeqBAIJ_7;
2852       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2853       break;
2854     case 9:
2855 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2856       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2858 #else
2859       B->ops->mult    = MatMult_SeqBAIJ_N;
2860       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2861 #endif
2862       break;
2863     case 11:
2864       B->ops->mult    = MatMult_SeqBAIJ_11;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2866       break;
2867     case 15:
2868       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2870       break;
2871     default:
2872       B->ops->mult    = MatMult_SeqBAIJ_N;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2874       break;
2875     }
2876   }
2877   B->ops->sor = MatSOR_SeqBAIJ;
2878   b->mbs = mbs;
2879   b->nbs = nbs;
2880   if (!skipallocation) {
2881     if (!b->imax) {
2882       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2883       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2884 
2885       b->free_imax_ilen = PETSC_TRUE;
2886     }
2887     /* b->ilen will count nonzeros in each block row so far. */
2888     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2889     if (!nnz) {
2890       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2891       else if (nz < 0) nz = 1;
2892       for (i=0; i<mbs; i++) b->imax[i] = nz;
2893       nz = nz*mbs;
2894     } else {
2895       nz = 0;
2896       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2897     }
2898 
2899     /* allocate the matrix space */
2900     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2901     if (B->structure_only) {
2902       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2903       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2904       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2905     } else {
2906       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2907       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2908       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2909     }
2910     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2911 
2912     if (B->structure_only) {
2913       b->singlemalloc = PETSC_FALSE;
2914       b->free_a       = PETSC_FALSE;
2915     } else {
2916       b->singlemalloc = PETSC_TRUE;
2917       b->free_a       = PETSC_TRUE;
2918     }
2919     b->free_ij = PETSC_TRUE;
2920 
2921     b->i[0] = 0;
2922     for (i=1; i<mbs+1; i++) {
2923       b->i[i] = b->i[i-1] + b->imax[i-1];
2924     }
2925 
2926   } else {
2927     b->free_a  = PETSC_FALSE;
2928     b->free_ij = PETSC_FALSE;
2929   }
2930 
2931   b->bs2              = bs2;
2932   b->mbs              = mbs;
2933   b->nz               = 0;
2934   b->maxnz            = nz;
2935   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2936   B->was_assembled    = PETSC_FALSE;
2937   B->assembled        = PETSC_FALSE;
2938   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2943 {
2944   PetscInt       i,m,nz,nz_max=0,*nnz;
2945   PetscScalar    *values=0;
2946   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2947   PetscErrorCode ierr;
2948 
2949   PetscFunctionBegin;
2950   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2951   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2952   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2953   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2954   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2955   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2956   m    = B->rmap->n/bs;
2957 
2958   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2959   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2960   for (i=0; i<m; i++) {
2961     nz = ii[i+1]- ii[i];
2962     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2963     nz_max = PetscMax(nz_max, nz);
2964     nnz[i] = nz;
2965   }
2966   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2967   ierr = PetscFree(nnz);CHKERRQ(ierr);
2968 
2969   values = (PetscScalar*)V;
2970   if (!values) {
2971     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2972   }
2973   for (i=0; i<m; i++) {
2974     PetscInt          ncols  = ii[i+1] - ii[i];
2975     const PetscInt    *icols = jj + ii[i];
2976     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2977     if (!roworiented) {
2978       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2979     } else {
2980       PetscInt j;
2981       for (j=0; j<ncols; j++) {
2982         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2983         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2984       }
2985     }
2986   }
2987   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2988   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2989   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2990   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2991   PetscFunctionReturn(0);
2992 }
2993 
2994 /*MC
2995    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2996    block sparse compressed row format.
2997 
2998    Options Database Keys:
2999 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3000 
3001   Level: beginner
3002 
3003 .seealso: MatCreateSeqBAIJ()
3004 M*/
3005 
3006 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3007 
3008 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3009 {
3010   PetscErrorCode ierr;
3011   PetscMPIInt    size;
3012   Mat_SeqBAIJ    *b;
3013 
3014   PetscFunctionBegin;
3015   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3016   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3017 
3018   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3019   B->data = (void*)b;
3020   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3021 
3022   b->row          = 0;
3023   b->col          = 0;
3024   b->icol         = 0;
3025   b->reallocs     = 0;
3026   b->saved_values = 0;
3027 
3028   b->roworiented        = PETSC_TRUE;
3029   b->nonew              = 0;
3030   b->diag               = 0;
3031   B->spptr              = 0;
3032   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3033   b->keepnonzeropattern = PETSC_FALSE;
3034 
3035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3044 #if defined(PETSC_HAVE_HYPRE)
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3046 #endif
3047   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3052 {
3053   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3054   PetscErrorCode ierr;
3055   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3056 
3057   PetscFunctionBegin;
3058   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3059 
3060   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3061     c->imax           = a->imax;
3062     c->ilen           = a->ilen;
3063     c->free_imax_ilen = PETSC_FALSE;
3064   } else {
3065     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3066     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3067     for (i=0; i<mbs; i++) {
3068       c->imax[i] = a->imax[i];
3069       c->ilen[i] = a->ilen[i];
3070     }
3071     c->free_imax_ilen = PETSC_TRUE;
3072   }
3073 
3074   /* allocate the matrix space */
3075   if (mallocmatspace) {
3076     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3077       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3078       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3079 
3080       c->i            = a->i;
3081       c->j            = a->j;
3082       c->singlemalloc = PETSC_FALSE;
3083       c->free_a       = PETSC_TRUE;
3084       c->free_ij      = PETSC_FALSE;
3085       c->parent       = A;
3086       C->preallocated = PETSC_TRUE;
3087       C->assembled    = PETSC_TRUE;
3088 
3089       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3090       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3091       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3092     } else {
3093       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3094       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3095 
3096       c->singlemalloc = PETSC_TRUE;
3097       c->free_a       = PETSC_TRUE;
3098       c->free_ij      = PETSC_TRUE;
3099 
3100       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3101       if (mbs > 0) {
3102         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3103         if (cpvalues == MAT_COPY_VALUES) {
3104           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3105         } else {
3106           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3107         }
3108       }
3109       C->preallocated = PETSC_TRUE;
3110       C->assembled    = PETSC_TRUE;
3111     }
3112   }
3113 
3114   c->roworiented = a->roworiented;
3115   c->nonew       = a->nonew;
3116 
3117   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3118   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3119 
3120   c->bs2         = a->bs2;
3121   c->mbs         = a->mbs;
3122   c->nbs         = a->nbs;
3123 
3124   if (a->diag) {
3125     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3126       c->diag      = a->diag;
3127       c->free_diag = PETSC_FALSE;
3128     } else {
3129       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3130       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3131       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3132       c->free_diag = PETSC_TRUE;
3133     }
3134   } else c->diag = 0;
3135 
3136   c->nz         = a->nz;
3137   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3138   c->solve_work = NULL;
3139   c->mult_work  = NULL;
3140   c->sor_workt  = NULL;
3141   c->sor_work   = NULL;
3142 
3143   c->compressedrow.use   = a->compressedrow.use;
3144   c->compressedrow.nrows = a->compressedrow.nrows;
3145   if (a->compressedrow.use) {
3146     i    = a->compressedrow.nrows;
3147     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3148     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3149     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3150     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3151   } else {
3152     c->compressedrow.use    = PETSC_FALSE;
3153     c->compressedrow.i      = NULL;
3154     c->compressedrow.rindex = NULL;
3155   }
3156   C->nonzerostate = A->nonzerostate;
3157 
3158   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3163 {
3164   PetscErrorCode ierr;
3165 
3166   PetscFunctionBegin;
3167   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3168   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3169   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3170   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3175 {
3176   Mat_SeqBAIJ    *a;
3177   PetscErrorCode ierr;
3178   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3179   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3180   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3181   PetscInt       *masked,nmask,tmp,bs2,ishift;
3182   PetscMPIInt    size;
3183   int            fd;
3184   PetscScalar    *aa;
3185   MPI_Comm       comm;
3186 
3187   PetscFunctionBegin;
3188   /* force binary viewer to load .info file if it has not yet done so */
3189   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3190   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3191   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3192   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3193   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3194   if (bs < 0) bs = 1;
3195   bs2  = bs*bs;
3196 
3197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3198   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3199   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3200   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3201   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3202   M = header[1]; N = header[2]; nz = header[3];
3203 
3204   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3205   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3206 
3207   /*
3208      This code adds extra rows to make sure the number of rows is
3209     divisible by the blocksize
3210   */
3211   mbs        = M/bs;
3212   extra_rows = bs - M + bs*(mbs);
3213   if (extra_rows == bs) extra_rows = 0;
3214   else mbs++;
3215   if (extra_rows) {
3216     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3217   }
3218 
3219   /* Set global sizes if not already set */
3220   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3221     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3222   } else { /* Check if the matrix global sizes are correct */
3223     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3224     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3225       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3226     }
3227     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3228   }
3229 
3230   /* read in row lengths */
3231   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3232   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3233   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3234 
3235   /* read in column indices */
3236   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3237   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3238   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3239 
3240   /* loop over row lengths determining block row lengths */
3241   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3242   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3243   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3244   rowcount = 0;
3245   nzcount  = 0;
3246   for (i=0; i<mbs; i++) {
3247     nmask = 0;
3248     for (j=0; j<bs; j++) {
3249       kmax = rowlengths[rowcount];
3250       for (k=0; k<kmax; k++) {
3251         tmp = jj[nzcount++]/bs;
3252         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3253       }
3254       rowcount++;
3255     }
3256     browlengths[i] += nmask;
3257     /* zero out the mask elements we set */
3258     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3259   }
3260 
3261   /* Do preallocation  */
3262   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3263   a    = (Mat_SeqBAIJ*)newmat->data;
3264 
3265   /* set matrix "i" values */
3266   a->i[0] = 0;
3267   for (i=1; i<= mbs; i++) {
3268     a->i[i]      = a->i[i-1] + browlengths[i-1];
3269     a->ilen[i-1] = browlengths[i-1];
3270   }
3271   a->nz = 0;
3272   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3273 
3274   /* read in nonzero values */
3275   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3276   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3277   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3278 
3279   /* set "a" and "j" values into matrix */
3280   nzcount = 0; jcount = 0;
3281   for (i=0; i<mbs; i++) {
3282     nzcountb = nzcount;
3283     nmask    = 0;
3284     for (j=0; j<bs; j++) {
3285       kmax = rowlengths[i*bs+j];
3286       for (k=0; k<kmax; k++) {
3287         tmp = jj[nzcount++]/bs;
3288         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3289       }
3290     }
3291     /* sort the masked values */
3292     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3293 
3294     /* set "j" values into matrix */
3295     maskcount = 1;
3296     for (j=0; j<nmask; j++) {
3297       a->j[jcount++]  = masked[j];
3298       mask[masked[j]] = maskcount++;
3299     }
3300     /* set "a" values into matrix */
3301     ishift = bs2*a->i[i];
3302     for (j=0; j<bs; j++) {
3303       kmax = rowlengths[i*bs+j];
3304       for (k=0; k<kmax; k++) {
3305         tmp       = jj[nzcountb]/bs;
3306         block     = mask[tmp] - 1;
3307         point     = jj[nzcountb] - bs*tmp;
3308         idx       = ishift + bs2*block + j + bs*point;
3309         a->a[idx] = (MatScalar)aa[nzcountb++];
3310       }
3311     }
3312     /* zero out the mask elements we set */
3313     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3314   }
3315   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3316 
3317   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3318   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3319   ierr = PetscFree(aa);CHKERRQ(ierr);
3320   ierr = PetscFree(jj);CHKERRQ(ierr);
3321   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3322 
3323   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3324   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3325   PetscFunctionReturn(0);
3326 }
3327 
3328 /*@C
3329    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3330    compressed row) format.  For good matrix assembly performance the
3331    user should preallocate the matrix storage by setting the parameter nz
3332    (or the array nnz).  By setting these parameters accurately, performance
3333    during matrix assembly can be increased by more than a factor of 50.
3334 
3335    Collective on MPI_Comm
3336 
3337    Input Parameters:
3338 +  comm - MPI communicator, set to PETSC_COMM_SELF
3339 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3340           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3341 .  m - number of rows
3342 .  n - number of columns
3343 .  nz - number of nonzero blocks  per block row (same for all rows)
3344 -  nnz - array containing the number of nonzero blocks in the various block rows
3345          (possibly different for each block row) or NULL
3346 
3347    Output Parameter:
3348 .  A - the matrix
3349 
3350    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3351    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3352    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3353 
3354    Options Database Keys:
3355 .   -mat_no_unroll - uses code that does not unroll the loops in the
3356                      block calculations (much slower)
3357 .    -mat_block_size - size of the blocks to use
3358 
3359    Level: intermediate
3360 
3361    Notes:
3362    The number of rows and columns must be divisible by blocksize.
3363 
3364    If the nnz parameter is given then the nz parameter is ignored
3365 
3366    A nonzero block is any block that as 1 or more nonzeros in it
3367 
3368    The block AIJ format is fully compatible with standard Fortran 77
3369    storage.  That is, the stored row and column indices can begin at
3370    either one (as in Fortran) or zero.  See the users' manual for details.
3371 
3372    Specify the preallocated storage with either nz or nnz (not both).
3373    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3374    allocation.  See Users-Manual: ch_mat for details.
3375    matrices.
3376 
3377 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3378 @*/
3379 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3380 {
3381   PetscErrorCode ierr;
3382 
3383   PetscFunctionBegin;
3384   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3385   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3386   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3387   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3388   PetscFunctionReturn(0);
3389 }
3390 
3391 /*@C
3392    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3393    per row in the matrix. For good matrix assembly performance the
3394    user should preallocate the matrix storage by setting the parameter nz
3395    (or the array nnz).  By setting these parameters accurately, performance
3396    during matrix assembly can be increased by more than a factor of 50.
3397 
3398    Collective on MPI_Comm
3399 
3400    Input Parameters:
3401 +  B - the matrix
3402 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3403           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3404 .  nz - number of block nonzeros per block row (same for all rows)
3405 -  nnz - array containing the number of block nonzeros in the various block rows
3406          (possibly different for each block row) or NULL
3407 
3408    Options Database Keys:
3409 .   -mat_no_unroll - uses code that does not unroll the loops in the
3410                      block calculations (much slower)
3411 .   -mat_block_size - size of the blocks to use
3412 
3413    Level: intermediate
3414 
3415    Notes:
3416    If the nnz parameter is given then the nz parameter is ignored
3417 
3418    You can call MatGetInfo() to get information on how effective the preallocation was;
3419    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3420    You can also run with the option -info and look for messages with the string
3421    malloc in them to see if additional memory allocation was needed.
3422 
3423    The block AIJ format is fully compatible with standard Fortran 77
3424    storage.  That is, the stored row and column indices can begin at
3425    either one (as in Fortran) or zero.  See the users' manual for details.
3426 
3427    Specify the preallocated storage with either nz or nnz (not both).
3428    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3429    allocation.  See Users-Manual: ch_mat for details.
3430 
3431 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3432 @*/
3433 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3434 {
3435   PetscErrorCode ierr;
3436 
3437   PetscFunctionBegin;
3438   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3439   PetscValidType(B,1);
3440   PetscValidLogicalCollectiveInt(B,bs,2);
3441   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3442   PetscFunctionReturn(0);
3443 }
3444 
3445 /*@C
3446    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3447    (the default sequential PETSc format).
3448 
3449    Collective on MPI_Comm
3450 
3451    Input Parameters:
3452 +  B - the matrix
3453 .  i - the indices into j for the start of each local row (starts with zero)
3454 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3455 -  v - optional values in the matrix
3456 
3457    Level: developer
3458 
3459    Notes:
3460    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3461    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3462    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3463    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3464    block column and the second index is over columns within a block.
3465 
3466 .keywords: matrix, aij, compressed row, sparse
3467 
3468 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3469 @*/
3470 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3471 {
3472   PetscErrorCode ierr;
3473 
3474   PetscFunctionBegin;
3475   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3476   PetscValidType(B,1);
3477   PetscValidLogicalCollectiveInt(B,bs,2);
3478   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3479   PetscFunctionReturn(0);
3480 }
3481 
3482 
3483 /*@
3484      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3485 
3486      Collective on MPI_Comm
3487 
3488    Input Parameters:
3489 +  comm - must be an MPI communicator of size 1
3490 .  bs - size of block
3491 .  m - number of rows
3492 .  n - number of columns
3493 .  i - row indices
3494 .  j - column indices
3495 -  a - matrix values
3496 
3497    Output Parameter:
3498 .  mat - the matrix
3499 
3500    Level: advanced
3501 
3502    Notes:
3503        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3504     once the matrix is destroyed
3505 
3506        You cannot set new nonzero locations into this matrix, that will generate an error.
3507 
3508        The i and j indices are 0 based
3509 
3510        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3511 
3512       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3513       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3514       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3515       with column-major ordering within blocks.
3516 
3517 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3518 
3519 @*/
3520 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3521 {
3522   PetscErrorCode ierr;
3523   PetscInt       ii;
3524   Mat_SeqBAIJ    *baij;
3525 
3526   PetscFunctionBegin;
3527   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3528   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3529 
3530   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3531   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3532   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3533   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3534   baij = (Mat_SeqBAIJ*)(*mat)->data;
3535   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3536   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3537 
3538   baij->i = i;
3539   baij->j = j;
3540   baij->a = a;
3541 
3542   baij->singlemalloc = PETSC_FALSE;
3543   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3544   baij->free_a       = PETSC_FALSE;
3545   baij->free_ij      = PETSC_FALSE;
3546 
3547   for (ii=0; ii<m; ii++) {
3548     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3549 #if defined(PETSC_USE_DEBUG)
3550     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3551 #endif
3552   }
3553 #if defined(PETSC_USE_DEBUG)
3554   for (ii=0; ii<baij->i[m]; ii++) {
3555     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3556     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3557   }
3558 #endif
3559 
3560   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3561   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3562   PetscFunctionReturn(0);
3563 }
3564 
3565 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3566 {
3567   PetscErrorCode ierr;
3568   PetscMPIInt    size;
3569 
3570   PetscFunctionBegin;
3571   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3572   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3573     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3574   } else {
3575     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3576   }
3577   PetscFunctionReturn(0);
3578 }
3579