xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8dd7b8de55100f02b4a49c7581bb3bd4e4b5a7df)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912   PetscErrorCode    ierr;
913 
914   PetscFunctionBegin;
915   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916   stepval = (n-1)*4;
917   for (k=0; k<m; k++) { /* loop over added rows */
918     row  = im[k];
919     rp   = aj + ai[row];
920     ap   = aa + 16*ai[row];
921     nrow = ailen[row];
922     low  = 0;
923     high = nrow;
924     for (l=0; l<n; l++) { /* loop over added columns */
925       col = in[l];
926       if (col <= lastcol)  low = 0;
927       else                high = nrow;
928       lastcol = col;
929       value   = v + k*(stepval+4 + l)*4;
930       while (high-low > 7) {
931         t = (low+high)/2;
932         if (rp[t] > col) high = t;
933         else             low  = t;
934       }
935       for (i=low; i<high; i++) {
936         if (rp[i] > col) break;
937         if (rp[i] == col) {
938           bap = ap +  16*i;
939           for (ii=0; ii<4; ii++,value+=stepval) {
940             for (jj=ii; jj<16; jj+=4) {
941               bap[jj] += *value++;
942             }
943           }
944           goto noinsert2;
945         }
946       }
947       N = nrow++ - 1;
948       high++; /* added new column index thus must search to one higher than before */
949       /* shift up all the later entries in this row */
950       for (ii=N; ii>=i; ii--) {
951         rp[ii+1] = rp[ii];
952         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
953       }
954       if (N >= i) {
955         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
956       }
957       rp[i] = col;
958       bap   = ap +  16*i;
959       for (ii=0; ii<4; ii++,value+=stepval) {
960         for (jj=ii; jj<16; jj+=4) {
961           bap[jj] = *value++;
962         }
963       }
964       noinsert2:;
965       low = i;
966     }
967     ailen[row] = nrow;
968   }
969   PetscFunctionReturnVoid();
970 }
971 
972 #if defined(PETSC_HAVE_FORTRAN_CAPS)
973 #define matsetvalues4_ MATSETVALUES4
974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975 #define matsetvalues4_ matsetvalues4
976 #endif
977 
978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979 {
980   Mat         A  = *AA;
981   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
983   PetscInt    *ai=a->i,*ailen=a->ilen;
984   PetscInt    *aj=a->j,brow,bcol;
985   PetscInt    ridx,cidx,lastcol = -1;
986   MatScalar   *ap,value,*aa=a->a,*bap;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   for (k=0; k<m; k++) { /* loop over added rows */
991     row  = im[k]; brow = row/4;
992     rp   = aj + ai[brow];
993     ap   = aa + 16*ai[brow];
994     nrow = ailen[brow];
995     low  = 0;
996     high = nrow;
997     for (l=0; l<n; l++) { /* loop over added columns */
998       col   = in[l]; bcol = col/4;
999       ridx  = row % 4; cidx = col % 4;
1000       value = v[l + k*n];
1001       if (col <= lastcol)  low = 0;
1002       else                high = nrow;
1003       lastcol = col;
1004       while (high-low > 7) {
1005         t = (low+high)/2;
1006         if (rp[t] > bcol) high = t;
1007         else              low  = t;
1008       }
1009       for (i=low; i<high; i++) {
1010         if (rp[i] > bcol) break;
1011         if (rp[i] == bcol) {
1012           bap   = ap +  16*i + 4*cidx + ridx;
1013           *bap += value;
1014           goto noinsert1;
1015         }
1016       }
1017       N = nrow++ - 1;
1018       high++; /* added new column thus must search to one higher than before */
1019       /* shift up all the later entries in this row */
1020       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1021       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1022       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1023       rp[i]                    = bcol;
1024       ap[16*i + 4*cidx + ridx] = value;
1025 noinsert1:;
1026       low = i;
1027     }
1028     ailen[brow] = nrow;
1029   }
1030   PetscFunctionReturnVoid();
1031 }
1032 
1033 /*
1034      Checks for missing diagonals
1035 */
1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1037 {
1038   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040   PetscInt       *diag,*ii = a->i,i;
1041 
1042   PetscFunctionBegin;
1043   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1044   *missing = PETSC_FALSE;
1045   if (A->rmap->n > 0 && !ii) {
1046     *missing = PETSC_TRUE;
1047     if (d) *d = 0;
1048     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1049   } else {
1050     diag = a->diag;
1051     for (i=0; i<a->mbs; i++) {
1052       if (diag[i] >= ii[i+1]) {
1053         *missing = PETSC_TRUE;
1054         if (d) *d = i;
1055         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1056         break;
1057       }
1058     }
1059   }
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1089 {
1090   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1091   PetscErrorCode ierr;
1092   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1093   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1094 
1095   PetscFunctionBegin;
1096   *nn = n;
1097   if (!ia) PetscFunctionReturn(0);
1098   if (symmetric) {
1099     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1100     nz   = tia[n];
1101   } else {
1102     tia = a->i; tja = a->j;
1103   }
1104 
1105   if (!blockcompressed && bs > 1) {
1106     (*nn) *= bs;
1107     /* malloc & create the natural set of indices */
1108     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1109     if (n) {
1110       (*ia)[0] = oshift;
1111       for (j=1; j<bs; j++) {
1112         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1113       }
1114     }
1115 
1116     for (i=1; i<n; i++) {
1117       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1118       for (j=1; j<bs; j++) {
1119         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1120       }
1121     }
1122     if (n) {
1123       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1124     }
1125 
1126     if (inja) {
1127       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1128       cnt = 0;
1129       for (i=0; i<n; i++) {
1130         for (j=0; j<bs; j++) {
1131           for (k=tia[i]; k<tia[i+1]; k++) {
1132             for (l=0; l<bs; l++) {
1133               (*ja)[cnt++] = bs*tja[k] + l;
1134             }
1135           }
1136         }
1137       }
1138     }
1139 
1140     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1141       ierr = PetscFree(tia);CHKERRQ(ierr);
1142       ierr = PetscFree(tja);CHKERRQ(ierr);
1143     }
1144   } else if (oshift == 1) {
1145     if (symmetric) {
1146       nz = tia[A->rmap->n/bs];
1147       /*  add 1 to i and j indices */
1148       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1149       *ia = tia;
1150       if (ja) {
1151         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1152         *ja = tja;
1153       }
1154     } else {
1155       nz = a->i[A->rmap->n/bs];
1156       /* malloc space and  add 1 to i and j indices */
1157       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1158       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1159       if (ja) {
1160         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1161         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1162       }
1163     }
1164   } else {
1165     *ia = tia;
1166     if (ja) *ja = tja;
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1172 {
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   if (!ia) PetscFunctionReturn(0);
1177   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1178     ierr = PetscFree(*ia);CHKERRQ(ierr);
1179     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1180   }
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1185 {
1186   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190 #if defined(PETSC_USE_LOG)
1191   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1192 #endif
1193   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1194   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1195   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1196   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1197   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1198   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1200   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1201   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1202   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1203   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1204   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1205   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1206 
1207   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1208   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1209   ierr = PetscFree(A->data);CHKERRQ(ierr);
1210 
1211   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1222 #if defined(PETSC_HAVE_HYPRE)
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1224 #endif
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1231 {
1232   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   switch (op) {
1237   case MAT_ROW_ORIENTED:
1238     a->roworiented = flg;
1239     break;
1240   case MAT_KEEP_NONZERO_PATTERN:
1241     a->keepnonzeropattern = flg;
1242     break;
1243   case MAT_NEW_NONZERO_LOCATIONS:
1244     a->nonew = (flg ? 0 : 1);
1245     break;
1246   case MAT_NEW_NONZERO_LOCATION_ERR:
1247     a->nonew = (flg ? -1 : 0);
1248     break;
1249   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1250     a->nonew = (flg ? -2 : 0);
1251     break;
1252   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1253     a->nounused = (flg ? -1 : 0);
1254     break;
1255   case MAT_NEW_DIAGONALS:
1256   case MAT_IGNORE_OFF_PROC_ENTRIES:
1257   case MAT_USE_HASH_TABLE:
1258     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1259     break;
1260   case MAT_SPD:
1261   case MAT_SYMMETRIC:
1262   case MAT_STRUCTURALLY_SYMMETRIC:
1263   case MAT_HERMITIAN:
1264   case MAT_SYMMETRY_ETERNAL:
1265   case MAT_SUBMAT_SINGLEIS:
1266   case MAT_STRUCTURE_ONLY:
1267     /* These options are handled directly by MatSetOption() */
1268     break;
1269   default:
1270     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1271   }
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1276 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1277 {
1278   PetscErrorCode ierr;
1279   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1280   MatScalar      *aa_i;
1281   PetscScalar    *v_i;
1282 
1283   PetscFunctionBegin;
1284   bs  = A->rmap->bs;
1285   bs2 = bs*bs;
1286   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1287 
1288   bn  = row/bs;   /* Block number */
1289   bp  = row % bs; /* Block Position */
1290   M   = ai[bn+1] - ai[bn];
1291   *nz = bs*M;
1292 
1293   if (v) {
1294     *v = 0;
1295     if (*nz) {
1296       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1297       for (i=0; i<M; i++) { /* for each block in the block row */
1298         v_i  = *v + i*bs;
1299         aa_i = aa + bs2*(ai[bn] + i);
1300         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1301       }
1302     }
1303   }
1304 
1305   if (idx) {
1306     *idx = 0;
1307     if (*nz) {
1308       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1309       for (i=0; i<M; i++) { /* for each block in the block row */
1310         idx_i = *idx + i*bs;
1311         itmp  = bs*aj[ai[bn] + i];
1312         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1313       }
1314     }
1315   }
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1320 {
1321   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1322   PetscErrorCode ierr;
1323 
1324   PetscFunctionBegin;
1325   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1330 {
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1335   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1340 {
1341   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1342   Mat            C;
1343   PetscErrorCode ierr;
1344   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1345   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1346   MatScalar      *ata,*aa=a->a;
1347 
1348   PetscFunctionBegin;
1349   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1350   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1351     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1352 
1353     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1354     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1355     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1356     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1357 
1358     at  = (Mat_SeqBAIJ*)C->data;
1359     ati = at->i;
1360     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1361   } else {
1362     C = *B;
1363     at = (Mat_SeqBAIJ*)C->data;
1364     ati = at->i;
1365   }
1366 
1367   atj = at->j;
1368   ata = at->a;
1369 
1370   /* Copy ati into atfill so we have locations of the next free space in atj */
1371   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1372 
1373   /* Walk through A row-wise and mark nonzero entries of A^T. */
1374   for (i=0; i<mbs; i++) {
1375     anzj = ai[i+1] - ai[i];
1376     for (j=0; j<anzj; j++) {
1377       atj[atfill[*aj]] = i;
1378       for (kr=0; kr<bs; kr++) {
1379         for (k=0; k<bs; k++) {
1380           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1381         }
1382       }
1383       atfill[*aj++] += 1;
1384     }
1385   }
1386   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1387   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1388 
1389   /* Clean up temporary space and complete requests. */
1390   ierr = PetscFree(atfill);CHKERRQ(ierr);
1391 
1392   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1393     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1394     *B = C;
1395   } else {
1396     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1402 {
1403   PetscErrorCode ierr;
1404   Mat            Btrans;
1405 
1406   PetscFunctionBegin;
1407   *f   = PETSC_FALSE;
1408   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1409   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1410   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1415 {
1416   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1419   int            fd;
1420   PetscScalar    *aa;
1421   FILE           *file;
1422 
1423   PetscFunctionBegin;
1424   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1425   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1426   col_lens[0] = MAT_FILE_CLASSID;
1427 
1428   col_lens[1] = A->rmap->N;
1429   col_lens[2] = A->cmap->n;
1430   col_lens[3] = a->nz*bs2;
1431 
1432   /* store lengths of each row and write (including header) to file */
1433   count = 0;
1434   for (i=0; i<a->mbs; i++) {
1435     for (j=0; j<bs; j++) {
1436       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1437     }
1438   }
1439   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1440   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1441 
1442   /* store column indices (zero start index) */
1443   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1444   count = 0;
1445   for (i=0; i<a->mbs; i++) {
1446     for (j=0; j<bs; j++) {
1447       for (k=a->i[i]; k<a->i[i+1]; k++) {
1448         for (l=0; l<bs; l++) {
1449           jj[count++] = bs*a->j[k] + l;
1450         }
1451       }
1452     }
1453   }
1454   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1455   ierr = PetscFree(jj);CHKERRQ(ierr);
1456 
1457   /* store nonzero values */
1458   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1459   count = 0;
1460   for (i=0; i<a->mbs; i++) {
1461     for (j=0; j<bs; j++) {
1462       for (k=a->i[i]; k<a->i[i+1]; k++) {
1463         for (l=0; l<bs; l++) {
1464           aa[count++] = a->a[bs2*k + l*bs + j];
1465         }
1466       }
1467     }
1468   }
1469   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1470   ierr = PetscFree(aa);CHKERRQ(ierr);
1471 
1472   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1473   if (file) {
1474     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1475   }
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1480 {
1481   PetscErrorCode ierr;
1482   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1483   PetscInt       i,bs = A->rmap->bs,k;
1484 
1485   PetscFunctionBegin;
1486   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1487   for (i=0; i<a->mbs; i++) {
1488     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1489     for (k=a->i[i]; k<a->i[i+1]; k++) {
1490       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1491     }
1492     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1493   }
1494   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1499 {
1500   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1501   PetscErrorCode    ierr;
1502   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1503   PetscViewerFormat format;
1504 
1505   PetscFunctionBegin;
1506   if (A->structure_only) {
1507     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1508     PetscFunctionReturn(0);
1509   }
1510 
1511   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1512   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1513     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1514   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1515     const char *matname;
1516     Mat        aij;
1517     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1518     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1519     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1520     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1521     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1522   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1523       PetscFunctionReturn(0);
1524   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1525     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1526     for (i=0; i<a->mbs; i++) {
1527       for (j=0; j<bs; j++) {
1528         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1529         for (k=a->i[i]; k<a->i[i+1]; k++) {
1530           for (l=0; l<bs; l++) {
1531 #if defined(PETSC_USE_COMPLEX)
1532             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1533               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1534                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1535             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1537                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             }
1541 #else
1542             if (a->a[bs2*k + l*bs + j] != 0.0) {
1543               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1544             }
1545 #endif
1546           }
1547         }
1548         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1549       }
1550     }
1551     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1552   } else {
1553     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1554     for (i=0; i<a->mbs; i++) {
1555       for (j=0; j<bs; j++) {
1556         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1557         for (k=a->i[i]; k<a->i[i+1]; k++) {
1558           for (l=0; l<bs; l++) {
1559 #if defined(PETSC_USE_COMPLEX)
1560             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1561               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1562                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1563             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1565                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             } else {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             }
1569 #else
1570             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1571 #endif
1572           }
1573         }
1574         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1575       }
1576     }
1577     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1578   }
1579   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 #include <petscdraw.h>
1584 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1585 {
1586   Mat               A = (Mat) Aa;
1587   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1588   PetscErrorCode    ierr;
1589   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1590   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1591   MatScalar         *aa;
1592   PetscViewer       viewer;
1593   PetscViewerFormat format;
1594 
1595   PetscFunctionBegin;
1596   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1597   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1598   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1599 
1600   /* loop over matrix elements drawing boxes */
1601 
1602   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1603     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1604     /* Blue for negative, Cyan for zero and  Red for positive */
1605     color = PETSC_DRAW_BLUE;
1606     for (i=0,row=0; i<mbs; i++,row+=bs) {
1607       for (j=a->i[i]; j<a->i[i+1]; j++) {
1608         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610         aa  = a->a + j*bs2;
1611         for (k=0; k<bs; k++) {
1612           for (l=0; l<bs; l++) {
1613             if (PetscRealPart(*aa++) >=  0.) continue;
1614             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1615           }
1616         }
1617       }
1618     }
1619     color = PETSC_DRAW_CYAN;
1620     for (i=0,row=0; i<mbs; i++,row+=bs) {
1621       for (j=a->i[i]; j<a->i[i+1]; j++) {
1622         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624         aa  = a->a + j*bs2;
1625         for (k=0; k<bs; k++) {
1626           for (l=0; l<bs; l++) {
1627             if (PetscRealPart(*aa++) != 0.) continue;
1628             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1629           }
1630         }
1631       }
1632     }
1633     color = PETSC_DRAW_RED;
1634     for (i=0,row=0; i<mbs; i++,row+=bs) {
1635       for (j=a->i[i]; j<a->i[i+1]; j++) {
1636         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1637         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1638         aa  = a->a + j*bs2;
1639         for (k=0; k<bs; k++) {
1640           for (l=0; l<bs; l++) {
1641             if (PetscRealPart(*aa++) <= 0.) continue;
1642             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1643           }
1644         }
1645       }
1646     }
1647     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1648   } else {
1649     /* use contour shading to indicate magnitude of values */
1650     /* first determine max of all nonzero values */
1651     PetscReal minv = 0.0, maxv = 0.0;
1652     PetscDraw popup;
1653 
1654     for (i=0; i<a->nz*a->bs2; i++) {
1655       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1656     }
1657     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1658     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1659     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1660 
1661     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1662     for (i=0,row=0; i<mbs; i++,row+=bs) {
1663       for (j=a->i[i]; j<a->i[i+1]; j++) {
1664         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1665         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1666         aa  = a->a + j*bs2;
1667         for (k=0; k<bs; k++) {
1668           for (l=0; l<bs; l++) {
1669             MatScalar v = *aa++;
1670             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1671             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1672           }
1673         }
1674       }
1675     }
1676     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1682 {
1683   PetscErrorCode ierr;
1684   PetscReal      xl,yl,xr,yr,w,h;
1685   PetscDraw      draw;
1686   PetscBool      isnull;
1687 
1688   PetscFunctionBegin;
1689   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1690   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1691   if (isnull) PetscFunctionReturn(0);
1692 
1693   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1694   xr  += w;          yr += h;        xl = -w;     yl = -h;
1695   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1696   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1697   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1698   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1699   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1704 {
1705   PetscErrorCode ierr;
1706   PetscBool      iascii,isbinary,isdraw;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1710   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1712   if (iascii) {
1713     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1714   } else if (isbinary) {
1715     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1716   } else if (isdraw) {
1717     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1718   } else {
1719     Mat B;
1720     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1721     ierr = MatView(B,viewer);CHKERRQ(ierr);
1722     ierr = MatDestroy(&B);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 
1728 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1729 {
1730   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1731   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1732   PetscInt    *ai = a->i,*ailen = a->ilen;
1733   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1734   MatScalar   *ap,*aa = a->a;
1735 
1736   PetscFunctionBegin;
1737   for (k=0; k<m; k++) { /* loop over rows */
1738     row = im[k]; brow = row/bs;
1739     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1740     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1741     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1742     nrow = ailen[brow];
1743     for (l=0; l<n; l++) { /* loop over columns */
1744       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1745       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1746       col  = in[l];
1747       bcol = col/bs;
1748       cidx = col%bs;
1749       ridx = row%bs;
1750       high = nrow;
1751       low  = 0; /* assume unsorted */
1752       while (high-low > 5) {
1753         t = (low+high)/2;
1754         if (rp[t] > bcol) high = t;
1755         else             low  = t;
1756       }
1757       for (i=low; i<high; i++) {
1758         if (rp[i] > bcol) break;
1759         if (rp[i] == bcol) {
1760           *v++ = ap[bs2*i+bs*cidx+ridx];
1761           goto finished;
1762         }
1763       }
1764       *v++ = 0.0;
1765 finished:;
1766     }
1767   }
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1772 {
1773   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1774   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1775   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1776   PetscErrorCode    ierr;
1777   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1778   PetscBool         roworiented=a->roworiented;
1779   const PetscScalar *value     = v;
1780   MatScalar         *ap=NULL,*aa = a->a,*bap;
1781 
1782   PetscFunctionBegin;
1783   if (roworiented) {
1784     stepval = (n-1)*bs;
1785   } else {
1786     stepval = (m-1)*bs;
1787   }
1788   for (k=0; k<m; k++) { /* loop over added rows */
1789     row = im[k];
1790     if (row < 0) continue;
1791 #if defined(PETSC_USE_DEBUG)
1792     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1793 #endif
1794     rp   = aj + ai[row];
1795     if (!A->structure_only) ap = aa + bs2*ai[row];
1796     rmax = imax[row];
1797     nrow = ailen[row];
1798     low  = 0;
1799     high = nrow;
1800     for (l=0; l<n; l++) { /* loop over added columns */
1801       if (in[l] < 0) continue;
1802 #if defined(PETSC_USE_DEBUG)
1803       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1804 #endif
1805       col = in[l];
1806       if (!A->structure_only) {
1807         if (roworiented) {
1808           value = v + (k*(stepval+bs) + l)*bs;
1809         } else {
1810           value = v + (l*(stepval+bs) + k)*bs;
1811         }
1812       }
1813       if (col <= lastcol) low = 0;
1814       else high = nrow;
1815       lastcol = col;
1816       while (high-low > 7) {
1817         t = (low+high)/2;
1818         if (rp[t] > col) high = t;
1819         else             low  = t;
1820       }
1821       for (i=low; i<high; i++) {
1822         if (rp[i] > col) break;
1823         if (rp[i] == col) {
1824           if (A->structure_only) goto noinsert2;
1825           bap = ap +  bs2*i;
1826           if (roworiented) {
1827             if (is == ADD_VALUES) {
1828               for (ii=0; ii<bs; ii++,value+=stepval) {
1829                 for (jj=ii; jj<bs2; jj+=bs) {
1830                   bap[jj] += *value++;
1831                 }
1832               }
1833             } else {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] = *value++;
1837                 }
1838               }
1839             }
1840           } else {
1841             if (is == ADD_VALUES) {
1842               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1843                 for (jj=0; jj<bs; jj++) {
1844                   bap[jj] += value[jj];
1845                 }
1846                 bap += bs;
1847               }
1848             } else {
1849               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1850                 for (jj=0; jj<bs; jj++) {
1851                   bap[jj]  = value[jj];
1852                 }
1853                 bap += bs;
1854               }
1855             }
1856           }
1857           goto noinsert2;
1858         }
1859       }
1860       if (nonew == 1) goto noinsert2;
1861       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1862       if (A->structure_only) {
1863         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1864       } else {
1865         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1866       }
1867       N = nrow++ - 1; high++;
1868       /* shift up all the later entries in this row */
1869       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1870       rp[i] = col;
1871       if (!A->structure_only) {
1872         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1873         bap   = ap +  bs2*i;
1874         if (roworiented) {
1875           for (ii=0; ii<bs; ii++,value+=stepval) {
1876             for (jj=ii; jj<bs2; jj+=bs) {
1877               bap[jj] = *value++;
1878             }
1879           }
1880         } else {
1881           for (ii=0; ii<bs; ii++,value+=stepval) {
1882             for (jj=0; jj<bs; jj++) {
1883               *bap++ = *value++;
1884             }
1885           }
1886         }
1887       }
1888 noinsert2:;
1889       low = i;
1890     }
1891     ailen[row] = nrow;
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1897 {
1898   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1899   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1900   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1901   PetscErrorCode ierr;
1902   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1903   MatScalar      *aa  = a->a,*ap;
1904   PetscReal      ratio=0.6;
1905 
1906   PetscFunctionBegin;
1907   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1908 
1909   if (m) rmax = ailen[0];
1910   for (i=1; i<mbs; i++) {
1911     /* move each row back by the amount of empty slots (fshift) before it*/
1912     fshift += imax[i-1] - ailen[i-1];
1913     rmax    = PetscMax(rmax,ailen[i]);
1914     if (fshift) {
1915       ip = aj + ai[i];
1916       ap = aa + bs2*ai[i];
1917       N  = ailen[i];
1918       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1919       if (!A->structure_only) {
1920         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1921       }
1922     }
1923     ai[i] = ai[i-1] + ailen[i-1];
1924   }
1925   if (mbs) {
1926     fshift += imax[mbs-1] - ailen[mbs-1];
1927     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1928   }
1929 
1930   /* reset ilen and imax for each row */
1931   a->nonzerorowcnt = 0;
1932   if (A->structure_only) {
1933     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1934   } else { /* !A->structure_only */
1935     for (i=0; i<mbs; i++) {
1936       ailen[i] = imax[i] = ai[i+1] - ai[i];
1937       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1938     }
1939   }
1940   a->nz = ai[mbs];
1941 
1942   /* diagonals may have moved, so kill the diagonal pointers */
1943   a->idiagvalid = PETSC_FALSE;
1944   if (fshift && a->diag) {
1945     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1946     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1947     a->diag = 0;
1948   }
1949   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1950   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1952   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1953 
1954   A->info.mallocs    += a->reallocs;
1955   a->reallocs         = 0;
1956   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1957   a->rmax             = rmax;
1958 
1959   if (!A->structure_only) {
1960     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1961   }
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 /*
1966    This function returns an array of flags which indicate the locations of contiguous
1967    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1968    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1969    Assume: sizes should be long enough to hold all the values.
1970 */
1971 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1972 {
1973   PetscInt  i,j,k,row;
1974   PetscBool flg;
1975 
1976   PetscFunctionBegin;
1977   for (i=0,j=0; i<n; j++) {
1978     row = idx[i];
1979     if (row%bs!=0) { /* Not the begining of a block */
1980       sizes[j] = 1;
1981       i++;
1982     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1983       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1984       i++;
1985     } else { /* Begining of the block, so check if the complete block exists */
1986       flg = PETSC_TRUE;
1987       for (k=1; k<bs; k++) {
1988         if (row+k != idx[i+k]) { /* break in the block */
1989           flg = PETSC_FALSE;
1990           break;
1991         }
1992       }
1993       if (flg) { /* No break in the bs */
1994         sizes[j] = bs;
1995         i       += bs;
1996       } else {
1997         sizes[j] = 1;
1998         i++;
1999       }
2000     }
2001   }
2002   *bs_max = j;
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2007 {
2008   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2009   PetscErrorCode    ierr;
2010   PetscInt          i,j,k,count,*rows;
2011   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2012   PetscScalar       zero = 0.0;
2013   MatScalar         *aa;
2014   const PetscScalar *xx;
2015   PetscScalar       *bb;
2016 
2017   PetscFunctionBegin;
2018   /* fix right hand side if needed */
2019   if (x && b) {
2020     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2021     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2022     for (i=0; i<is_n; i++) {
2023       bb[is_idx[i]] = diag*xx[is_idx[i]];
2024     }
2025     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2026     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2027   }
2028 
2029   /* Make a copy of the IS and  sort it */
2030   /* allocate memory for rows,sizes */
2031   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2032 
2033   /* copy IS values to rows, and sort them */
2034   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2035   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2036 
2037   if (baij->keepnonzeropattern) {
2038     for (i=0; i<is_n; i++) sizes[i] = 1;
2039     bs_max          = is_n;
2040   } else {
2041     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2042     A->nonzerostate++;
2043   }
2044 
2045   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2046     row = rows[j];
2047     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2048     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2049     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2050     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2051       if (diag != (PetscScalar)0.0) {
2052         if (baij->ilen[row/bs] > 0) {
2053           baij->ilen[row/bs]       = 1;
2054           baij->j[baij->i[row/bs]] = row/bs;
2055 
2056           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2057         }
2058         /* Now insert all the diagonal values for this bs */
2059         for (k=0; k<bs; k++) {
2060           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2061         }
2062       } else { /* (diag == 0.0) */
2063         baij->ilen[row/bs] = 0;
2064       } /* end (diag == 0.0) */
2065     } else { /* (sizes[i] != bs) */
2066 #if defined(PETSC_USE_DEBUG)
2067       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2068 #endif
2069       for (k=0; k<count; k++) {
2070         aa[0] =  zero;
2071         aa   += bs;
2072       }
2073       if (diag != (PetscScalar)0.0) {
2074         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2075       }
2076     }
2077   }
2078 
2079   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2080   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2085 {
2086   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2087   PetscErrorCode    ierr;
2088   PetscInt          i,j,k,count;
2089   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2090   PetscScalar       zero = 0.0;
2091   MatScalar         *aa;
2092   const PetscScalar *xx;
2093   PetscScalar       *bb;
2094   PetscBool         *zeroed,vecs = PETSC_FALSE;
2095 
2096   PetscFunctionBegin;
2097   /* fix right hand side if needed */
2098   if (x && b) {
2099     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2100     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2101     vecs = PETSC_TRUE;
2102   }
2103 
2104   /* zero the columns */
2105   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2106   for (i=0; i<is_n; i++) {
2107     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2108     zeroed[is_idx[i]] = PETSC_TRUE;
2109   }
2110   for (i=0; i<A->rmap->N; i++) {
2111     if (!zeroed[i]) {
2112       row = i/bs;
2113       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2114         for (k=0; k<bs; k++) {
2115           col = bs*baij->j[j] + k;
2116           if (zeroed[col]) {
2117             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2118             if (vecs) bb[i] -= aa[0]*xx[col];
2119             aa[0] = 0.0;
2120           }
2121         }
2122       }
2123     } else if (vecs) bb[i] = diag*xx[i];
2124   }
2125   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2126   if (vecs) {
2127     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2128     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2129   }
2130 
2131   /* zero the rows */
2132   for (i=0; i<is_n; i++) {
2133     row   = is_idx[i];
2134     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2135     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2136     for (k=0; k<count; k++) {
2137       aa[0] =  zero;
2138       aa   += bs;
2139     }
2140     if (diag != (PetscScalar)0.0) {
2141       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2142     }
2143   }
2144   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2149 {
2150   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2151   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2152   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2153   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2154   PetscErrorCode ierr;
2155   PetscInt       ridx,cidx,bs2=a->bs2;
2156   PetscBool      roworiented=a->roworiented;
2157   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2158 
2159   PetscFunctionBegin;
2160   for (k=0; k<m; k++) { /* loop over added rows */
2161     row  = im[k];
2162     brow = row/bs;
2163     if (row < 0) continue;
2164 #if defined(PETSC_USE_DEBUG)
2165     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2166 #endif
2167     rp   = aj + ai[brow];
2168     if (!A->structure_only) ap = aa + bs2*ai[brow];
2169     rmax = imax[brow];
2170     nrow = ailen[brow];
2171     low  = 0;
2172     high = nrow;
2173     for (l=0; l<n; l++) { /* loop over added columns */
2174       if (in[l] < 0) continue;
2175 #if defined(PETSC_USE_DEBUG)
2176       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2177 #endif
2178       col  = in[l]; bcol = col/bs;
2179       ridx = row % bs; cidx = col % bs;
2180       if (!A->structure_only) {
2181         if (roworiented) {
2182           value = v[l + k*n];
2183         } else {
2184           value = v[k + l*m];
2185         }
2186       }
2187       if (col <= lastcol) low = 0; else high = nrow;
2188       lastcol = col;
2189       while (high-low > 7) {
2190         t = (low+high)/2;
2191         if (rp[t] > bcol) high = t;
2192         else              low  = t;
2193       }
2194       for (i=low; i<high; i++) {
2195         if (rp[i] > bcol) break;
2196         if (rp[i] == bcol) {
2197           bap = ap +  bs2*i + bs*cidx + ridx;
2198           if (!A->structure_only) {
2199             if (is == ADD_VALUES) *bap += value;
2200             else                  *bap  = value;
2201           }
2202           goto noinsert1;
2203         }
2204       }
2205       if (nonew == 1) goto noinsert1;
2206       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2207       if (A->structure_only) {
2208         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2209       } else {
2210         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2211       }
2212       N = nrow++ - 1; high++;
2213       /* shift up all the later entries in this row */
2214       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2215       rp[i] = bcol;
2216       if (!A->structure_only) {
2217         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2218         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2219         ap[bs2*i + bs*cidx + ridx] = value;
2220       }
2221       a->nz++;
2222       A->nonzerostate++;
2223 noinsert1:;
2224       low = i;
2225     }
2226     ailen[brow] = nrow;
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2232 {
2233   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2234   Mat            outA;
2235   PetscErrorCode ierr;
2236   PetscBool      row_identity,col_identity;
2237 
2238   PetscFunctionBegin;
2239   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2240   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2241   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2242   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2243 
2244   outA            = inA;
2245   inA->factortype = MAT_FACTOR_LU;
2246   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2247   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2248 
2249   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2250 
2251   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2252   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2253   a->row = row;
2254   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2255   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2256   a->col = col;
2257 
2258   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2259   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2260   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2261   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2262 
2263   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2264   if (!a->solve_work) {
2265     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2266     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2267   }
2268   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2273 {
2274   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2275   PetscInt    i,nz,mbs;
2276 
2277   PetscFunctionBegin;
2278   nz  = baij->maxnz;
2279   mbs = baij->mbs;
2280   for (i=0; i<nz; i++) {
2281     baij->j[i] = indices[i];
2282   }
2283   baij->nz = nz;
2284   for (i=0; i<mbs; i++) {
2285     baij->ilen[i] = baij->imax[i];
2286   }
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 /*@
2291     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2292        in the matrix.
2293 
2294   Input Parameters:
2295 +  mat - the SeqBAIJ matrix
2296 -  indices - the column indices
2297 
2298   Level: advanced
2299 
2300   Notes:
2301     This can be called if you have precomputed the nonzero structure of the
2302   matrix and want to provide it to the matrix object to improve the performance
2303   of the MatSetValues() operation.
2304 
2305     You MUST have set the correct numbers of nonzeros per row in the call to
2306   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2307 
2308     MUST be called before any calls to MatSetValues();
2309 
2310 @*/
2311 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2312 {
2313   PetscErrorCode ierr;
2314 
2315   PetscFunctionBegin;
2316   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2317   PetscValidPointer(indices,2);
2318   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2323 {
2324   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2325   PetscErrorCode ierr;
2326   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2327   PetscReal      atmp;
2328   PetscScalar    *x,zero = 0.0;
2329   MatScalar      *aa;
2330   PetscInt       ncols,brow,krow,kcol;
2331 
2332   PetscFunctionBegin;
2333   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2334   bs  = A->rmap->bs;
2335   aa  = a->a;
2336   ai  = a->i;
2337   aj  = a->j;
2338   mbs = a->mbs;
2339 
2340   ierr = VecSet(v,zero);CHKERRQ(ierr);
2341   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2342   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2343   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2344   for (i=0; i<mbs; i++) {
2345     ncols = ai[1] - ai[0]; ai++;
2346     brow  = bs*i;
2347     for (j=0; j<ncols; j++) {
2348       for (kcol=0; kcol<bs; kcol++) {
2349         for (krow=0; krow<bs; krow++) {
2350           atmp = PetscAbsScalar(*aa);aa++;
2351           row  = brow + krow;   /* row index */
2352           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2353         }
2354       }
2355       aj++;
2356     }
2357   }
2358   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2359   PetscFunctionReturn(0);
2360 }
2361 
2362 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2363 {
2364   PetscErrorCode ierr;
2365 
2366   PetscFunctionBegin;
2367   /* If the two matrices have the same copy implementation, use fast copy. */
2368   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2369     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2370     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2371     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2372 
2373     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2374     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2375     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2376     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2377   } else {
2378     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2379   }
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2384 {
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2393 {
2394   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2395 
2396   PetscFunctionBegin;
2397   *array = a->a;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2402 {
2403   PetscFunctionBegin;
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2408 {
2409   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2410   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2411   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2412   PetscErrorCode ierr;
2413 
2414   PetscFunctionBegin;
2415   /* Set the number of nonzeros in the new matrix */
2416   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2421 {
2422   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2423   PetscErrorCode ierr;
2424   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2425   PetscBLASInt   one=1;
2426 
2427   PetscFunctionBegin;
2428   if (str == SAME_NONZERO_PATTERN) {
2429     PetscScalar  alpha = a;
2430     PetscBLASInt bnz;
2431     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2432     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2433     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2434   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2435     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2436   } else {
2437     Mat      B;
2438     PetscInt *nnz;
2439     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2440     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2441     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2442     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2443     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2444     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2445     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2446     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2447     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2448     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2449     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2450     ierr = PetscFree(nnz);CHKERRQ(ierr);
2451   }
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2456 {
2457   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2458   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2459   MatScalar   *aa = a->a;
2460 
2461   PetscFunctionBegin;
2462   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2467 {
2468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2469   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2470   MatScalar   *aa = a->a;
2471 
2472   PetscFunctionBegin;
2473   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 /*
2478     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2479 */
2480 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2481 {
2482   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2483   PetscErrorCode ierr;
2484   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2485   PetscInt       nz = a->i[m],row,*jj,mr,col;
2486 
2487   PetscFunctionBegin;
2488   *nn = n;
2489   if (!ia) PetscFunctionReturn(0);
2490   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2491   else {
2492     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2493     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2494     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2495     jj   = a->j;
2496     for (i=0; i<nz; i++) {
2497       collengths[jj[i]]++;
2498     }
2499     cia[0] = oshift;
2500     for (i=0; i<n; i++) {
2501       cia[i+1] = cia[i] + collengths[i];
2502     }
2503     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2504     jj   = a->j;
2505     for (row=0; row<m; row++) {
2506       mr = a->i[row+1] - a->i[row];
2507       for (i=0; i<mr; i++) {
2508         col = *jj++;
2509 
2510         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2511       }
2512     }
2513     ierr = PetscFree(collengths);CHKERRQ(ierr);
2514     *ia  = cia; *ja = cja;
2515   }
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2520 {
2521   PetscErrorCode ierr;
2522 
2523   PetscFunctionBegin;
2524   if (!ia) PetscFunctionReturn(0);
2525   ierr = PetscFree(*ia);CHKERRQ(ierr);
2526   ierr = PetscFree(*ja);CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 /*
2531  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2532  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2533  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2534  */
2535 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2536 {
2537   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2538   PetscErrorCode ierr;
2539   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2540   PetscInt       nz = a->i[m],row,*jj,mr,col;
2541   PetscInt       *cspidx;
2542 
2543   PetscFunctionBegin;
2544   *nn = n;
2545   if (!ia) PetscFunctionReturn(0);
2546 
2547   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2548   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2549   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2550   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2551   jj   = a->j;
2552   for (i=0; i<nz; i++) {
2553     collengths[jj[i]]++;
2554   }
2555   cia[0] = oshift;
2556   for (i=0; i<n; i++) {
2557     cia[i+1] = cia[i] + collengths[i];
2558   }
2559   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2560   jj   = a->j;
2561   for (row=0; row<m; row++) {
2562     mr = a->i[row+1] - a->i[row];
2563     for (i=0; i<mr; i++) {
2564       col = *jj++;
2565       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2566       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2567     }
2568   }
2569   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2570   *ia    = cia; *ja = cja;
2571   *spidx = cspidx;
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2576 {
2577   PetscErrorCode ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2581   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2586 {
2587   PetscErrorCode ierr;
2588   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2589 
2590   PetscFunctionBegin;
2591   if (!Y->preallocated || !aij->nz) {
2592     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2593   }
2594   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 /* -------------------------------------------------------------------*/
2599 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2600                                        MatGetRow_SeqBAIJ,
2601                                        MatRestoreRow_SeqBAIJ,
2602                                        MatMult_SeqBAIJ_N,
2603                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2604                                        MatMultTranspose_SeqBAIJ,
2605                                        MatMultTransposeAdd_SeqBAIJ,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                /* 10*/ 0,
2610                                        MatLUFactor_SeqBAIJ,
2611                                        0,
2612                                        0,
2613                                        MatTranspose_SeqBAIJ,
2614                                /* 15*/ MatGetInfo_SeqBAIJ,
2615                                        MatEqual_SeqBAIJ,
2616                                        MatGetDiagonal_SeqBAIJ,
2617                                        MatDiagonalScale_SeqBAIJ,
2618                                        MatNorm_SeqBAIJ,
2619                                /* 20*/ 0,
2620                                        MatAssemblyEnd_SeqBAIJ,
2621                                        MatSetOption_SeqBAIJ,
2622                                        MatZeroEntries_SeqBAIJ,
2623                                /* 24*/ MatZeroRows_SeqBAIJ,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 29*/ MatSetUp_SeqBAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                /* 34*/ MatDuplicate_SeqBAIJ,
2634                                        0,
2635                                        0,
2636                                        MatILUFactor_SeqBAIJ,
2637                                        0,
2638                                /* 39*/ MatAXPY_SeqBAIJ,
2639                                        MatCreateSubMatrices_SeqBAIJ,
2640                                        MatIncreaseOverlap_SeqBAIJ,
2641                                        MatGetValues_SeqBAIJ,
2642                                        MatCopy_SeqBAIJ,
2643                                /* 44*/ 0,
2644                                        MatScale_SeqBAIJ,
2645                                        MatShift_SeqBAIJ,
2646                                        0,
2647                                        MatZeroRowsColumns_SeqBAIJ,
2648                                /* 49*/ 0,
2649                                        MatGetRowIJ_SeqBAIJ,
2650                                        MatRestoreRowIJ_SeqBAIJ,
2651                                        MatGetColumnIJ_SeqBAIJ,
2652                                        MatRestoreColumnIJ_SeqBAIJ,
2653                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        MatSetValuesBlocked_SeqBAIJ,
2658                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2659                                        MatDestroy_SeqBAIJ,
2660                                        MatView_SeqBAIJ,
2661                                        0,
2662                                        0,
2663                                /* 64*/ 0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2669                                        0,
2670                                        MatConvert_Basic,
2671                                        0,
2672                                        0,
2673                                /* 74*/ 0,
2674                                        MatFDColoringApply_BAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                /* 79*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        MatLoad_SeqBAIJ,
2683                                /* 84*/ 0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                /* 89*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /* 94*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /* 99*/ 0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                /*104*/ 0,
2704                                        MatRealPart_SeqBAIJ,
2705                                        MatImaginaryPart_SeqBAIJ,
2706                                        0,
2707                                        0,
2708                                /*109*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        MatMissingDiagonal_SeqBAIJ,
2713                                /*114*/ 0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /*119*/ 0,
2719                                        0,
2720                                        MatMultHermitianTranspose_SeqBAIJ,
2721                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2722                                        0,
2723                                /*124*/ 0,
2724                                        0,
2725                                        MatInvertBlockDiagonal_SeqBAIJ,
2726                                        0,
2727                                        0,
2728                                /*129*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /*134*/ 0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                /*139*/ MatSetBlockSizes_Default,
2739                                        0,
2740                                        0,
2741                                        MatFDColoringSetUp_SeqXAIJ,
2742                                        0,
2743                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2744                                        MatDestroySubMatrices_SeqBAIJ
2745 };
2746 
2747 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2748 {
2749   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2750   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2755 
2756   /* allocate space for values if not already there */
2757   if (!aij->saved_values) {
2758     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2759     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2760   }
2761 
2762   /* copy values over */
2763   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2768 {
2769   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2770   PetscErrorCode ierr;
2771   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2772 
2773   PetscFunctionBegin;
2774   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2775   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2776 
2777   /* copy values over */
2778   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2784 
2785 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2786 {
2787   Mat_SeqBAIJ    *b;
2788   PetscErrorCode ierr;
2789   PetscInt       i,mbs,nbs,bs2;
2790   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2791 
2792   PetscFunctionBegin;
2793   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2794   if (nz == MAT_SKIP_ALLOCATION) {
2795     skipallocation = PETSC_TRUE;
2796     nz             = 0;
2797   }
2798 
2799   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2800   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2803 
2804   B->preallocated = PETSC_TRUE;
2805 
2806   mbs = B->rmap->n/bs;
2807   nbs = B->cmap->n/bs;
2808   bs2 = bs*bs;
2809 
2810   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2811 
2812   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2813   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2814   if (nnz) {
2815     for (i=0; i<mbs; i++) {
2816       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2817       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2818     }
2819   }
2820 
2821   b    = (Mat_SeqBAIJ*)B->data;
2822   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2823   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2824   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2825 
2826   if (!flg) {
2827     switch (bs) {
2828     case 1:
2829       B->ops->mult    = MatMult_SeqBAIJ_1;
2830       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2831       break;
2832     case 2:
2833       B->ops->mult    = MatMult_SeqBAIJ_2;
2834       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2835       break;
2836     case 3:
2837       B->ops->mult    = MatMult_SeqBAIJ_3;
2838       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2839       break;
2840     case 4:
2841       B->ops->mult    = MatMult_SeqBAIJ_4;
2842       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2843       break;
2844     case 5:
2845       B->ops->mult    = MatMult_SeqBAIJ_5;
2846       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2847       break;
2848     case 6:
2849       B->ops->mult    = MatMult_SeqBAIJ_6;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2851       break;
2852     case 7:
2853       B->ops->mult    = MatMult_SeqBAIJ_7;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2855       break;
2856     case 9:
2857 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2858       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2859       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2860 #else
2861       B->ops->mult    = MatMult_SeqBAIJ_N;
2862       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2863 #endif
2864       break;
2865     case 11:
2866       B->ops->mult    = MatMult_SeqBAIJ_11;
2867       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2868       break;
2869     case 15:
2870       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2872       break;
2873     default:
2874       B->ops->mult    = MatMult_SeqBAIJ_N;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2876       break;
2877     }
2878   }
2879   B->ops->sor = MatSOR_SeqBAIJ;
2880   b->mbs = mbs;
2881   b->nbs = nbs;
2882   if (!skipallocation) {
2883     if (!b->imax) {
2884       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2885       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2886 
2887       b->free_imax_ilen = PETSC_TRUE;
2888     }
2889     /* b->ilen will count nonzeros in each block row so far. */
2890     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2891     if (!nnz) {
2892       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2893       else if (nz < 0) nz = 1;
2894       nz = PetscMin(nz,nbs);
2895       for (i=0; i<mbs; i++) b->imax[i] = nz;
2896       nz = nz*mbs;
2897     } else {
2898       nz = 0;
2899       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2900     }
2901 
2902     /* allocate the matrix space */
2903     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2904     if (B->structure_only) {
2905       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2906       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2907       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2908     } else {
2909       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2910       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2911       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2912     }
2913     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2914 
2915     if (B->structure_only) {
2916       b->singlemalloc = PETSC_FALSE;
2917       b->free_a       = PETSC_FALSE;
2918     } else {
2919       b->singlemalloc = PETSC_TRUE;
2920       b->free_a       = PETSC_TRUE;
2921     }
2922     b->free_ij = PETSC_TRUE;
2923 
2924     b->i[0] = 0;
2925     for (i=1; i<mbs+1; i++) {
2926       b->i[i] = b->i[i-1] + b->imax[i-1];
2927     }
2928 
2929   } else {
2930     b->free_a  = PETSC_FALSE;
2931     b->free_ij = PETSC_FALSE;
2932   }
2933 
2934   b->bs2              = bs2;
2935   b->mbs              = mbs;
2936   b->nz               = 0;
2937   b->maxnz            = nz;
2938   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2939   B->was_assembled    = PETSC_FALSE;
2940   B->assembled        = PETSC_FALSE;
2941   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2946 {
2947   PetscInt       i,m,nz,nz_max=0,*nnz;
2948   PetscScalar    *values=0;
2949   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2950   PetscErrorCode ierr;
2951 
2952   PetscFunctionBegin;
2953   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2954   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2955   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2956   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2958   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2959   m    = B->rmap->n/bs;
2960 
2961   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2962   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2963   for (i=0; i<m; i++) {
2964     nz = ii[i+1]- ii[i];
2965     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2966     nz_max = PetscMax(nz_max, nz);
2967     nnz[i] = nz;
2968   }
2969   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2970   ierr = PetscFree(nnz);CHKERRQ(ierr);
2971 
2972   values = (PetscScalar*)V;
2973   if (!values) {
2974     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2975   }
2976   for (i=0; i<m; i++) {
2977     PetscInt          ncols  = ii[i+1] - ii[i];
2978     const PetscInt    *icols = jj + ii[i];
2979     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2980     if (!roworiented) {
2981       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2982     } else {
2983       PetscInt j;
2984       for (j=0; j<ncols; j++) {
2985         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2986         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2987       }
2988     }
2989   }
2990   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2991   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2992   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2993   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 /*MC
2998    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2999    block sparse compressed row format.
3000 
3001    Options Database Keys:
3002 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3003 
3004   Level: beginner
3005 
3006 .seealso: MatCreateSeqBAIJ()
3007 M*/
3008 
3009 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3010 
3011 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3012 {
3013   PetscErrorCode ierr;
3014   PetscMPIInt    size;
3015   Mat_SeqBAIJ    *b;
3016 
3017   PetscFunctionBegin;
3018   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3019   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3020 
3021   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3022   B->data = (void*)b;
3023   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3024 
3025   b->row          = 0;
3026   b->col          = 0;
3027   b->icol         = 0;
3028   b->reallocs     = 0;
3029   b->saved_values = 0;
3030 
3031   b->roworiented        = PETSC_TRUE;
3032   b->nonew              = 0;
3033   b->diag               = 0;
3034   B->spptr              = 0;
3035   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3036   b->keepnonzeropattern = PETSC_FALSE;
3037 
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3047 #if defined(PETSC_HAVE_HYPRE)
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3049 #endif
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3057 {
3058   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3059   PetscErrorCode ierr;
3060   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3061 
3062   PetscFunctionBegin;
3063   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3064 
3065   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3066     c->imax           = a->imax;
3067     c->ilen           = a->ilen;
3068     c->free_imax_ilen = PETSC_FALSE;
3069   } else {
3070     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3071     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3072     for (i=0; i<mbs; i++) {
3073       c->imax[i] = a->imax[i];
3074       c->ilen[i] = a->ilen[i];
3075     }
3076     c->free_imax_ilen = PETSC_TRUE;
3077   }
3078 
3079   /* allocate the matrix space */
3080   if (mallocmatspace) {
3081     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3082       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3083       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3084 
3085       c->i            = a->i;
3086       c->j            = a->j;
3087       c->singlemalloc = PETSC_FALSE;
3088       c->free_a       = PETSC_TRUE;
3089       c->free_ij      = PETSC_FALSE;
3090       c->parent       = A;
3091       C->preallocated = PETSC_TRUE;
3092       C->assembled    = PETSC_TRUE;
3093 
3094       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3095       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3096       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3097     } else {
3098       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3099       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3100 
3101       c->singlemalloc = PETSC_TRUE;
3102       c->free_a       = PETSC_TRUE;
3103       c->free_ij      = PETSC_TRUE;
3104 
3105       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3106       if (mbs > 0) {
3107         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3108         if (cpvalues == MAT_COPY_VALUES) {
3109           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3110         } else {
3111           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3112         }
3113       }
3114       C->preallocated = PETSC_TRUE;
3115       C->assembled    = PETSC_TRUE;
3116     }
3117   }
3118 
3119   c->roworiented = a->roworiented;
3120   c->nonew       = a->nonew;
3121 
3122   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3123   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3124 
3125   c->bs2         = a->bs2;
3126   c->mbs         = a->mbs;
3127   c->nbs         = a->nbs;
3128 
3129   if (a->diag) {
3130     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3131       c->diag      = a->diag;
3132       c->free_diag = PETSC_FALSE;
3133     } else {
3134       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3135       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3136       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3137       c->free_diag = PETSC_TRUE;
3138     }
3139   } else c->diag = 0;
3140 
3141   c->nz         = a->nz;
3142   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3143   c->solve_work = NULL;
3144   c->mult_work  = NULL;
3145   c->sor_workt  = NULL;
3146   c->sor_work   = NULL;
3147 
3148   c->compressedrow.use   = a->compressedrow.use;
3149   c->compressedrow.nrows = a->compressedrow.nrows;
3150   if (a->compressedrow.use) {
3151     i    = a->compressedrow.nrows;
3152     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3153     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3154     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3155     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3156   } else {
3157     c->compressedrow.use    = PETSC_FALSE;
3158     c->compressedrow.i      = NULL;
3159     c->compressedrow.rindex = NULL;
3160   }
3161   C->nonzerostate = A->nonzerostate;
3162 
3163   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3164   PetscFunctionReturn(0);
3165 }
3166 
3167 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3168 {
3169   PetscErrorCode ierr;
3170 
3171   PetscFunctionBegin;
3172   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3173   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3174   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3175   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3176   PetscFunctionReturn(0);
3177 }
3178 
3179 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3180 {
3181   Mat_SeqBAIJ    *a;
3182   PetscErrorCode ierr;
3183   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3184   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3185   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3186   PetscInt       *masked,nmask,tmp,bs2,ishift;
3187   PetscMPIInt    size;
3188   int            fd;
3189   PetscScalar    *aa;
3190   MPI_Comm       comm;
3191   PetscBool      isbinary;
3192 
3193   PetscFunctionBegin;
3194   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3195   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3196 
3197   /* force binary viewer to load .info file if it has not yet done so */
3198   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3199   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3200   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3201   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3202   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3203   if (bs < 0) bs = 1;
3204   bs2  = bs*bs;
3205 
3206   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3207   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3208   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3209   ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3210   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3211   M = header[1]; N = header[2]; nz = header[3];
3212 
3213   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3214   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3215 
3216   /*
3217      This code adds extra rows to make sure the number of rows is
3218     divisible by the blocksize
3219   */
3220   mbs        = M/bs;
3221   extra_rows = bs - M + bs*(mbs);
3222   if (extra_rows == bs) extra_rows = 0;
3223   else mbs++;
3224   if (extra_rows) {
3225     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3226   }
3227 
3228   /* Set global sizes if not already set */
3229   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3230     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3231   } else { /* Check if the matrix global sizes are correct */
3232     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3233     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3234       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3235     }
3236     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3237   }
3238 
3239   /* read in row lengths */
3240   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3241   ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
3242   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3243 
3244   /* read in column indices */
3245   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3246   ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3247   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3248 
3249   /* loop over row lengths determining block row lengths */
3250   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3251   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3252   ierr     = PetscArrayzero(mask,mbs);CHKERRQ(ierr);
3253   rowcount = 0;
3254   nzcount  = 0;
3255   for (i=0; i<mbs; i++) {
3256     nmask = 0;
3257     for (j=0; j<bs; j++) {
3258       kmax = rowlengths[rowcount];
3259       for (k=0; k<kmax; k++) {
3260         tmp = jj[nzcount++]/bs;
3261         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3262       }
3263       rowcount++;
3264     }
3265     browlengths[i] += nmask;
3266     /* zero out the mask elements we set */
3267     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3268   }
3269 
3270   /* Do preallocation  */
3271   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3272   a    = (Mat_SeqBAIJ*)newmat->data;
3273 
3274   /* set matrix "i" values */
3275   a->i[0] = 0;
3276   for (i=1; i<= mbs; i++) {
3277     a->i[i]      = a->i[i-1] + browlengths[i-1];
3278     a->ilen[i-1] = browlengths[i-1];
3279   }
3280   a->nz = 0;
3281   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3282 
3283   /* read in nonzero values */
3284   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3285   ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3286   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3287 
3288   /* set "a" and "j" values into matrix */
3289   nzcount = 0; jcount = 0;
3290   for (i=0; i<mbs; i++) {
3291     nzcountb = nzcount;
3292     nmask    = 0;
3293     for (j=0; j<bs; j++) {
3294       kmax = rowlengths[i*bs+j];
3295       for (k=0; k<kmax; k++) {
3296         tmp = jj[nzcount++]/bs;
3297         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3298       }
3299     }
3300     /* sort the masked values */
3301     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3302 
3303     /* set "j" values into matrix */
3304     maskcount = 1;
3305     for (j=0; j<nmask; j++) {
3306       a->j[jcount++]  = masked[j];
3307       mask[masked[j]] = maskcount++;
3308     }
3309     /* set "a" values into matrix */
3310     ishift = bs2*a->i[i];
3311     for (j=0; j<bs; j++) {
3312       kmax = rowlengths[i*bs+j];
3313       for (k=0; k<kmax; k++) {
3314         tmp       = jj[nzcountb]/bs;
3315         block     = mask[tmp] - 1;
3316         point     = jj[nzcountb] - bs*tmp;
3317         idx       = ishift + bs2*block + j + bs*point;
3318         a->a[idx] = (MatScalar)aa[nzcountb++];
3319       }
3320     }
3321     /* zero out the mask elements we set */
3322     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3323   }
3324   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3325 
3326   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3327   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3328   ierr = PetscFree(aa);CHKERRQ(ierr);
3329   ierr = PetscFree(jj);CHKERRQ(ierr);
3330   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3331 
3332   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3333   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3334   PetscFunctionReturn(0);
3335 }
3336 
3337 /*@C
3338    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3339    compressed row) format.  For good matrix assembly performance the
3340    user should preallocate the matrix storage by setting the parameter nz
3341    (or the array nnz).  By setting these parameters accurately, performance
3342    during matrix assembly can be increased by more than a factor of 50.
3343 
3344    Collective
3345 
3346    Input Parameters:
3347 +  comm - MPI communicator, set to PETSC_COMM_SELF
3348 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3349           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3350 .  m - number of rows
3351 .  n - number of columns
3352 .  nz - number of nonzero blocks  per block row (same for all rows)
3353 -  nnz - array containing the number of nonzero blocks in the various block rows
3354          (possibly different for each block row) or NULL
3355 
3356    Output Parameter:
3357 .  A - the matrix
3358 
3359    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3360    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3361    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3362 
3363    Options Database Keys:
3364 .   -mat_no_unroll - uses code that does not unroll the loops in the
3365                      block calculations (much slower)
3366 .    -mat_block_size - size of the blocks to use
3367 
3368    Level: intermediate
3369 
3370    Notes:
3371    The number of rows and columns must be divisible by blocksize.
3372 
3373    If the nnz parameter is given then the nz parameter is ignored
3374 
3375    A nonzero block is any block that as 1 or more nonzeros in it
3376 
3377    The block AIJ format is fully compatible with standard Fortran 77
3378    storage.  That is, the stored row and column indices can begin at
3379    either one (as in Fortran) or zero.  See the users' manual for details.
3380 
3381    Specify the preallocated storage with either nz or nnz (not both).
3382    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3383    allocation.  See Users-Manual: ch_mat for details.
3384    matrices.
3385 
3386 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3387 @*/
3388 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3389 {
3390   PetscErrorCode ierr;
3391 
3392   PetscFunctionBegin;
3393   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3394   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3395   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3396   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3397   PetscFunctionReturn(0);
3398 }
3399 
3400 /*@C
3401    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3402    per row in the matrix. For good matrix assembly performance the
3403    user should preallocate the matrix storage by setting the parameter nz
3404    (or the array nnz).  By setting these parameters accurately, performance
3405    during matrix assembly can be increased by more than a factor of 50.
3406 
3407    Collective
3408 
3409    Input Parameters:
3410 +  B - the matrix
3411 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3412           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3413 .  nz - number of block nonzeros per block row (same for all rows)
3414 -  nnz - array containing the number of block nonzeros in the various block rows
3415          (possibly different for each block row) or NULL
3416 
3417    Options Database Keys:
3418 .   -mat_no_unroll - uses code that does not unroll the loops in the
3419                      block calculations (much slower)
3420 .   -mat_block_size - size of the blocks to use
3421 
3422    Level: intermediate
3423 
3424    Notes:
3425    If the nnz parameter is given then the nz parameter is ignored
3426 
3427    You can call MatGetInfo() to get information on how effective the preallocation was;
3428    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3429    You can also run with the option -info and look for messages with the string
3430    malloc in them to see if additional memory allocation was needed.
3431 
3432    The block AIJ format is fully compatible with standard Fortran 77
3433    storage.  That is, the stored row and column indices can begin at
3434    either one (as in Fortran) or zero.  See the users' manual for details.
3435 
3436    Specify the preallocated storage with either nz or nnz (not both).
3437    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3438    allocation.  See Users-Manual: ch_mat for details.
3439 
3440 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3441 @*/
3442 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3443 {
3444   PetscErrorCode ierr;
3445 
3446   PetscFunctionBegin;
3447   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3448   PetscValidType(B,1);
3449   PetscValidLogicalCollectiveInt(B,bs,2);
3450   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3451   PetscFunctionReturn(0);
3452 }
3453 
3454 /*@C
3455    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3456    (the default sequential PETSc format).
3457 
3458    Collective
3459 
3460    Input Parameters:
3461 +  B - the matrix
3462 .  i - the indices into j for the start of each local row (starts with zero)
3463 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3464 -  v - optional values in the matrix
3465 
3466    Level: developer
3467 
3468    Notes:
3469    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3470    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3471    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3472    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3473    block column and the second index is over columns within a block.
3474 
3475 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3476 @*/
3477 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3478 {
3479   PetscErrorCode ierr;
3480 
3481   PetscFunctionBegin;
3482   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3483   PetscValidType(B,1);
3484   PetscValidLogicalCollectiveInt(B,bs,2);
3485   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3486   PetscFunctionReturn(0);
3487 }
3488 
3489 
3490 /*@
3491      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3492 
3493      Collective
3494 
3495    Input Parameters:
3496 +  comm - must be an MPI communicator of size 1
3497 .  bs - size of block
3498 .  m - number of rows
3499 .  n - number of columns
3500 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3501 .  j - column indices
3502 -  a - matrix values
3503 
3504    Output Parameter:
3505 .  mat - the matrix
3506 
3507    Level: advanced
3508 
3509    Notes:
3510        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3511     once the matrix is destroyed
3512 
3513        You cannot set new nonzero locations into this matrix, that will generate an error.
3514 
3515        The i and j indices are 0 based
3516 
3517        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3518 
3519       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3520       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3521       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3522       with column-major ordering within blocks.
3523 
3524 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3525 
3526 @*/
3527 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3528 {
3529   PetscErrorCode ierr;
3530   PetscInt       ii;
3531   Mat_SeqBAIJ    *baij;
3532 
3533   PetscFunctionBegin;
3534   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3535   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3536 
3537   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3538   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3539   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3540   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3541   baij = (Mat_SeqBAIJ*)(*mat)->data;
3542   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3543   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3544 
3545   baij->i = i;
3546   baij->j = j;
3547   baij->a = a;
3548 
3549   baij->singlemalloc = PETSC_FALSE;
3550   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3551   baij->free_a       = PETSC_FALSE;
3552   baij->free_ij      = PETSC_FALSE;
3553 
3554   for (ii=0; ii<m; ii++) {
3555     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3556 #if defined(PETSC_USE_DEBUG)
3557     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3558 #endif
3559   }
3560 #if defined(PETSC_USE_DEBUG)
3561   for (ii=0; ii<baij->i[m]; ii++) {
3562     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3563     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3564   }
3565 #endif
3566 
3567   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3569   PetscFunctionReturn(0);
3570 }
3571 
3572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3573 {
3574   PetscErrorCode ierr;
3575   PetscMPIInt    size;
3576 
3577   PetscFunctionBegin;
3578   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3579   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3580     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3581   } else {
3582     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3583   }
3584   PetscFunctionReturn(0);
3585 }
3586