xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d545763cb672d537e48081282d5f2c0ca0cd428b)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912   PetscErrorCode    ierr;
913 
914   PetscFunctionBegin;
915   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916   stepval = (n-1)*4;
917   for (k=0; k<m; k++) { /* loop over added rows */
918     row  = im[k];
919     rp   = aj + ai[row];
920     ap   = aa + 16*ai[row];
921     nrow = ailen[row];
922     low  = 0;
923     high = nrow;
924     for (l=0; l<n; l++) { /* loop over added columns */
925       col = in[l];
926       if (col <= lastcol)  low = 0;
927       else                high = nrow;
928       lastcol = col;
929       value   = v + k*(stepval+4 + l)*4;
930       while (high-low > 7) {
931         t = (low+high)/2;
932         if (rp[t] > col) high = t;
933         else             low  = t;
934       }
935       for (i=low; i<high; i++) {
936         if (rp[i] > col) break;
937         if (rp[i] == col) {
938           bap = ap +  16*i;
939           for (ii=0; ii<4; ii++,value+=stepval) {
940             for (jj=ii; jj<16; jj+=4) {
941               bap[jj] += *value++;
942             }
943           }
944           goto noinsert2;
945         }
946       }
947       N = nrow++ - 1;
948       high++; /* added new column index thus must search to one higher than before */
949       /* shift up all the later entries in this row */
950       for (ii=N; ii>=i; ii--) {
951         rp[ii+1] = rp[ii];
952         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
953       }
954       if (N >= i) {
955         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
956       }
957       rp[i] = col;
958       bap   = ap +  16*i;
959       for (ii=0; ii<4; ii++,value+=stepval) {
960         for (jj=ii; jj<16; jj+=4) {
961           bap[jj] = *value++;
962         }
963       }
964       noinsert2:;
965       low = i;
966     }
967     ailen[row] = nrow;
968   }
969   PetscFunctionReturnVoid();
970 }
971 
972 #if defined(PETSC_HAVE_FORTRAN_CAPS)
973 #define matsetvalues4_ MATSETVALUES4
974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975 #define matsetvalues4_ matsetvalues4
976 #endif
977 
978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979 {
980   Mat         A  = *AA;
981   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
983   PetscInt    *ai=a->i,*ailen=a->ilen;
984   PetscInt    *aj=a->j,brow,bcol;
985   PetscInt    ridx,cidx,lastcol = -1;
986   MatScalar   *ap,value,*aa=a->a,*bap;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   for (k=0; k<m; k++) { /* loop over added rows */
991     row  = im[k]; brow = row/4;
992     rp   = aj + ai[brow];
993     ap   = aa + 16*ai[brow];
994     nrow = ailen[brow];
995     low  = 0;
996     high = nrow;
997     for (l=0; l<n; l++) { /* loop over added columns */
998       col   = in[l]; bcol = col/4;
999       ridx  = row % 4; cidx = col % 4;
1000       value = v[l + k*n];
1001       if (col <= lastcol)  low = 0;
1002       else                high = nrow;
1003       lastcol = col;
1004       while (high-low > 7) {
1005         t = (low+high)/2;
1006         if (rp[t] > bcol) high = t;
1007         else              low  = t;
1008       }
1009       for (i=low; i<high; i++) {
1010         if (rp[i] > bcol) break;
1011         if (rp[i] == bcol) {
1012           bap   = ap +  16*i + 4*cidx + ridx;
1013           *bap += value;
1014           goto noinsert1;
1015         }
1016       }
1017       N = nrow++ - 1;
1018       high++; /* added new column thus must search to one higher than before */
1019       /* shift up all the later entries in this row */
1020       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1021       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1022       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1023       rp[i]                    = bcol;
1024       ap[16*i + 4*cidx + ridx] = value;
1025 noinsert1:;
1026       low = i;
1027     }
1028     ailen[brow] = nrow;
1029   }
1030   PetscFunctionReturnVoid();
1031 }
1032 
1033 /*
1034      Checks for missing diagonals
1035 */
1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1037 {
1038   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040   PetscInt       *diag,*ii = a->i,i;
1041 
1042   PetscFunctionBegin;
1043   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1044   *missing = PETSC_FALSE;
1045   if (A->rmap->n > 0 && !ii) {
1046     *missing = PETSC_TRUE;
1047     if (d) *d = 0;
1048     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1049   } else {
1050     PetscInt n;
1051     n = PetscMin(a->mbs, a->nbs);
1052     diag = a->diag;
1053     for (i=0; i<n; i++) {
1054       if (diag[i] >= ii[i+1]) {
1055         *missing = PETSC_TRUE;
1056         if (d) *d = i;
1057         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1058         break;
1059       }
1060     }
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066 {
1067   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1068   PetscErrorCode ierr;
1069   PetscInt       i,j,m = a->mbs;
1070 
1071   PetscFunctionBegin;
1072   if (!a->diag) {
1073     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1074     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1075     a->free_diag = PETSC_TRUE;
1076   }
1077   for (i=0; i<m; i++) {
1078     a->diag[i] = a->i[i+1];
1079     for (j=a->i[i]; j<a->i[i+1]; j++) {
1080       if (a->j[j] == i) {
1081         a->diag[i] = j;
1082         break;
1083       }
1084     }
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = oshift;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1174 {
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (!ia) PetscFunctionReturn(0);
1179   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180     ierr = PetscFree(*ia);CHKERRQ(ierr);
1181     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187 {
1188   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192 #if defined(PETSC_USE_LOG)
1193   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194 #endif
1195   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1197   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1198   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1200   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1201   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1204   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1206   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1207   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1208 
1209   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1210   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1211   ierr = PetscFree(A->data);CHKERRQ(ierr);
1212 
1213   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_HYPRE)
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1228 #endif
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1235 {
1236   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   switch (op) {
1241   case MAT_ROW_ORIENTED:
1242     a->roworiented = flg;
1243     break;
1244   case MAT_KEEP_NONZERO_PATTERN:
1245     a->keepnonzeropattern = flg;
1246     break;
1247   case MAT_NEW_NONZERO_LOCATIONS:
1248     a->nonew = (flg ? 0 : 1);
1249     break;
1250   case MAT_NEW_NONZERO_LOCATION_ERR:
1251     a->nonew = (flg ? -1 : 0);
1252     break;
1253   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1254     a->nonew = (flg ? -2 : 0);
1255     break;
1256   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1257     a->nounused = (flg ? -1 : 0);
1258     break;
1259   case MAT_NEW_DIAGONALS:
1260   case MAT_IGNORE_OFF_PROC_ENTRIES:
1261   case MAT_USE_HASH_TABLE:
1262   case MAT_SORTED_FULL:
1263     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1264     break;
1265   case MAT_SPD:
1266   case MAT_SYMMETRIC:
1267   case MAT_STRUCTURALLY_SYMMETRIC:
1268   case MAT_HERMITIAN:
1269   case MAT_SYMMETRY_ETERNAL:
1270   case MAT_SUBMAT_SINGLEIS:
1271   case MAT_STRUCTURE_ONLY:
1272     /* These options are handled directly by MatSetOption() */
1273     break;
1274   default:
1275     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1281 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1282 {
1283   PetscErrorCode ierr;
1284   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1285   MatScalar      *aa_i;
1286   PetscScalar    *v_i;
1287 
1288   PetscFunctionBegin;
1289   bs  = A->rmap->bs;
1290   bs2 = bs*bs;
1291   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1292 
1293   bn  = row/bs;   /* Block number */
1294   bp  = row % bs; /* Block Position */
1295   M   = ai[bn+1] - ai[bn];
1296   *nz = bs*M;
1297 
1298   if (v) {
1299     *v = 0;
1300     if (*nz) {
1301       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1302       for (i=0; i<M; i++) { /* for each block in the block row */
1303         v_i  = *v + i*bs;
1304         aa_i = aa + bs2*(ai[bn] + i);
1305         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1306       }
1307     }
1308   }
1309 
1310   if (idx) {
1311     *idx = 0;
1312     if (*nz) {
1313       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1314       for (i=0; i<M; i++) { /* for each block in the block row */
1315         idx_i = *idx + i*bs;
1316         itmp  = bs*aj[ai[bn] + i];
1317         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1318       }
1319     }
1320   }
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325 {
1326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328 
1329   PetscFunctionBegin;
1330   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1335 {
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1340   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1345 {
1346   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1347   Mat            C;
1348   PetscErrorCode ierr;
1349   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1350   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1351   MatScalar      *ata,*aa=a->a;
1352 
1353   PetscFunctionBegin;
1354   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1355   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1356     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1357 
1358     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1359     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1360     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1361     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1362 
1363     at  = (Mat_SeqBAIJ*)C->data;
1364     ati = at->i;
1365     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1366   } else {
1367     C = *B;
1368     at = (Mat_SeqBAIJ*)C->data;
1369     ati = at->i;
1370   }
1371 
1372   atj = at->j;
1373   ata = at->a;
1374 
1375   /* Copy ati into atfill so we have locations of the next free space in atj */
1376   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1377 
1378   /* Walk through A row-wise and mark nonzero entries of A^T. */
1379   for (i=0; i<mbs; i++) {
1380     anzj = ai[i+1] - ai[i];
1381     for (j=0; j<anzj; j++) {
1382       atj[atfill[*aj]] = i;
1383       for (kr=0; kr<bs; kr++) {
1384         for (k=0; k<bs; k++) {
1385           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1386         }
1387       }
1388       atfill[*aj++] += 1;
1389     }
1390   }
1391   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393 
1394   /* Clean up temporary space and complete requests. */
1395   ierr = PetscFree(atfill);CHKERRQ(ierr);
1396 
1397   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1398     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1399     *B = C;
1400   } else {
1401     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1407 {
1408   PetscErrorCode ierr;
1409   Mat            Btrans;
1410 
1411   PetscFunctionBegin;
1412   *f   = PETSC_FALSE;
1413   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1414   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1415   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1420 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1421 {
1422   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1423   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1424   PetscInt       *rowlens,*colidxs;
1425   PetscScalar    *matvals;
1426   PetscErrorCode ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1430 
1431   M  = mat->rmap->N;
1432   N  = mat->cmap->N;
1433   m  = mat->rmap->n;
1434   bs = mat->rmap->bs;
1435   nz = bs*bs*A->nz;
1436 
1437   /* write matrix header */
1438   header[0] = MAT_FILE_CLASSID;
1439   header[1] = M; header[2] = N; header[3] = nz;
1440   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1441 
1442   /* store row lengths */
1443   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1444   for (cnt=0, i=0; i<A->mbs; i++)
1445     for (j=0; j<bs; j++)
1446       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1447   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1448   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1449 
1450   /* store column indices  */
1451   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1452   for (cnt=0, i=0; i<A->mbs; i++)
1453     for (k=0; k<bs; k++)
1454       for (j=A->i[i]; j<A->i[i+1]; j++)
1455         for (l=0; l<bs; l++)
1456           colidxs[cnt++] = bs*A->j[j] + l;
1457   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1458   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1459   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1460 
1461   /* store nonzero values */
1462   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1463   for (cnt=0, i=0; i<A->mbs; i++)
1464     for (k=0; k<bs; k++)
1465       for (j=A->i[i]; j<A->i[i+1]; j++)
1466         for (l=0; l<bs; l++)
1467           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1468   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1469   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1470   ierr = PetscFree(matvals);CHKERRQ(ierr);
1471 
1472   /* write block size option to the viewer's .info file */
1473   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1478 {
1479   PetscErrorCode ierr;
1480   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1481   PetscInt       i,bs = A->rmap->bs,k;
1482 
1483   PetscFunctionBegin;
1484   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1485   for (i=0; i<a->mbs; i++) {
1486     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1487     for (k=a->i[i]; k<a->i[i+1]; k++) {
1488       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1489     }
1490     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1491   }
1492   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1497 {
1498   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1499   PetscErrorCode    ierr;
1500   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1501   PetscViewerFormat format;
1502 
1503   PetscFunctionBegin;
1504   if (A->structure_only) {
1505     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1506     PetscFunctionReturn(0);
1507   }
1508 
1509   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1510   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1511     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1512   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1513     const char *matname;
1514     Mat        aij;
1515     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1516     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1517     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1518     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1519     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1520   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1521       PetscFunctionReturn(0);
1522   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1523     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1524     for (i=0; i<a->mbs; i++) {
1525       for (j=0; j<bs; j++) {
1526         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1527         for (k=a->i[i]; k<a->i[i+1]; k++) {
1528           for (l=0; l<bs; l++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1532                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1533             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1535                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             }
1539 #else
1540             if (a->a[bs2*k + l*bs + j] != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1542             }
1543 #endif
1544           }
1545         }
1546         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1547       }
1548     }
1549     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1550   } else {
1551     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1552     for (i=0; i<a->mbs; i++) {
1553       for (j=0; j<bs; j++) {
1554         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1555         for (k=a->i[i]; k<a->i[i+1]; k++) {
1556           for (l=0; l<bs; l++) {
1557 #if defined(PETSC_USE_COMPLEX)
1558             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1559               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1560                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1561             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1563                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             } else {
1565               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             }
1567 #else
1568             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1569 #endif
1570           }
1571         }
1572         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1573       }
1574     }
1575     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1576   }
1577   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #include <petscdraw.h>
1582 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1583 {
1584   Mat               A = (Mat) Aa;
1585   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1586   PetscErrorCode    ierr;
1587   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1588   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1589   MatScalar         *aa;
1590   PetscViewer       viewer;
1591   PetscViewerFormat format;
1592 
1593   PetscFunctionBegin;
1594   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1595   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1596   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1597 
1598   /* loop over matrix elements drawing boxes */
1599 
1600   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1601     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1602     /* Blue for negative, Cyan for zero and  Red for positive */
1603     color = PETSC_DRAW_BLUE;
1604     for (i=0,row=0; i<mbs; i++,row+=bs) {
1605       for (j=a->i[i]; j<a->i[i+1]; j++) {
1606         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1607         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1608         aa  = a->a + j*bs2;
1609         for (k=0; k<bs; k++) {
1610           for (l=0; l<bs; l++) {
1611             if (PetscRealPart(*aa++) >=  0.) continue;
1612             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1613           }
1614         }
1615       }
1616     }
1617     color = PETSC_DRAW_CYAN;
1618     for (i=0,row=0; i<mbs; i++,row+=bs) {
1619       for (j=a->i[i]; j<a->i[i+1]; j++) {
1620         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1621         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1622         aa  = a->a + j*bs2;
1623         for (k=0; k<bs; k++) {
1624           for (l=0; l<bs; l++) {
1625             if (PetscRealPart(*aa++) != 0.) continue;
1626             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1627           }
1628         }
1629       }
1630     }
1631     color = PETSC_DRAW_RED;
1632     for (i=0,row=0; i<mbs; i++,row+=bs) {
1633       for (j=a->i[i]; j<a->i[i+1]; j++) {
1634         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1635         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1636         aa  = a->a + j*bs2;
1637         for (k=0; k<bs; k++) {
1638           for (l=0; l<bs; l++) {
1639             if (PetscRealPart(*aa++) <= 0.) continue;
1640             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1641           }
1642         }
1643       }
1644     }
1645     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1646   } else {
1647     /* use contour shading to indicate magnitude of values */
1648     /* first determine max of all nonzero values */
1649     PetscReal minv = 0.0, maxv = 0.0;
1650     PetscDraw popup;
1651 
1652     for (i=0; i<a->nz*a->bs2; i++) {
1653       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1654     }
1655     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1656     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1657     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1658 
1659     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1660     for (i=0,row=0; i<mbs; i++,row+=bs) {
1661       for (j=a->i[i]; j<a->i[i+1]; j++) {
1662         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1663         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1664         aa  = a->a + j*bs2;
1665         for (k=0; k<bs; k++) {
1666           for (l=0; l<bs; l++) {
1667             MatScalar v = *aa++;
1668             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1669             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1670           }
1671         }
1672       }
1673     }
1674     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1680 {
1681   PetscErrorCode ierr;
1682   PetscReal      xl,yl,xr,yr,w,h;
1683   PetscDraw      draw;
1684   PetscBool      isnull;
1685 
1686   PetscFunctionBegin;
1687   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1688   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1689   if (isnull) PetscFunctionReturn(0);
1690 
1691   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1692   xr  += w;          yr += h;        xl = -w;     yl = -h;
1693   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1694   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1695   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1696   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1697   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1702 {
1703   PetscErrorCode ierr;
1704   PetscBool      iascii,isbinary,isdraw;
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1708   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1709   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1710   if (iascii) {
1711     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1712   } else if (isbinary) {
1713     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1714   } else if (isdraw) {
1715     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1716   } else {
1717     Mat B;
1718     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1719     ierr = MatView(B,viewer);CHKERRQ(ierr);
1720     ierr = MatDestroy(&B);CHKERRQ(ierr);
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 
1726 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1727 {
1728   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1729   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1730   PetscInt    *ai = a->i,*ailen = a->ilen;
1731   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1732   MatScalar   *ap,*aa = a->a;
1733 
1734   PetscFunctionBegin;
1735   for (k=0; k<m; k++) { /* loop over rows */
1736     row = im[k]; brow = row/bs;
1737     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1738     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1739     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1740     nrow = ailen[brow];
1741     for (l=0; l<n; l++) { /* loop over columns */
1742       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1743       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1744       col  = in[l];
1745       bcol = col/bs;
1746       cidx = col%bs;
1747       ridx = row%bs;
1748       high = nrow;
1749       low  = 0; /* assume unsorted */
1750       while (high-low > 5) {
1751         t = (low+high)/2;
1752         if (rp[t] > bcol) high = t;
1753         else             low  = t;
1754       }
1755       for (i=low; i<high; i++) {
1756         if (rp[i] > bcol) break;
1757         if (rp[i] == bcol) {
1758           *v++ = ap[bs2*i+bs*cidx+ridx];
1759           goto finished;
1760         }
1761       }
1762       *v++ = 0.0;
1763 finished:;
1764     }
1765   }
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1770 {
1771   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1772   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1773   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1774   PetscErrorCode    ierr;
1775   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1776   PetscBool         roworiented=a->roworiented;
1777   const PetscScalar *value     = v;
1778   MatScalar         *ap=NULL,*aa = a->a,*bap;
1779 
1780   PetscFunctionBegin;
1781   if (roworiented) {
1782     stepval = (n-1)*bs;
1783   } else {
1784     stepval = (m-1)*bs;
1785   }
1786   for (k=0; k<m; k++) { /* loop over added rows */
1787     row = im[k];
1788     if (row < 0) continue;
1789 #if defined(PETSC_USE_DEBUG)
1790     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1791 #endif
1792     rp   = aj + ai[row];
1793     if (!A->structure_only) ap = aa + bs2*ai[row];
1794     rmax = imax[row];
1795     nrow = ailen[row];
1796     low  = 0;
1797     high = nrow;
1798     for (l=0; l<n; l++) { /* loop over added columns */
1799       if (in[l] < 0) continue;
1800 #if defined(PETSC_USE_DEBUG)
1801       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1802 #endif
1803       col = in[l];
1804       if (!A->structure_only) {
1805         if (roworiented) {
1806           value = v + (k*(stepval+bs) + l)*bs;
1807         } else {
1808           value = v + (l*(stepval+bs) + k)*bs;
1809         }
1810       }
1811       if (col <= lastcol) low = 0;
1812       else high = nrow;
1813       lastcol = col;
1814       while (high-low > 7) {
1815         t = (low+high)/2;
1816         if (rp[t] > col) high = t;
1817         else             low  = t;
1818       }
1819       for (i=low; i<high; i++) {
1820         if (rp[i] > col) break;
1821         if (rp[i] == col) {
1822           if (A->structure_only) goto noinsert2;
1823           bap = ap +  bs2*i;
1824           if (roworiented) {
1825             if (is == ADD_VALUES) {
1826               for (ii=0; ii<bs; ii++,value+=stepval) {
1827                 for (jj=ii; jj<bs2; jj+=bs) {
1828                   bap[jj] += *value++;
1829                 }
1830               }
1831             } else {
1832               for (ii=0; ii<bs; ii++,value+=stepval) {
1833                 for (jj=ii; jj<bs2; jj+=bs) {
1834                   bap[jj] = *value++;
1835                 }
1836               }
1837             }
1838           } else {
1839             if (is == ADD_VALUES) {
1840               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1841                 for (jj=0; jj<bs; jj++) {
1842                   bap[jj] += value[jj];
1843                 }
1844                 bap += bs;
1845               }
1846             } else {
1847               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1848                 for (jj=0; jj<bs; jj++) {
1849                   bap[jj]  = value[jj];
1850                 }
1851                 bap += bs;
1852               }
1853             }
1854           }
1855           goto noinsert2;
1856         }
1857       }
1858       if (nonew == 1) goto noinsert2;
1859       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1860       if (A->structure_only) {
1861         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1862       } else {
1863         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1864       }
1865       N = nrow++ - 1; high++;
1866       /* shift up all the later entries in this row */
1867       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1868       rp[i] = col;
1869       if (!A->structure_only) {
1870         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1871         bap   = ap +  bs2*i;
1872         if (roworiented) {
1873           for (ii=0; ii<bs; ii++,value+=stepval) {
1874             for (jj=ii; jj<bs2; jj+=bs) {
1875               bap[jj] = *value++;
1876             }
1877           }
1878         } else {
1879           for (ii=0; ii<bs; ii++,value+=stepval) {
1880             for (jj=0; jj<bs; jj++) {
1881               *bap++ = *value++;
1882             }
1883           }
1884         }
1885       }
1886 noinsert2:;
1887       low = i;
1888     }
1889     ailen[row] = nrow;
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1895 {
1896   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1897   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1898   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1899   PetscErrorCode ierr;
1900   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1901   MatScalar      *aa  = a->a,*ap;
1902   PetscReal      ratio=0.6;
1903 
1904   PetscFunctionBegin;
1905   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1906 
1907   if (m) rmax = ailen[0];
1908   for (i=1; i<mbs; i++) {
1909     /* move each row back by the amount of empty slots (fshift) before it*/
1910     fshift += imax[i-1] - ailen[i-1];
1911     rmax    = PetscMax(rmax,ailen[i]);
1912     if (fshift) {
1913       ip = aj + ai[i];
1914       ap = aa + bs2*ai[i];
1915       N  = ailen[i];
1916       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1917       if (!A->structure_only) {
1918         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1919       }
1920     }
1921     ai[i] = ai[i-1] + ailen[i-1];
1922   }
1923   if (mbs) {
1924     fshift += imax[mbs-1] - ailen[mbs-1];
1925     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1926   }
1927 
1928   /* reset ilen and imax for each row */
1929   a->nonzerorowcnt = 0;
1930   if (A->structure_only) {
1931     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1932   } else { /* !A->structure_only */
1933     for (i=0; i<mbs; i++) {
1934       ailen[i] = imax[i] = ai[i+1] - ai[i];
1935       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1936     }
1937   }
1938   a->nz = ai[mbs];
1939 
1940   /* diagonals may have moved, so kill the diagonal pointers */
1941   a->idiagvalid = PETSC_FALSE;
1942   if (fshift && a->diag) {
1943     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1944     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1945     a->diag = 0;
1946   }
1947   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1949   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1951 
1952   A->info.mallocs    += a->reallocs;
1953   a->reallocs         = 0;
1954   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1955   a->rmax             = rmax;
1956 
1957   if (!A->structure_only) {
1958     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1959   }
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 /*
1964    This function returns an array of flags which indicate the locations of contiguous
1965    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1966    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1967    Assume: sizes should be long enough to hold all the values.
1968 */
1969 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1970 {
1971   PetscInt  i,j,k,row;
1972   PetscBool flg;
1973 
1974   PetscFunctionBegin;
1975   for (i=0,j=0; i<n; j++) {
1976     row = idx[i];
1977     if (row%bs!=0) { /* Not the begining of a block */
1978       sizes[j] = 1;
1979       i++;
1980     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1981       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1982       i++;
1983     } else { /* Begining of the block, so check if the complete block exists */
1984       flg = PETSC_TRUE;
1985       for (k=1; k<bs; k++) {
1986         if (row+k != idx[i+k]) { /* break in the block */
1987           flg = PETSC_FALSE;
1988           break;
1989         }
1990       }
1991       if (flg) { /* No break in the bs */
1992         sizes[j] = bs;
1993         i       += bs;
1994       } else {
1995         sizes[j] = 1;
1996         i++;
1997       }
1998     }
1999   }
2000   *bs_max = j;
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2005 {
2006   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2007   PetscErrorCode    ierr;
2008   PetscInt          i,j,k,count,*rows;
2009   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2010   PetscScalar       zero = 0.0;
2011   MatScalar         *aa;
2012   const PetscScalar *xx;
2013   PetscScalar       *bb;
2014 
2015   PetscFunctionBegin;
2016   /* fix right hand side if needed */
2017   if (x && b) {
2018     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2019     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2020     for (i=0; i<is_n; i++) {
2021       bb[is_idx[i]] = diag*xx[is_idx[i]];
2022     }
2023     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2024     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2025   }
2026 
2027   /* Make a copy of the IS and  sort it */
2028   /* allocate memory for rows,sizes */
2029   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2030 
2031   /* copy IS values to rows, and sort them */
2032   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2033   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2034 
2035   if (baij->keepnonzeropattern) {
2036     for (i=0; i<is_n; i++) sizes[i] = 1;
2037     bs_max          = is_n;
2038   } else {
2039     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2040     A->nonzerostate++;
2041   }
2042 
2043   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2044     row = rows[j];
2045     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2046     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2047     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2048     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2049       if (diag != (PetscScalar)0.0) {
2050         if (baij->ilen[row/bs] > 0) {
2051           baij->ilen[row/bs]       = 1;
2052           baij->j[baij->i[row/bs]] = row/bs;
2053 
2054           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2055         }
2056         /* Now insert all the diagonal values for this bs */
2057         for (k=0; k<bs; k++) {
2058           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2059         }
2060       } else { /* (diag == 0.0) */
2061         baij->ilen[row/bs] = 0;
2062       } /* end (diag == 0.0) */
2063     } else { /* (sizes[i] != bs) */
2064 #if defined(PETSC_USE_DEBUG)
2065       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2066 #endif
2067       for (k=0; k<count; k++) {
2068         aa[0] =  zero;
2069         aa   += bs;
2070       }
2071       if (diag != (PetscScalar)0.0) {
2072         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2073       }
2074     }
2075   }
2076 
2077   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2083 {
2084   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2085   PetscErrorCode    ierr;
2086   PetscInt          i,j,k,count;
2087   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2088   PetscScalar       zero = 0.0;
2089   MatScalar         *aa;
2090   const PetscScalar *xx;
2091   PetscScalar       *bb;
2092   PetscBool         *zeroed,vecs = PETSC_FALSE;
2093 
2094   PetscFunctionBegin;
2095   /* fix right hand side if needed */
2096   if (x && b) {
2097     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2098     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2099     vecs = PETSC_TRUE;
2100   }
2101 
2102   /* zero the columns */
2103   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2104   for (i=0; i<is_n; i++) {
2105     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2106     zeroed[is_idx[i]] = PETSC_TRUE;
2107   }
2108   for (i=0; i<A->rmap->N; i++) {
2109     if (!zeroed[i]) {
2110       row = i/bs;
2111       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2112         for (k=0; k<bs; k++) {
2113           col = bs*baij->j[j] + k;
2114           if (zeroed[col]) {
2115             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2116             if (vecs) bb[i] -= aa[0]*xx[col];
2117             aa[0] = 0.0;
2118           }
2119         }
2120       }
2121     } else if (vecs) bb[i] = diag*xx[i];
2122   }
2123   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2124   if (vecs) {
2125     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2126     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2127   }
2128 
2129   /* zero the rows */
2130   for (i=0; i<is_n; i++) {
2131     row   = is_idx[i];
2132     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2133     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2134     for (k=0; k<count; k++) {
2135       aa[0] =  zero;
2136       aa   += bs;
2137     }
2138     if (diag != (PetscScalar)0.0) {
2139       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2140     }
2141   }
2142   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2147 {
2148   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2149   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2150   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2151   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2152   PetscErrorCode ierr;
2153   PetscInt       ridx,cidx,bs2=a->bs2;
2154   PetscBool      roworiented=a->roworiented;
2155   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2156 
2157   PetscFunctionBegin;
2158   for (k=0; k<m; k++) { /* loop over added rows */
2159     row  = im[k];
2160     brow = row/bs;
2161     if (row < 0) continue;
2162 #if defined(PETSC_USE_DEBUG)
2163     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2164 #endif
2165     rp   = aj + ai[brow];
2166     if (!A->structure_only) ap = aa + bs2*ai[brow];
2167     rmax = imax[brow];
2168     nrow = ailen[brow];
2169     low  = 0;
2170     high = nrow;
2171     for (l=0; l<n; l++) { /* loop over added columns */
2172       if (in[l] < 0) continue;
2173 #if defined(PETSC_USE_DEBUG)
2174       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2175 #endif
2176       col  = in[l]; bcol = col/bs;
2177       ridx = row % bs; cidx = col % bs;
2178       if (!A->structure_only) {
2179         if (roworiented) {
2180           value = v[l + k*n];
2181         } else {
2182           value = v[k + l*m];
2183         }
2184       }
2185       if (col <= lastcol) low = 0; else high = nrow;
2186       lastcol = col;
2187       while (high-low > 7) {
2188         t = (low+high)/2;
2189         if (rp[t] > bcol) high = t;
2190         else              low  = t;
2191       }
2192       for (i=low; i<high; i++) {
2193         if (rp[i] > bcol) break;
2194         if (rp[i] == bcol) {
2195           bap = ap +  bs2*i + bs*cidx + ridx;
2196           if (!A->structure_only) {
2197             if (is == ADD_VALUES) *bap += value;
2198             else                  *bap  = value;
2199           }
2200           goto noinsert1;
2201         }
2202       }
2203       if (nonew == 1) goto noinsert1;
2204       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2205       if (A->structure_only) {
2206         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2207       } else {
2208         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2209       }
2210       N = nrow++ - 1; high++;
2211       /* shift up all the later entries in this row */
2212       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2213       rp[i] = bcol;
2214       if (!A->structure_only) {
2215         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2216         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2217         ap[bs2*i + bs*cidx + ridx] = value;
2218       }
2219       a->nz++;
2220       A->nonzerostate++;
2221 noinsert1:;
2222       low = i;
2223     }
2224     ailen[brow] = nrow;
2225   }
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2230 {
2231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2232   Mat            outA;
2233   PetscErrorCode ierr;
2234   PetscBool      row_identity,col_identity;
2235 
2236   PetscFunctionBegin;
2237   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2238   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2239   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2240   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2241 
2242   outA            = inA;
2243   inA->factortype = MAT_FACTOR_LU;
2244   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2245   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2246 
2247   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2248 
2249   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2250   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2251   a->row = row;
2252   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2253   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2254   a->col = col;
2255 
2256   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2257   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2258   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2259   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2260 
2261   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2262   if (!a->solve_work) {
2263     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2264     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2265   }
2266   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2271 {
2272   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2273   PetscInt    i,nz,mbs;
2274 
2275   PetscFunctionBegin;
2276   nz  = baij->maxnz;
2277   mbs = baij->mbs;
2278   for (i=0; i<nz; i++) {
2279     baij->j[i] = indices[i];
2280   }
2281   baij->nz = nz;
2282   for (i=0; i<mbs; i++) {
2283     baij->ilen[i] = baij->imax[i];
2284   }
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 /*@
2289     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2290        in the matrix.
2291 
2292   Input Parameters:
2293 +  mat - the SeqBAIJ matrix
2294 -  indices - the column indices
2295 
2296   Level: advanced
2297 
2298   Notes:
2299     This can be called if you have precomputed the nonzero structure of the
2300   matrix and want to provide it to the matrix object to improve the performance
2301   of the MatSetValues() operation.
2302 
2303     You MUST have set the correct numbers of nonzeros per row in the call to
2304   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2305 
2306     MUST be called before any calls to MatSetValues();
2307 
2308 @*/
2309 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2310 {
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2315   PetscValidPointer(indices,2);
2316   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2321 {
2322   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2323   PetscErrorCode ierr;
2324   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2325   PetscReal      atmp;
2326   PetscScalar    *x,zero = 0.0;
2327   MatScalar      *aa;
2328   PetscInt       ncols,brow,krow,kcol;
2329 
2330   PetscFunctionBegin;
2331   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2332   bs  = A->rmap->bs;
2333   aa  = a->a;
2334   ai  = a->i;
2335   aj  = a->j;
2336   mbs = a->mbs;
2337 
2338   ierr = VecSet(v,zero);CHKERRQ(ierr);
2339   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2340   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2341   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2342   for (i=0; i<mbs; i++) {
2343     ncols = ai[1] - ai[0]; ai++;
2344     brow  = bs*i;
2345     for (j=0; j<ncols; j++) {
2346       for (kcol=0; kcol<bs; kcol++) {
2347         for (krow=0; krow<bs; krow++) {
2348           atmp = PetscAbsScalar(*aa);aa++;
2349           row  = brow + krow;   /* row index */
2350           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2351         }
2352       }
2353       aj++;
2354     }
2355   }
2356   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2361 {
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   /* If the two matrices have the same copy implementation, use fast copy. */
2366   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2367     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2368     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2369     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2370 
2371     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2372     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2373     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2374     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2375   } else {
2376     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2377   }
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2382 {
2383   PetscErrorCode ierr;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2391 {
2392   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2393 
2394   PetscFunctionBegin;
2395   *array = a->a;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2400 {
2401   PetscFunctionBegin;
2402   *array = NULL;
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2407 {
2408   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2409   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2410   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2411   PetscErrorCode ierr;
2412 
2413   PetscFunctionBegin;
2414   /* Set the number of nonzeros in the new matrix */
2415   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2420 {
2421   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2422   PetscErrorCode ierr;
2423   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2424   PetscBLASInt   one=1;
2425 
2426   PetscFunctionBegin;
2427   if (str == SAME_NONZERO_PATTERN) {
2428     PetscScalar  alpha = a;
2429     PetscBLASInt bnz;
2430     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2431     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2432     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2433   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2434     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2435   } else {
2436     Mat      B;
2437     PetscInt *nnz;
2438     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2439     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2440     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2441     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2442     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2443     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2444     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2445     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2446     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2447     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2448     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2449     ierr = PetscFree(nnz);CHKERRQ(ierr);
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2455 {
2456   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2457   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2458   MatScalar   *aa = a->a;
2459 
2460   PetscFunctionBegin;
2461   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2466 {
2467   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2468   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2469   MatScalar   *aa = a->a;
2470 
2471   PetscFunctionBegin;
2472   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 /*
2477     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2478 */
2479 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2480 {
2481   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2482   PetscErrorCode ierr;
2483   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2484   PetscInt       nz = a->i[m],row,*jj,mr,col;
2485 
2486   PetscFunctionBegin;
2487   *nn = n;
2488   if (!ia) PetscFunctionReturn(0);
2489   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2490   else {
2491     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2492     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2493     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2494     jj   = a->j;
2495     for (i=0; i<nz; i++) {
2496       collengths[jj[i]]++;
2497     }
2498     cia[0] = oshift;
2499     for (i=0; i<n; i++) {
2500       cia[i+1] = cia[i] + collengths[i];
2501     }
2502     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2503     jj   = a->j;
2504     for (row=0; row<m; row++) {
2505       mr = a->i[row+1] - a->i[row];
2506       for (i=0; i<mr; i++) {
2507         col = *jj++;
2508 
2509         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2510       }
2511     }
2512     ierr = PetscFree(collengths);CHKERRQ(ierr);
2513     *ia  = cia; *ja = cja;
2514   }
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2519 {
2520   PetscErrorCode ierr;
2521 
2522   PetscFunctionBegin;
2523   if (!ia) PetscFunctionReturn(0);
2524   ierr = PetscFree(*ia);CHKERRQ(ierr);
2525   ierr = PetscFree(*ja);CHKERRQ(ierr);
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 /*
2530  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2531  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2532  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2533  */
2534 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2535 {
2536   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2537   PetscErrorCode ierr;
2538   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2539   PetscInt       nz = a->i[m],row,*jj,mr,col;
2540   PetscInt       *cspidx;
2541 
2542   PetscFunctionBegin;
2543   *nn = n;
2544   if (!ia) PetscFunctionReturn(0);
2545 
2546   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2547   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2548   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2549   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2550   jj   = a->j;
2551   for (i=0; i<nz; i++) {
2552     collengths[jj[i]]++;
2553   }
2554   cia[0] = oshift;
2555   for (i=0; i<n; i++) {
2556     cia[i+1] = cia[i] + collengths[i];
2557   }
2558   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2559   jj   = a->j;
2560   for (row=0; row<m; row++) {
2561     mr = a->i[row+1] - a->i[row];
2562     for (i=0; i<mr; i++) {
2563       col = *jj++;
2564       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2565       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2566     }
2567   }
2568   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2569   *ia    = cia;
2570   *ja    = cja;
2571   *spidx = cspidx;
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2576 {
2577   PetscErrorCode ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2581   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2586 {
2587   PetscErrorCode ierr;
2588   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2589 
2590   PetscFunctionBegin;
2591   if (!Y->preallocated || !aij->nz) {
2592     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2593   }
2594   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 /* -------------------------------------------------------------------*/
2599 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2600                                        MatGetRow_SeqBAIJ,
2601                                        MatRestoreRow_SeqBAIJ,
2602                                        MatMult_SeqBAIJ_N,
2603                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2604                                        MatMultTranspose_SeqBAIJ,
2605                                        MatMultTransposeAdd_SeqBAIJ,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                /* 10*/ 0,
2610                                        MatLUFactor_SeqBAIJ,
2611                                        0,
2612                                        0,
2613                                        MatTranspose_SeqBAIJ,
2614                                /* 15*/ MatGetInfo_SeqBAIJ,
2615                                        MatEqual_SeqBAIJ,
2616                                        MatGetDiagonal_SeqBAIJ,
2617                                        MatDiagonalScale_SeqBAIJ,
2618                                        MatNorm_SeqBAIJ,
2619                                /* 20*/ 0,
2620                                        MatAssemblyEnd_SeqBAIJ,
2621                                        MatSetOption_SeqBAIJ,
2622                                        MatZeroEntries_SeqBAIJ,
2623                                /* 24*/ MatZeroRows_SeqBAIJ,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 29*/ MatSetUp_SeqBAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                /* 34*/ MatDuplicate_SeqBAIJ,
2634                                        0,
2635                                        0,
2636                                        MatILUFactor_SeqBAIJ,
2637                                        0,
2638                                /* 39*/ MatAXPY_SeqBAIJ,
2639                                        MatCreateSubMatrices_SeqBAIJ,
2640                                        MatIncreaseOverlap_SeqBAIJ,
2641                                        MatGetValues_SeqBAIJ,
2642                                        MatCopy_SeqBAIJ,
2643                                /* 44*/ 0,
2644                                        MatScale_SeqBAIJ,
2645                                        MatShift_SeqBAIJ,
2646                                        0,
2647                                        MatZeroRowsColumns_SeqBAIJ,
2648                                /* 49*/ 0,
2649                                        MatGetRowIJ_SeqBAIJ,
2650                                        MatRestoreRowIJ_SeqBAIJ,
2651                                        MatGetColumnIJ_SeqBAIJ,
2652                                        MatRestoreColumnIJ_SeqBAIJ,
2653                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        MatSetValuesBlocked_SeqBAIJ,
2658                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2659                                        MatDestroy_SeqBAIJ,
2660                                        MatView_SeqBAIJ,
2661                                        0,
2662                                        0,
2663                                /* 64*/ 0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2669                                        0,
2670                                        MatConvert_Basic,
2671                                        0,
2672                                        0,
2673                                /* 74*/ 0,
2674                                        MatFDColoringApply_BAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                /* 79*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        MatLoad_SeqBAIJ,
2683                                /* 84*/ 0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                /* 89*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /* 94*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /* 99*/ 0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                /*104*/ 0,
2704                                        MatRealPart_SeqBAIJ,
2705                                        MatImaginaryPart_SeqBAIJ,
2706                                        0,
2707                                        0,
2708                                /*109*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        MatMissingDiagonal_SeqBAIJ,
2713                                /*114*/ 0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /*119*/ 0,
2719                                        0,
2720                                        MatMultHermitianTranspose_SeqBAIJ,
2721                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2722                                        0,
2723                                /*124*/ 0,
2724                                        0,
2725                                        MatInvertBlockDiagonal_SeqBAIJ,
2726                                        0,
2727                                        0,
2728                                /*129*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /*134*/ 0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                /*139*/ MatSetBlockSizes_Default,
2739                                        0,
2740                                        0,
2741                                        MatFDColoringSetUp_SeqXAIJ,
2742                                        0,
2743                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2744                                        MatDestroySubMatrices_SeqBAIJ
2745 };
2746 
2747 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2748 {
2749   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2750   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2755 
2756   /* allocate space for values if not already there */
2757   if (!aij->saved_values) {
2758     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2759     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2760   }
2761 
2762   /* copy values over */
2763   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2768 {
2769   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2770   PetscErrorCode ierr;
2771   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2772 
2773   PetscFunctionBegin;
2774   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2775   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2776 
2777   /* copy values over */
2778   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2784 
2785 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2786 {
2787   Mat_SeqBAIJ    *b;
2788   PetscErrorCode ierr;
2789   PetscInt       i,mbs,nbs,bs2;
2790   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2791 
2792   PetscFunctionBegin;
2793   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2794   if (nz == MAT_SKIP_ALLOCATION) {
2795     skipallocation = PETSC_TRUE;
2796     nz             = 0;
2797   }
2798 
2799   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2800   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2803 
2804   B->preallocated = PETSC_TRUE;
2805 
2806   mbs = B->rmap->n/bs;
2807   nbs = B->cmap->n/bs;
2808   bs2 = bs*bs;
2809 
2810   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2811 
2812   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2813   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2814   if (nnz) {
2815     for (i=0; i<mbs; i++) {
2816       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2817       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2818     }
2819   }
2820 
2821   b    = (Mat_SeqBAIJ*)B->data;
2822   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2823   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2824   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2825 
2826   if (!flg) {
2827     switch (bs) {
2828     case 1:
2829       B->ops->mult    = MatMult_SeqBAIJ_1;
2830       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2831       break;
2832     case 2:
2833       B->ops->mult    = MatMult_SeqBAIJ_2;
2834       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2835       break;
2836     case 3:
2837       B->ops->mult    = MatMult_SeqBAIJ_3;
2838       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2839       break;
2840     case 4:
2841       B->ops->mult    = MatMult_SeqBAIJ_4;
2842       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2843       break;
2844     case 5:
2845       B->ops->mult    = MatMult_SeqBAIJ_5;
2846       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2847       break;
2848     case 6:
2849       B->ops->mult    = MatMult_SeqBAIJ_6;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2851       break;
2852     case 7:
2853       B->ops->mult    = MatMult_SeqBAIJ_7;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2855       break;
2856     case 9:
2857 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2858       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2859       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2860 #else
2861       B->ops->mult    = MatMult_SeqBAIJ_N;
2862       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2863 #endif
2864       break;
2865     case 11:
2866       B->ops->mult    = MatMult_SeqBAIJ_11;
2867       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2868       break;
2869     case 15:
2870       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2872       break;
2873     default:
2874       B->ops->mult    = MatMult_SeqBAIJ_N;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2876       break;
2877     }
2878   }
2879   B->ops->sor = MatSOR_SeqBAIJ;
2880   b->mbs = mbs;
2881   b->nbs = nbs;
2882   if (!skipallocation) {
2883     if (!b->imax) {
2884       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2885       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2886 
2887       b->free_imax_ilen = PETSC_TRUE;
2888     }
2889     /* b->ilen will count nonzeros in each block row so far. */
2890     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2891     if (!nnz) {
2892       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2893       else if (nz < 0) nz = 1;
2894       nz = PetscMin(nz,nbs);
2895       for (i=0; i<mbs; i++) b->imax[i] = nz;
2896       nz = nz*mbs;
2897     } else {
2898       PetscInt64 nz64 = 0;
2899       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2900       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2901     }
2902 
2903     /* allocate the matrix space */
2904     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2905     if (B->structure_only) {
2906       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2907       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2908       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2909     } else {
2910       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2911       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2912       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2913     }
2914     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2915 
2916     if (B->structure_only) {
2917       b->singlemalloc = PETSC_FALSE;
2918       b->free_a       = PETSC_FALSE;
2919     } else {
2920       b->singlemalloc = PETSC_TRUE;
2921       b->free_a       = PETSC_TRUE;
2922     }
2923     b->free_ij = PETSC_TRUE;
2924 
2925     b->i[0] = 0;
2926     for (i=1; i<mbs+1; i++) {
2927       b->i[i] = b->i[i-1] + b->imax[i-1];
2928     }
2929 
2930   } else {
2931     b->free_a  = PETSC_FALSE;
2932     b->free_ij = PETSC_FALSE;
2933   }
2934 
2935   b->bs2              = bs2;
2936   b->mbs              = mbs;
2937   b->nz               = 0;
2938   b->maxnz            = nz;
2939   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2940   B->was_assembled    = PETSC_FALSE;
2941   B->assembled        = PETSC_FALSE;
2942   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2947 {
2948   PetscInt       i,m,nz,nz_max=0,*nnz;
2949   PetscScalar    *values=0;
2950   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2951   PetscErrorCode ierr;
2952 
2953   PetscFunctionBegin;
2954   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2955   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2956   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2958   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2959   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2960   m    = B->rmap->n/bs;
2961 
2962   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2963   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2964   for (i=0; i<m; i++) {
2965     nz = ii[i+1]- ii[i];
2966     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2967     nz_max = PetscMax(nz_max, nz);
2968     nnz[i] = nz;
2969   }
2970   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2971   ierr = PetscFree(nnz);CHKERRQ(ierr);
2972 
2973   values = (PetscScalar*)V;
2974   if (!values) {
2975     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2976   }
2977   for (i=0; i<m; i++) {
2978     PetscInt          ncols  = ii[i+1] - ii[i];
2979     const PetscInt    *icols = jj + ii[i];
2980     if (bs == 1 || !roworiented) {
2981       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2982       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2983     } else {
2984       PetscInt j;
2985       for (j=0; j<ncols; j++) {
2986         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2987         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2988       }
2989     }
2990   }
2991   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2992   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2993   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2995   PetscFunctionReturn(0);
2996 }
2997 
2998 /*@C
2999    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3000 
3001    Not Collective
3002 
3003    Input Parameter:
3004 .  mat - a MATSEQBAIJ matrix
3005 
3006    Output Parameter:
3007 .   array - pointer to the data
3008 
3009    Level: intermediate
3010 
3011 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3012 @*/
3013 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3014 {
3015   PetscErrorCode ierr;
3016 
3017   PetscFunctionBegin;
3018   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3019   PetscFunctionReturn(0);
3020 }
3021 
3022 /*@C
3023    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3024 
3025    Not Collective
3026 
3027    Input Parameters:
3028 +  mat - a MATSEQBAIJ matrix
3029 -  array - pointer to the data
3030 
3031    Level: intermediate
3032 
3033 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3034 @*/
3035 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3036 {
3037   PetscErrorCode ierr;
3038 
3039   PetscFunctionBegin;
3040   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3041   PetscFunctionReturn(0);
3042 }
3043 
3044 /*MC
3045    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3046    block sparse compressed row format.
3047 
3048    Options Database Keys:
3049 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3050 
3051    Level: beginner
3052 
3053    Notes:
3054     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3055     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3056 
3057 .seealso: MatCreateSeqBAIJ()
3058 M*/
3059 
3060 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3061 
3062 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3063 {
3064   PetscErrorCode ierr;
3065   PetscMPIInt    size;
3066   Mat_SeqBAIJ    *b;
3067 
3068   PetscFunctionBegin;
3069   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3070   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3071 
3072   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3073   B->data = (void*)b;
3074   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3075 
3076   b->row          = 0;
3077   b->col          = 0;
3078   b->icol         = 0;
3079   b->reallocs     = 0;
3080   b->saved_values = 0;
3081 
3082   b->roworiented        = PETSC_TRUE;
3083   b->nonew              = 0;
3084   b->diag               = 0;
3085   B->spptr              = 0;
3086   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3087   b->keepnonzeropattern = PETSC_FALSE;
3088 
3089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3100 #if defined(PETSC_HAVE_HYPRE)
3101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3102 #endif
3103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3105   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3106   PetscFunctionReturn(0);
3107 }
3108 
3109 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3110 {
3111   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3112   PetscErrorCode ierr;
3113   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3114 
3115   PetscFunctionBegin;
3116   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3117 
3118   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3119     c->imax           = a->imax;
3120     c->ilen           = a->ilen;
3121     c->free_imax_ilen = PETSC_FALSE;
3122   } else {
3123     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3124     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3125     for (i=0; i<mbs; i++) {
3126       c->imax[i] = a->imax[i];
3127       c->ilen[i] = a->ilen[i];
3128     }
3129     c->free_imax_ilen = PETSC_TRUE;
3130   }
3131 
3132   /* allocate the matrix space */
3133   if (mallocmatspace) {
3134     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3135       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3136       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3137 
3138       c->i            = a->i;
3139       c->j            = a->j;
3140       c->singlemalloc = PETSC_FALSE;
3141       c->free_a       = PETSC_TRUE;
3142       c->free_ij      = PETSC_FALSE;
3143       c->parent       = A;
3144       C->preallocated = PETSC_TRUE;
3145       C->assembled    = PETSC_TRUE;
3146 
3147       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3148       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3149       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3150     } else {
3151       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3152       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3153 
3154       c->singlemalloc = PETSC_TRUE;
3155       c->free_a       = PETSC_TRUE;
3156       c->free_ij      = PETSC_TRUE;
3157 
3158       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3159       if (mbs > 0) {
3160         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3161         if (cpvalues == MAT_COPY_VALUES) {
3162           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3163         } else {
3164           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3165         }
3166       }
3167       C->preallocated = PETSC_TRUE;
3168       C->assembled    = PETSC_TRUE;
3169     }
3170   }
3171 
3172   c->roworiented = a->roworiented;
3173   c->nonew       = a->nonew;
3174 
3175   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3176   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3177 
3178   c->bs2         = a->bs2;
3179   c->mbs         = a->mbs;
3180   c->nbs         = a->nbs;
3181 
3182   if (a->diag) {
3183     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3184       c->diag      = a->diag;
3185       c->free_diag = PETSC_FALSE;
3186     } else {
3187       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3188       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3189       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3190       c->free_diag = PETSC_TRUE;
3191     }
3192   } else c->diag = 0;
3193 
3194   c->nz         = a->nz;
3195   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3196   c->solve_work = NULL;
3197   c->mult_work  = NULL;
3198   c->sor_workt  = NULL;
3199   c->sor_work   = NULL;
3200 
3201   c->compressedrow.use   = a->compressedrow.use;
3202   c->compressedrow.nrows = a->compressedrow.nrows;
3203   if (a->compressedrow.use) {
3204     i    = a->compressedrow.nrows;
3205     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3206     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3207     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3208     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3209   } else {
3210     c->compressedrow.use    = PETSC_FALSE;
3211     c->compressedrow.i      = NULL;
3212     c->compressedrow.rindex = NULL;
3213   }
3214   C->nonzerostate = A->nonzerostate;
3215 
3216   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3217   PetscFunctionReturn(0);
3218 }
3219 
3220 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3221 {
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
3225   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3226   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3227   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3228   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3233 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3234 {
3235   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3236   PetscInt       *rowidxs,*colidxs;
3237   PetscScalar    *matvals;
3238   PetscErrorCode ierr;
3239 
3240   PetscFunctionBegin;
3241   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3242 
3243   /* read matrix header */
3244   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3245   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3246   M = header[1]; N = header[2]; nz = header[3];
3247   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3248   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3249   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3250 
3251   /* set block sizes from the viewer's .info file */
3252   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3253   /* set local and global sizes if not set already */
3254   if (mat->rmap->n < 0) mat->rmap->n = M;
3255   if (mat->cmap->n < 0) mat->cmap->n = N;
3256   if (mat->rmap->N < 0) mat->rmap->N = M;
3257   if (mat->cmap->N < 0) mat->cmap->N = N;
3258   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3259   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3260 
3261   /* check if the matrix sizes are correct */
3262   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3263   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3264   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3265   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3266   mbs = m/bs; nbs = n/bs;
3267 
3268   /* read in row lengths, column indices and nonzero values */
3269   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3270   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3271   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3272   sum = rowidxs[m];
3273   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3274 
3275   /* read in column indices and nonzero values */
3276   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3277   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3278   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3279 
3280   { /* preallocate matrix storage */
3281     PetscBT   bt; /* helper bit set to count nonzeros */
3282     PetscInt  *nnz;
3283     PetscBool sbaij;
3284 
3285     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3286     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3287     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3288     for (i=0; i<mbs; i++) {
3289       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3290       for (k=0; k<bs; k++) {
3291         PetscInt row = bs*i + k;
3292         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3293           PetscInt col = colidxs[j];
3294           if (!sbaij || col >= row)
3295             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3296         }
3297       }
3298     }
3299     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3300     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3301     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3302     ierr = PetscFree(nnz);CHKERRQ(ierr);
3303   }
3304 
3305   /* store matrix values */
3306   for (i=0; i<m; i++) {
3307     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3308     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3309   }
3310 
3311   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3312   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3313   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3314   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3315   PetscFunctionReturn(0);
3316 }
3317 
3318 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3319 {
3320   PetscErrorCode ierr;
3321   PetscBool      isbinary;
3322 
3323   PetscFunctionBegin;
3324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3325   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3326   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 /*@C
3331    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3332    compressed row) format.  For good matrix assembly performance the
3333    user should preallocate the matrix storage by setting the parameter nz
3334    (or the array nnz).  By setting these parameters accurately, performance
3335    during matrix assembly can be increased by more than a factor of 50.
3336 
3337    Collective
3338 
3339    Input Parameters:
3340 +  comm - MPI communicator, set to PETSC_COMM_SELF
3341 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3342           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3343 .  m - number of rows
3344 .  n - number of columns
3345 .  nz - number of nonzero blocks  per block row (same for all rows)
3346 -  nnz - array containing the number of nonzero blocks in the various block rows
3347          (possibly different for each block row) or NULL
3348 
3349    Output Parameter:
3350 .  A - the matrix
3351 
3352    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3353    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3354    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3355 
3356    Options Database Keys:
3357 +   -mat_no_unroll - uses code that does not unroll the loops in the
3358                      block calculations (much slower)
3359 -    -mat_block_size - size of the blocks to use
3360 
3361    Level: intermediate
3362 
3363    Notes:
3364    The number of rows and columns must be divisible by blocksize.
3365 
3366    If the nnz parameter is given then the nz parameter is ignored
3367 
3368    A nonzero block is any block that as 1 or more nonzeros in it
3369 
3370    The block AIJ format is fully compatible with standard Fortran 77
3371    storage.  That is, the stored row and column indices can begin at
3372    either one (as in Fortran) or zero.  See the users' manual for details.
3373 
3374    Specify the preallocated storage with either nz or nnz (not both).
3375    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3376    allocation.  See Users-Manual: ch_mat for details.
3377    matrices.
3378 
3379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3380 @*/
3381 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3382 {
3383   PetscErrorCode ierr;
3384 
3385   PetscFunctionBegin;
3386   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3387   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3388   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3389   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 /*@C
3394    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3395    per row in the matrix. For good matrix assembly performance the
3396    user should preallocate the matrix storage by setting the parameter nz
3397    (or the array nnz).  By setting these parameters accurately, performance
3398    during matrix assembly can be increased by more than a factor of 50.
3399 
3400    Collective
3401 
3402    Input Parameters:
3403 +  B - the matrix
3404 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3405           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3406 .  nz - number of block nonzeros per block row (same for all rows)
3407 -  nnz - array containing the number of block nonzeros in the various block rows
3408          (possibly different for each block row) or NULL
3409 
3410    Options Database Keys:
3411 +   -mat_no_unroll - uses code that does not unroll the loops in the
3412                      block calculations (much slower)
3413 -   -mat_block_size - size of the blocks to use
3414 
3415    Level: intermediate
3416 
3417    Notes:
3418    If the nnz parameter is given then the nz parameter is ignored
3419 
3420    You can call MatGetInfo() to get information on how effective the preallocation was;
3421    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3422    You can also run with the option -info and look for messages with the string
3423    malloc in them to see if additional memory allocation was needed.
3424 
3425    The block AIJ format is fully compatible with standard Fortran 77
3426    storage.  That is, the stored row and column indices can begin at
3427    either one (as in Fortran) or zero.  See the users' manual for details.
3428 
3429    Specify the preallocated storage with either nz or nnz (not both).
3430    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3431    allocation.  See Users-Manual: ch_mat for details.
3432 
3433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3434 @*/
3435 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3436 {
3437   PetscErrorCode ierr;
3438 
3439   PetscFunctionBegin;
3440   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3441   PetscValidType(B,1);
3442   PetscValidLogicalCollectiveInt(B,bs,2);
3443   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3444   PetscFunctionReturn(0);
3445 }
3446 
3447 /*@C
3448    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3449 
3450    Collective
3451 
3452    Input Parameters:
3453 +  B - the matrix
3454 .  i - the indices into j for the start of each local row (starts with zero)
3455 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3456 -  v - optional values in the matrix
3457 
3458    Level: advanced
3459 
3460    Notes:
3461    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3462    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3463    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3464    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3465    block column and the second index is over columns within a block.
3466 
3467    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3468 
3469 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3470 @*/
3471 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3472 {
3473   PetscErrorCode ierr;
3474 
3475   PetscFunctionBegin;
3476   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3477   PetscValidType(B,1);
3478   PetscValidLogicalCollectiveInt(B,bs,2);
3479   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 
3484 /*@
3485      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3486 
3487      Collective
3488 
3489    Input Parameters:
3490 +  comm - must be an MPI communicator of size 1
3491 .  bs - size of block
3492 .  m - number of rows
3493 .  n - number of columns
3494 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3495 .  j - column indices
3496 -  a - matrix values
3497 
3498    Output Parameter:
3499 .  mat - the matrix
3500 
3501    Level: advanced
3502 
3503    Notes:
3504        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3505     once the matrix is destroyed
3506 
3507        You cannot set new nonzero locations into this matrix, that will generate an error.
3508 
3509        The i and j indices are 0 based
3510 
3511        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3512 
3513       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3514       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3515       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3516       with column-major ordering within blocks.
3517 
3518 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3519 
3520 @*/
3521 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3522 {
3523   PetscErrorCode ierr;
3524   PetscInt       ii;
3525   Mat_SeqBAIJ    *baij;
3526 
3527   PetscFunctionBegin;
3528   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3529   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3530 
3531   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3532   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3533   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3534   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3535   baij = (Mat_SeqBAIJ*)(*mat)->data;
3536   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3537   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3538 
3539   baij->i = i;
3540   baij->j = j;
3541   baij->a = a;
3542 
3543   baij->singlemalloc = PETSC_FALSE;
3544   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3545   baij->free_a       = PETSC_FALSE;
3546   baij->free_ij      = PETSC_FALSE;
3547 
3548   for (ii=0; ii<m; ii++) {
3549     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3550 #if defined(PETSC_USE_DEBUG)
3551     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3552 #endif
3553   }
3554 #if defined(PETSC_USE_DEBUG)
3555   for (ii=0; ii<baij->i[m]; ii++) {
3556     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3557     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3558   }
3559 #endif
3560 
3561   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3562   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3567 {
3568   PetscErrorCode ierr;
3569   PetscMPIInt    size;
3570 
3571   PetscFunctionBegin;
3572   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3573   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3574     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3575   } else {
3576     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3577   }
3578   PetscFunctionReturn(0);
3579 }
3580