xref: /petsc/src/mat/impls/baij/seq/baij.c (revision e83c9f9bd2cb5c902db2e43dbea8bbeaa0e25153)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnNorms_SeqBAIJ(Mat A,NormType type,PetscReal *norms)
21 {
22   PetscErrorCode ierr;
23   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
24   PetscInt       N,i;
25   PetscInt       ib,jb,bs = A->rmap->bs;
26   MatScalar      *a_val = a_aij->a;
27 
28   PetscFunctionBegin;
29   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
30   for (i=0; i<N; i++) norms[i] = 0.0;
31   if (type == NORM_2) {
32     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
33       for (jb=0; jb<bs; jb++) {
34         for (ib=0; ib<bs; ib++) {
35           norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
36           a_val++;
37         }
38       }
39     }
40   } else if (type == NORM_1) {
41     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
42       for (jb=0; jb<bs; jb++) {
43         for (ib=0; ib<bs; ib++) {
44           norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
45           a_val++;
46         }
47       }
48     }
49   } else if (type == NORM_INFINITY) {
50     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
51       for (jb=0; jb<bs; jb++) {
52         for (ib=0; ib<bs; ib++) {
53           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
54           norms[col] = PetscMax(PetscAbsScalar(*a_val), norms[col]);
55           a_val++;
56         }
57       }
58     }
59   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
60   if (type == NORM_2) {
61     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
62   }
63   PetscFunctionReturn(0);
64 }
65 
66 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
67 {
68   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
69   PetscErrorCode ierr;
70   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
71   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
72   PetscReal      shift = 0.0;
73   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
74 
75   PetscFunctionBegin;
76   allowzeropivot = PetscNot(A->erroriffailure);
77 
78   if (a->idiagvalid) {
79     if (values) *values = a->idiag;
80     PetscFunctionReturn(0);
81   }
82   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
83   diag_offset = a->diag;
84   if (!a->idiag) {
85     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
86     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
87   }
88   diag  = a->idiag;
89   if (values) *values = a->idiag;
90   /* factor and invert each block */
91   switch (bs) {
92   case 1:
93     for (i=0; i<mbs; i++) {
94       odiag    = v + 1*diag_offset[i];
95       diag[0]  = odiag[0];
96 
97       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
98         if (allowzeropivot) {
99           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
101           A->factorerror_zeropivot_row   = i;
102           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
103         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
104       }
105 
106       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
107       diag    += 1;
108     }
109     break;
110   case 2:
111     for (i=0; i<mbs; i++) {
112       odiag    = v + 4*diag_offset[i];
113       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
114       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
115       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116       diag    += 4;
117     }
118     break;
119   case 3:
120     for (i=0; i<mbs; i++) {
121       odiag    = v + 9*diag_offset[i];
122       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
123       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
124       diag[8]  = odiag[8];
125       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
126       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127       diag    += 9;
128     }
129     break;
130   case 4:
131     for (i=0; i<mbs; i++) {
132       odiag  = v + 16*diag_offset[i];
133       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
134       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag  += 16;
137     }
138     break;
139   case 5:
140     for (i=0; i<mbs; i++) {
141       odiag  = v + 25*diag_offset[i];
142       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
143       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
144       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145       diag  += 25;
146     }
147     break;
148   case 6:
149     for (i=0; i<mbs; i++) {
150       odiag  = v + 36*diag_offset[i];
151       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
152       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
153       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
154       diag  += 36;
155     }
156     break;
157   case 7:
158     for (i=0; i<mbs; i++) {
159       odiag  = v + 49*diag_offset[i];
160       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
161       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag  += 49;
164     }
165     break;
166   default:
167     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
168     for (i=0; i<mbs; i++) {
169       odiag  = v + bs2*diag_offset[i];
170       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
171       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
172       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
173       diag  += bs2;
174     }
175     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
176   }
177   a->idiagvalid = PETSC_TRUE;
178   PetscFunctionReturn(0);
179 }
180 
181 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
182 {
183   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
184   PetscScalar       *x,*work,*w,*workt,*t;
185   const MatScalar   *v,*aa = a->a, *idiag;
186   const PetscScalar *b,*xb;
187   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
188   PetscErrorCode    ierr;
189   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
190   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
191 
192   PetscFunctionBegin;
193   its = its*lits;
194   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
195   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
196   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
197   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
198   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
199 
200   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
201 
202   if (!m) PetscFunctionReturn(0);
203   diag  = a->diag;
204   idiag = a->idiag;
205   k    = PetscMax(A->rmap->n,A->cmap->n);
206   if (!a->mult_work) {
207     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
208   }
209   if (!a->sor_workt) {
210     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
211   }
212   if (!a->sor_work) {
213     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
214   }
215   work = a->mult_work;
216   t    = a->sor_workt;
217   w    = a->sor_work;
218 
219   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
220   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
221 
222   if (flag & SOR_ZERO_INITIAL_GUESS) {
223     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
224       switch (bs) {
225       case 1:
226         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
227         t[0] = b[0];
228         i2     = 1;
229         idiag += 1;
230         for (i=1; i<m; i++) {
231           v  = aa + ai[i];
232           vi = aj + ai[i];
233           nz = diag[i] - ai[i];
234           s[0] = b[i2];
235           for (j=0; j<nz; j++) {
236             xw[0] = x[vi[j]];
237             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
238           }
239           t[i2] = s[0];
240           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
241           x[i2]  = xw[0];
242           idiag += 1;
243           i2    += 1;
244         }
245         break;
246       case 2:
247         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
248         t[0] = b[0]; t[1] = b[1];
249         i2     = 2;
250         idiag += 4;
251         for (i=1; i<m; i++) {
252           v  = aa + 4*ai[i];
253           vi = aj + ai[i];
254           nz = diag[i] - ai[i];
255           s[0] = b[i2]; s[1] = b[i2+1];
256           for (j=0; j<nz; j++) {
257             idx = 2*vi[j];
258             it  = 4*j;
259             xw[0] = x[idx]; xw[1] = x[1+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
261           }
262           t[i2] = s[0]; t[i2+1] = s[1];
263           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
264           x[i2]   = xw[0]; x[i2+1] = xw[1];
265           idiag  += 4;
266           i2     += 2;
267         }
268         break;
269       case 3:
270         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
271         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
272         i2     = 3;
273         idiag += 9;
274         for (i=1; i<m; i++) {
275           v  = aa + 9*ai[i];
276           vi = aj + ai[i];
277           nz = diag[i] - ai[i];
278           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
279           while (nz--) {
280             idx = 3*(*vi++);
281             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
282             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
283             v  += 9;
284           }
285           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
286           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
287           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
288           idiag  += 9;
289           i2     += 3;
290         }
291         break;
292       case 4:
293         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
294         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
295         i2     = 4;
296         idiag += 16;
297         for (i=1; i<m; i++) {
298           v  = aa + 16*ai[i];
299           vi = aj + ai[i];
300           nz = diag[i] - ai[i];
301           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
302           while (nz--) {
303             idx = 4*(*vi++);
304             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
305             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
306             v  += 16;
307           }
308           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
309           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
310           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
311           idiag  += 16;
312           i2     += 4;
313         }
314         break;
315       case 5:
316         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
317         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
318         i2     = 5;
319         idiag += 25;
320         for (i=1; i<m; i++) {
321           v  = aa + 25*ai[i];
322           vi = aj + ai[i];
323           nz = diag[i] - ai[i];
324           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
325           while (nz--) {
326             idx = 5*(*vi++);
327             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
328             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
329             v  += 25;
330           }
331           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
332           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
333           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
334           idiag  += 25;
335           i2     += 5;
336         }
337         break;
338       case 6:
339         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
340         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
341         i2     = 6;
342         idiag += 36;
343         for (i=1; i<m; i++) {
344           v  = aa + 36*ai[i];
345           vi = aj + ai[i];
346           nz = diag[i] - ai[i];
347           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
348           while (nz--) {
349             idx = 6*(*vi++);
350             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
351             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
352             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
353             v  += 36;
354           }
355           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
356           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
357           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
358           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
359           idiag  += 36;
360           i2     += 6;
361         }
362         break;
363       case 7:
364         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
365         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
366         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
367         i2     = 7;
368         idiag += 49;
369         for (i=1; i<m; i++) {
370           v  = aa + 49*ai[i];
371           vi = aj + ai[i];
372           nz = diag[i] - ai[i];
373           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
374           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
375           while (nz--) {
376             idx = 7*(*vi++);
377             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
378             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
379             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
380             v  += 49;
381           }
382           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
383           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
384           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
385           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
386           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
387           idiag  += 49;
388           i2     += 7;
389         }
390         break;
391       default:
392         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
393         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
394         i2     = bs;
395         idiag += bs2;
396         for (i=1; i<m; i++) {
397           v  = aa + bs2*ai[i];
398           vi = aj + ai[i];
399           nz = diag[i] - ai[i];
400 
401           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
402           /* copy all rows of x that are needed into contiguous space */
403           workt = work;
404           for (j=0; j<nz; j++) {
405             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
406             workt += bs;
407           }
408           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
409           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
410           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
411 
412           idiag += bs2;
413           i2    += bs;
414         }
415         break;
416       }
417       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
418       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
419       xb = t;
420     }
421     else xb = b;
422     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
423       idiag = a->idiag+bs2*(a->mbs-1);
424       i2 = bs * (m-1);
425       switch (bs) {
426       case 1:
427         s[0]  = xb[i2];
428         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
429         x[i2] = xw[0];
430         i2   -= 1;
431         for (i=m-2; i>=0; i--) {
432           v  = aa + (diag[i]+1);
433           vi = aj + diag[i] + 1;
434           nz = ai[i+1] - diag[i] - 1;
435           s[0] = xb[i2];
436           for (j=0; j<nz; j++) {
437             xw[0] = x[vi[j]];
438             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
439           }
440           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
441           x[i2]  = xw[0];
442           idiag -= 1;
443           i2    -= 1;
444         }
445         break;
446       case 2:
447         s[0]  = xb[i2]; s[1] = xb[i2+1];
448         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
449         x[i2] = xw[0]; x[i2+1] = xw[1];
450         i2    -= 2;
451         idiag -= 4;
452         for (i=m-2; i>=0; i--) {
453           v  = aa + 4*(diag[i] + 1);
454           vi = aj + diag[i] + 1;
455           nz = ai[i+1] - diag[i] - 1;
456           s[0] = xb[i2]; s[1] = xb[i2+1];
457           for (j=0; j<nz; j++) {
458             idx = 2*vi[j];
459             it  = 4*j;
460             xw[0] = x[idx]; xw[1] = x[1+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
462           }
463           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
464           x[i2]   = xw[0]; x[i2+1] = xw[1];
465           idiag  -= 4;
466           i2     -= 2;
467         }
468         break;
469       case 3:
470         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
471         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
472         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
473         i2    -= 3;
474         idiag -= 9;
475         for (i=m-2; i>=0; i--) {
476           v  = aa + 9*(diag[i]+1);
477           vi = aj + diag[i] + 1;
478           nz = ai[i+1] - diag[i] - 1;
479           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
480           while (nz--) {
481             idx = 3*(*vi++);
482             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
483             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
484             v  += 9;
485           }
486           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
487           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
488           idiag  -= 9;
489           i2     -= 3;
490         }
491         break;
492       case 4:
493         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
494         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
495         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
496         i2    -= 4;
497         idiag -= 16;
498         for (i=m-2; i>=0; i--) {
499           v  = aa + 16*(diag[i]+1);
500           vi = aj + diag[i] + 1;
501           nz = ai[i+1] - diag[i] - 1;
502           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
503           while (nz--) {
504             idx = 4*(*vi++);
505             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
506             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
507             v  += 16;
508           }
509           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
510           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
511           idiag  -= 16;
512           i2     -= 4;
513         }
514         break;
515       case 5:
516         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
517         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
518         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
519         i2    -= 5;
520         idiag -= 25;
521         for (i=m-2; i>=0; i--) {
522           v  = aa + 25*(diag[i]+1);
523           vi = aj + diag[i] + 1;
524           nz = ai[i+1] - diag[i] - 1;
525           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
526           while (nz--) {
527             idx = 5*(*vi++);
528             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
529             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
530             v  += 25;
531           }
532           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
533           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
534           idiag  -= 25;
535           i2     -= 5;
536         }
537         break;
538       case 6:
539         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
540         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
541         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
542         i2    -= 6;
543         idiag -= 36;
544         for (i=m-2; i>=0; i--) {
545           v  = aa + 36*(diag[i]+1);
546           vi = aj + diag[i] + 1;
547           nz = ai[i+1] - diag[i] - 1;
548           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
549           while (nz--) {
550             idx = 6*(*vi++);
551             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
552             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
553             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
554             v  += 36;
555           }
556           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
557           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
558           idiag  -= 36;
559           i2     -= 6;
560         }
561         break;
562       case 7:
563         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
564         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
565         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
566         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
567         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
568         i2    -= 7;
569         idiag -= 49;
570         for (i=m-2; i>=0; i--) {
571           v  = aa + 49*(diag[i]+1);
572           vi = aj + diag[i] + 1;
573           nz = ai[i+1] - diag[i] - 1;
574           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
575           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
576           while (nz--) {
577             idx = 7*(*vi++);
578             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
579             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
580             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
581             v  += 49;
582           }
583           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
584           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
585           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
586           idiag  -= 49;
587           i2     -= 7;
588         }
589         break;
590       default:
591         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
592         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
593         i2    -= bs;
594         idiag -= bs2;
595         for (i=m-2; i>=0; i--) {
596           v  = aa + bs2*(diag[i]+1);
597           vi = aj + diag[i] + 1;
598           nz = ai[i+1] - diag[i] - 1;
599 
600           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
601           /* copy all rows of x that are needed into contiguous space */
602           workt = work;
603           for (j=0; j<nz; j++) {
604             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
605             workt += bs;
606           }
607           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
608           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
609 
610           idiag -= bs2;
611           i2    -= bs;
612         }
613         break;
614       }
615       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
616     }
617     its--;
618   }
619   while (its--) {
620     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
621       idiag = a->idiag;
622       i2 = 0;
623       switch (bs) {
624       case 1:
625         for (i=0; i<m; i++) {
626           v  = aa + ai[i];
627           vi = aj + ai[i];
628           nz = ai[i+1] - ai[i];
629           s[0] = b[i2];
630           for (j=0; j<nz; j++) {
631             xw[0] = x[vi[j]];
632             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
633           }
634           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
635           x[i2] += xw[0];
636           idiag += 1;
637           i2    += 1;
638         }
639         break;
640       case 2:
641         for (i=0; i<m; i++) {
642           v  = aa + 4*ai[i];
643           vi = aj + ai[i];
644           nz = ai[i+1] - ai[i];
645           s[0] = b[i2]; s[1] = b[i2+1];
646           for (j=0; j<nz; j++) {
647             idx = 2*vi[j];
648             it  = 4*j;
649             xw[0] = x[idx]; xw[1] = x[1+idx];
650             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
651           }
652           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
653           x[i2]  += xw[0]; x[i2+1] += xw[1];
654           idiag  += 4;
655           i2     += 2;
656         }
657         break;
658       case 3:
659         for (i=0; i<m; i++) {
660           v  = aa + 9*ai[i];
661           vi = aj + ai[i];
662           nz = ai[i+1] - ai[i];
663           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
664           while (nz--) {
665             idx = 3*(*vi++);
666             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
668             v  += 9;
669           }
670           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           idiag  += 9;
673           i2     += 3;
674         }
675         break;
676       case 4:
677         for (i=0; i<m; i++) {
678           v  = aa + 16*ai[i];
679           vi = aj + ai[i];
680           nz = ai[i+1] - ai[i];
681           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
682           while (nz--) {
683             idx = 4*(*vi++);
684             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
685             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
686             v  += 16;
687           }
688           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
689           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
690           idiag  += 16;
691           i2     += 4;
692         }
693         break;
694       case 5:
695         for (i=0; i<m; i++) {
696           v  = aa + 25*ai[i];
697           vi = aj + ai[i];
698           nz = ai[i+1] - ai[i];
699           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
700           while (nz--) {
701             idx = 5*(*vi++);
702             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
703             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
704             v  += 25;
705           }
706           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
707           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
708           idiag  += 25;
709           i2     += 5;
710         }
711         break;
712       case 6:
713         for (i=0; i<m; i++) {
714           v  = aa + 36*ai[i];
715           vi = aj + ai[i];
716           nz = ai[i+1] - ai[i];
717           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
718           while (nz--) {
719             idx = 6*(*vi++);
720             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
721             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
722             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
723             v  += 36;
724           }
725           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
726           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
727           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
728           idiag  += 36;
729           i2     += 6;
730         }
731         break;
732       case 7:
733         for (i=0; i<m; i++) {
734           v  = aa + 49*ai[i];
735           vi = aj + ai[i];
736           nz = ai[i+1] - ai[i];
737           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
738           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
739           while (nz--) {
740             idx = 7*(*vi++);
741             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
742             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
743             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
744             v  += 49;
745           }
746           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
747           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
748           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
749           idiag  += 49;
750           i2     += 7;
751         }
752         break;
753       default:
754         for (i=0; i<m; i++) {
755           v  = aa + bs2*ai[i];
756           vi = aj + ai[i];
757           nz = ai[i+1] - ai[i];
758 
759           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
760           /* copy all rows of x that are needed into contiguous space */
761           workt = work;
762           for (j=0; j<nz; j++) {
763             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
764             workt += bs;
765           }
766           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
767           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
768 
769           idiag += bs2;
770           i2    += bs;
771         }
772         break;
773       }
774       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
775     }
776     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
777       idiag = a->idiag+bs2*(a->mbs-1);
778       i2 = bs * (m-1);
779       switch (bs) {
780       case 1:
781         for (i=m-1; i>=0; i--) {
782           v  = aa + ai[i];
783           vi = aj + ai[i];
784           nz = ai[i+1] - ai[i];
785           s[0] = b[i2];
786           for (j=0; j<nz; j++) {
787             xw[0] = x[vi[j]];
788             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
789           }
790           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
791           x[i2] += xw[0];
792           idiag -= 1;
793           i2    -= 1;
794         }
795         break;
796       case 2:
797         for (i=m-1; i>=0; i--) {
798           v  = aa + 4*ai[i];
799           vi = aj + ai[i];
800           nz = ai[i+1] - ai[i];
801           s[0] = b[i2]; s[1] = b[i2+1];
802           for (j=0; j<nz; j++) {
803             idx = 2*vi[j];
804             it  = 4*j;
805             xw[0] = x[idx]; xw[1] = x[1+idx];
806             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
807           }
808           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
809           x[i2]  += xw[0]; x[i2+1] += xw[1];
810           idiag  -= 4;
811           i2     -= 2;
812         }
813         break;
814       case 3:
815         for (i=m-1; i>=0; i--) {
816           v  = aa + 9*ai[i];
817           vi = aj + ai[i];
818           nz = ai[i+1] - ai[i];
819           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
820           while (nz--) {
821             idx = 3*(*vi++);
822             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
824             v  += 9;
825           }
826           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           idiag  -= 9;
829           i2     -= 3;
830         }
831         break;
832       case 4:
833         for (i=m-1; i>=0; i--) {
834           v  = aa + 16*ai[i];
835           vi = aj + ai[i];
836           nz = ai[i+1] - ai[i];
837           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
838           while (nz--) {
839             idx = 4*(*vi++);
840             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
841             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
842             v  += 16;
843           }
844           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
845           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
846           idiag  -= 16;
847           i2     -= 4;
848         }
849         break;
850       case 5:
851         for (i=m-1; i>=0; i--) {
852           v  = aa + 25*ai[i];
853           vi = aj + ai[i];
854           nz = ai[i+1] - ai[i];
855           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
856           while (nz--) {
857             idx = 5*(*vi++);
858             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
859             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
860             v  += 25;
861           }
862           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
863           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
864           idiag  -= 25;
865           i2     -= 5;
866         }
867         break;
868       case 6:
869         for (i=m-1; i>=0; i--) {
870           v  = aa + 36*ai[i];
871           vi = aj + ai[i];
872           nz = ai[i+1] - ai[i];
873           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
874           while (nz--) {
875             idx = 6*(*vi++);
876             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
877             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
878             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
879             v  += 36;
880           }
881           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
882           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
883           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
884           idiag  -= 36;
885           i2     -= 6;
886         }
887         break;
888       case 7:
889         for (i=m-1; i>=0; i--) {
890           v  = aa + 49*ai[i];
891           vi = aj + ai[i];
892           nz = ai[i+1] - ai[i];
893           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
894           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
895           while (nz--) {
896             idx = 7*(*vi++);
897             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
898             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
899             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
900             v  += 49;
901           }
902           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
903           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
904           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
905           idiag  -= 49;
906           i2     -= 7;
907         }
908         break;
909       default:
910         for (i=m-1; i>=0; i--) {
911           v  = aa + bs2*ai[i];
912           vi = aj + ai[i];
913           nz = ai[i+1] - ai[i];
914 
915           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
916           /* copy all rows of x that are needed into contiguous space */
917           workt = work;
918           for (j=0; j<nz; j++) {
919             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
920             workt += bs;
921           }
922           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
923           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
924 
925           idiag -= bs2;
926           i2    -= bs;
927         }
928         break;
929       }
930       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
931     }
932   }
933   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
934   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
935   PetscFunctionReturn(0);
936 }
937 
938 /*
939     Special version for direct calls from Fortran (Used in PETSc-fun3d)
940 */
941 #if defined(PETSC_HAVE_FORTRAN_CAPS)
942 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
943 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
944 #define matsetvaluesblocked4_ matsetvaluesblocked4
945 #endif
946 
947 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
948 {
949   Mat               A  = *AA;
950   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
951   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
952   PetscInt          *ai    =a->i,*ailen=a->ilen;
953   PetscInt          *aj    =a->j,stepval,lastcol = -1;
954   const PetscScalar *value = v;
955   MatScalar         *ap,*aa = a->a,*bap;
956   PetscErrorCode    ierr;
957 
958   PetscFunctionBegin;
959   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
960   stepval = (n-1)*4;
961   for (k=0; k<m; k++) { /* loop over added rows */
962     row  = im[k];
963     rp   = aj + ai[row];
964     ap   = aa + 16*ai[row];
965     nrow = ailen[row];
966     low  = 0;
967     high = nrow;
968     for (l=0; l<n; l++) { /* loop over added columns */
969       col = in[l];
970       if (col <= lastcol)  low = 0;
971       else                high = nrow;
972       lastcol = col;
973       value   = v + k*(stepval+4 + l)*4;
974       while (high-low > 7) {
975         t = (low+high)/2;
976         if (rp[t] > col) high = t;
977         else             low  = t;
978       }
979       for (i=low; i<high; i++) {
980         if (rp[i] > col) break;
981         if (rp[i] == col) {
982           bap = ap +  16*i;
983           for (ii=0; ii<4; ii++,value+=stepval) {
984             for (jj=ii; jj<16; jj+=4) {
985               bap[jj] += *value++;
986             }
987           }
988           goto noinsert2;
989         }
990       }
991       N = nrow++ - 1;
992       high++; /* added new column index thus must search to one higher than before */
993       /* shift up all the later entries in this row */
994       for (ii=N; ii>=i; ii--) {
995         rp[ii+1] = rp[ii];
996         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
997       }
998       if (N >= i) {
999         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1000       }
1001       rp[i] = col;
1002       bap   = ap +  16*i;
1003       for (ii=0; ii<4; ii++,value+=stepval) {
1004         for (jj=ii; jj<16; jj+=4) {
1005           bap[jj] = *value++;
1006         }
1007       }
1008       noinsert2:;
1009       low = i;
1010     }
1011     ailen[row] = nrow;
1012   }
1013   PetscFunctionReturnVoid();
1014 }
1015 
1016 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1017 #define matsetvalues4_ MATSETVALUES4
1018 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1019 #define matsetvalues4_ matsetvalues4
1020 #endif
1021 
1022 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1023 {
1024   Mat         A  = *AA;
1025   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1026   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1027   PetscInt    *ai=a->i,*ailen=a->ilen;
1028   PetscInt    *aj=a->j,brow,bcol;
1029   PetscInt    ridx,cidx,lastcol = -1;
1030   MatScalar   *ap,value,*aa=a->a,*bap;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   for (k=0; k<m; k++) { /* loop over added rows */
1035     row  = im[k]; brow = row/4;
1036     rp   = aj + ai[brow];
1037     ap   = aa + 16*ai[brow];
1038     nrow = ailen[brow];
1039     low  = 0;
1040     high = nrow;
1041     for (l=0; l<n; l++) { /* loop over added columns */
1042       col   = in[l]; bcol = col/4;
1043       ridx  = row % 4; cidx = col % 4;
1044       value = v[l + k*n];
1045       if (col <= lastcol)  low = 0;
1046       else                high = nrow;
1047       lastcol = col;
1048       while (high-low > 7) {
1049         t = (low+high)/2;
1050         if (rp[t] > bcol) high = t;
1051         else              low  = t;
1052       }
1053       for (i=low; i<high; i++) {
1054         if (rp[i] > bcol) break;
1055         if (rp[i] == bcol) {
1056           bap   = ap +  16*i + 4*cidx + ridx;
1057           *bap += value;
1058           goto noinsert1;
1059         }
1060       }
1061       N = nrow++ - 1;
1062       high++; /* added new column thus must search to one higher than before */
1063       /* shift up all the later entries in this row */
1064       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1065       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1066       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1067       rp[i]                    = bcol;
1068       ap[16*i + 4*cidx + ridx] = value;
1069 noinsert1:;
1070       low = i;
1071     }
1072     ailen[brow] = nrow;
1073   }
1074   PetscFunctionReturnVoid();
1075 }
1076 
1077 /*
1078      Checks for missing diagonals
1079 */
1080 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1081 {
1082   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       *diag,*ii = a->i,i;
1085 
1086   PetscFunctionBegin;
1087   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1088   *missing = PETSC_FALSE;
1089   if (A->rmap->n > 0 && !ii) {
1090     *missing = PETSC_TRUE;
1091     if (d) *d = 0;
1092     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1093   } else {
1094     PetscInt n;
1095     n = PetscMin(a->mbs, a->nbs);
1096     diag = a->diag;
1097     for (i=0; i<n; i++) {
1098       if (diag[i] >= ii[i+1]) {
1099         *missing = PETSC_TRUE;
1100         if (d) *d = i;
1101         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1102         break;
1103       }
1104     }
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1110 {
1111   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113   PetscInt       i,j,m = a->mbs;
1114 
1115   PetscFunctionBegin;
1116   if (!a->diag) {
1117     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1118     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1119     a->free_diag = PETSC_TRUE;
1120   }
1121   for (i=0; i<m; i++) {
1122     a->diag[i] = a->i[i+1];
1123     for (j=a->i[i]; j<a->i[i+1]; j++) {
1124       if (a->j[j] == i) {
1125         a->diag[i] = j;
1126         break;
1127       }
1128     }
1129   }
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1134 {
1135   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1136   PetscErrorCode ierr;
1137   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1138   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1139 
1140   PetscFunctionBegin;
1141   *nn = n;
1142   if (!ia) PetscFunctionReturn(0);
1143   if (symmetric) {
1144     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1145     nz   = tia[n];
1146   } else {
1147     tia = a->i; tja = a->j;
1148   }
1149 
1150   if (!blockcompressed && bs > 1) {
1151     (*nn) *= bs;
1152     /* malloc & create the natural set of indices */
1153     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1154     if (n) {
1155       (*ia)[0] = oshift;
1156       for (j=1; j<bs; j++) {
1157         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1158       }
1159     }
1160 
1161     for (i=1; i<n; i++) {
1162       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1163       for (j=1; j<bs; j++) {
1164         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1165       }
1166     }
1167     if (n) {
1168       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1169     }
1170 
1171     if (inja) {
1172       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1173       cnt = 0;
1174       for (i=0; i<n; i++) {
1175         for (j=0; j<bs; j++) {
1176           for (k=tia[i]; k<tia[i+1]; k++) {
1177             for (l=0; l<bs; l++) {
1178               (*ja)[cnt++] = bs*tja[k] + l;
1179             }
1180           }
1181         }
1182       }
1183     }
1184 
1185     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1186       ierr = PetscFree(tia);CHKERRQ(ierr);
1187       ierr = PetscFree(tja);CHKERRQ(ierr);
1188     }
1189   } else if (oshift == 1) {
1190     if (symmetric) {
1191       nz = tia[A->rmap->n/bs];
1192       /*  add 1 to i and j indices */
1193       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1194       *ia = tia;
1195       if (ja) {
1196         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1197         *ja = tja;
1198       }
1199     } else {
1200       nz = a->i[A->rmap->n/bs];
1201       /* malloc space and  add 1 to i and j indices */
1202       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1203       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1204       if (ja) {
1205         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1206         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1207       }
1208     }
1209   } else {
1210     *ia = tia;
1211     if (ja) *ja = tja;
1212   }
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1217 {
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1230 {
1231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235 #if defined(PETSC_USE_LOG)
1236   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1237 #endif
1238   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1239   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1240   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1241   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1242   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1243   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1244   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1245   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1246   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1247   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1248   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1249   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1250   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1251 
1252   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1253   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1254   ierr = PetscFree(A->data);CHKERRQ(ierr);
1255 
1256   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1257   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1258   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1259   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1260   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1261   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1262   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1263   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1264   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1265   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1266   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1268 #if defined(PETSC_HAVE_HYPRE)
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1270 #endif
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1276 {
1277   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1278   PetscErrorCode ierr;
1279 
1280   PetscFunctionBegin;
1281   switch (op) {
1282   case MAT_ROW_ORIENTED:
1283     a->roworiented = flg;
1284     break;
1285   case MAT_KEEP_NONZERO_PATTERN:
1286     a->keepnonzeropattern = flg;
1287     break;
1288   case MAT_NEW_NONZERO_LOCATIONS:
1289     a->nonew = (flg ? 0 : 1);
1290     break;
1291   case MAT_NEW_NONZERO_LOCATION_ERR:
1292     a->nonew = (flg ? -1 : 0);
1293     break;
1294   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1295     a->nonew = (flg ? -2 : 0);
1296     break;
1297   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1298     a->nounused = (flg ? -1 : 0);
1299     break;
1300   case MAT_FORCE_DIAGONAL_ENTRIES:
1301   case MAT_IGNORE_OFF_PROC_ENTRIES:
1302   case MAT_USE_HASH_TABLE:
1303   case MAT_SORTED_FULL:
1304     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1305     break;
1306   case MAT_SPD:
1307   case MAT_SYMMETRIC:
1308   case MAT_STRUCTURALLY_SYMMETRIC:
1309   case MAT_HERMITIAN:
1310   case MAT_SYMMETRY_ETERNAL:
1311   case MAT_SUBMAT_SINGLEIS:
1312   case MAT_STRUCTURE_ONLY:
1313     /* These options are handled directly by MatSetOption() */
1314     break;
1315   default:
1316     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1317   }
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1322 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1323 {
1324   PetscErrorCode ierr;
1325   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1326   MatScalar      *aa_i;
1327   PetscScalar    *v_i;
1328 
1329   PetscFunctionBegin;
1330   bs  = A->rmap->bs;
1331   bs2 = bs*bs;
1332   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1333 
1334   bn  = row/bs;   /* Block number */
1335   bp  = row % bs; /* Block Position */
1336   M   = ai[bn+1] - ai[bn];
1337   *nz = bs*M;
1338 
1339   if (v) {
1340     *v = NULL;
1341     if (*nz) {
1342       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1343       for (i=0; i<M; i++) { /* for each block in the block row */
1344         v_i  = *v + i*bs;
1345         aa_i = aa + bs2*(ai[bn] + i);
1346         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1347       }
1348     }
1349   }
1350 
1351   if (idx) {
1352     *idx = NULL;
1353     if (*nz) {
1354       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1355       for (i=0; i<M; i++) { /* for each block in the block row */
1356         idx_i = *idx + i*bs;
1357         itmp  = bs*aj[ai[bn] + i];
1358         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1359       }
1360     }
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1366 {
1367   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1368   PetscErrorCode ierr;
1369 
1370   PetscFunctionBegin;
1371   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1376 {
1377   PetscErrorCode ierr;
1378 
1379   PetscFunctionBegin;
1380   if (nz)  *nz = 0;
1381   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1382   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1387 {
1388   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1389   Mat            C;
1390   PetscErrorCode ierr;
1391   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1392   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1393   MatScalar      *ata,*aa=a->a;
1394 
1395   PetscFunctionBegin;
1396   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1397   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1398     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1399 
1400     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1401     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1402     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1403     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1404 
1405     at  = (Mat_SeqBAIJ*)C->data;
1406     ati = at->i;
1407     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1408   } else {
1409     C = *B;
1410     at = (Mat_SeqBAIJ*)C->data;
1411     ati = at->i;
1412   }
1413 
1414   atj = at->j;
1415   ata = at->a;
1416 
1417   /* Copy ati into atfill so we have locations of the next free space in atj */
1418   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1419 
1420   /* Walk through A row-wise and mark nonzero entries of A^T. */
1421   for (i=0; i<mbs; i++) {
1422     anzj = ai[i+1] - ai[i];
1423     for (j=0; j<anzj; j++) {
1424       atj[atfill[*aj]] = i;
1425       for (kr=0; kr<bs; kr++) {
1426         for (k=0; k<bs; k++) {
1427           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1428         }
1429       }
1430       atfill[*aj++] += 1;
1431     }
1432   }
1433   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1434   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435 
1436   /* Clean up temporary space and complete requests. */
1437   ierr = PetscFree(atfill);CHKERRQ(ierr);
1438 
1439   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1440     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1441     *B = C;
1442   } else {
1443     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1444   }
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1449 {
1450   PetscErrorCode ierr;
1451   Mat            Btrans;
1452 
1453   PetscFunctionBegin;
1454   *f   = PETSC_FALSE;
1455   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1456   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1457   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1462 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1463 {
1464   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1465   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1466   PetscInt       *rowlens,*colidxs;
1467   PetscScalar    *matvals;
1468   PetscErrorCode ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1472 
1473   M  = mat->rmap->N;
1474   N  = mat->cmap->N;
1475   m  = mat->rmap->n;
1476   bs = mat->rmap->bs;
1477   nz = bs*bs*A->nz;
1478 
1479   /* write matrix header */
1480   header[0] = MAT_FILE_CLASSID;
1481   header[1] = M; header[2] = N; header[3] = nz;
1482   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1483 
1484   /* store row lengths */
1485   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1486   for (cnt=0, i=0; i<A->mbs; i++)
1487     for (j=0; j<bs; j++)
1488       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1489   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1490   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1491 
1492   /* store column indices  */
1493   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1494   for (cnt=0, i=0; i<A->mbs; i++)
1495     for (k=0; k<bs; k++)
1496       for (j=A->i[i]; j<A->i[i+1]; j++)
1497         for (l=0; l<bs; l++)
1498           colidxs[cnt++] = bs*A->j[j] + l;
1499   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1500   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1501   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1502 
1503   /* store nonzero values */
1504   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1505   for (cnt=0, i=0; i<A->mbs; i++)
1506     for (k=0; k<bs; k++)
1507       for (j=A->i[i]; j<A->i[i+1]; j++)
1508         for (l=0; l<bs; l++)
1509           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1510   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1511   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1512   ierr = PetscFree(matvals);CHKERRQ(ierr);
1513 
1514   /* write block size option to the viewer's .info file */
1515   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1520 {
1521   PetscErrorCode ierr;
1522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1523   PetscInt       i,bs = A->rmap->bs,k;
1524 
1525   PetscFunctionBegin;
1526   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1527   for (i=0; i<a->mbs; i++) {
1528     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1529     for (k=a->i[i]; k<a->i[i+1]; k++) {
1530       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1531     }
1532     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1533   }
1534   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1539 {
1540   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1541   PetscErrorCode    ierr;
1542   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1543   PetscViewerFormat format;
1544 
1545   PetscFunctionBegin;
1546   if (A->structure_only) {
1547     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1548     PetscFunctionReturn(0);
1549   }
1550 
1551   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1552   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1553     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1554   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1555     const char *matname;
1556     Mat        aij;
1557     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1558     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1559     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1560     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1561     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1562   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1563       PetscFunctionReturn(0);
1564   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1565     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1566     for (i=0; i<a->mbs; i++) {
1567       for (j=0; j<bs; j++) {
1568         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1569         for (k=a->i[i]; k<a->i[i+1]; k++) {
1570           for (l=0; l<bs; l++) {
1571 #if defined(PETSC_USE_COMPLEX)
1572             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1574                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1575             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1576               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1577                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1578             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1579               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1580             }
1581 #else
1582             if (a->a[bs2*k + l*bs + j] != 0.0) {
1583               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1584             }
1585 #endif
1586           }
1587         }
1588         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1589       }
1590     }
1591     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1592   } else {
1593     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1594     for (i=0; i<a->mbs; i++) {
1595       for (j=0; j<bs; j++) {
1596         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1597         for (k=a->i[i]; k<a->i[i+1]; k++) {
1598           for (l=0; l<bs; l++) {
1599 #if defined(PETSC_USE_COMPLEX)
1600             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1601               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1602                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1603             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1604               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1605                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1606             } else {
1607               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1608             }
1609 #else
1610             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1611 #endif
1612           }
1613         }
1614         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1615       }
1616     }
1617     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1618   }
1619   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #include <petscdraw.h>
1624 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1625 {
1626   Mat               A = (Mat) Aa;
1627   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1628   PetscErrorCode    ierr;
1629   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1630   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1631   MatScalar         *aa;
1632   PetscViewer       viewer;
1633   PetscViewerFormat format;
1634 
1635   PetscFunctionBegin;
1636   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1637   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1638   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1639 
1640   /* loop over matrix elements drawing boxes */
1641 
1642   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1643     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1644     /* Blue for negative, Cyan for zero and  Red for positive */
1645     color = PETSC_DRAW_BLUE;
1646     for (i=0,row=0; i<mbs; i++,row+=bs) {
1647       for (j=a->i[i]; j<a->i[i+1]; j++) {
1648         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1649         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1650         aa  = a->a + j*bs2;
1651         for (k=0; k<bs; k++) {
1652           for (l=0; l<bs; l++) {
1653             if (PetscRealPart(*aa++) >=  0.) continue;
1654             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1655           }
1656         }
1657       }
1658     }
1659     color = PETSC_DRAW_CYAN;
1660     for (i=0,row=0; i<mbs; i++,row+=bs) {
1661       for (j=a->i[i]; j<a->i[i+1]; j++) {
1662         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1663         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1664         aa  = a->a + j*bs2;
1665         for (k=0; k<bs; k++) {
1666           for (l=0; l<bs; l++) {
1667             if (PetscRealPart(*aa++) != 0.) continue;
1668             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1669           }
1670         }
1671       }
1672     }
1673     color = PETSC_DRAW_RED;
1674     for (i=0,row=0; i<mbs; i++,row+=bs) {
1675       for (j=a->i[i]; j<a->i[i+1]; j++) {
1676         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1677         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1678         aa  = a->a + j*bs2;
1679         for (k=0; k<bs; k++) {
1680           for (l=0; l<bs; l++) {
1681             if (PetscRealPart(*aa++) <= 0.) continue;
1682             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1683           }
1684         }
1685       }
1686     }
1687     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1688   } else {
1689     /* use contour shading to indicate magnitude of values */
1690     /* first determine max of all nonzero values */
1691     PetscReal minv = 0.0, maxv = 0.0;
1692     PetscDraw popup;
1693 
1694     for (i=0; i<a->nz*a->bs2; i++) {
1695       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1696     }
1697     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1698     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1699     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1700 
1701     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1702     for (i=0,row=0; i<mbs; i++,row+=bs) {
1703       for (j=a->i[i]; j<a->i[i+1]; j++) {
1704         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1705         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1706         aa  = a->a + j*bs2;
1707         for (k=0; k<bs; k++) {
1708           for (l=0; l<bs; l++) {
1709             MatScalar v = *aa++;
1710             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1711             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1712           }
1713         }
1714       }
1715     }
1716     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1717   }
1718   PetscFunctionReturn(0);
1719 }
1720 
1721 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1722 {
1723   PetscErrorCode ierr;
1724   PetscReal      xl,yl,xr,yr,w,h;
1725   PetscDraw      draw;
1726   PetscBool      isnull;
1727 
1728   PetscFunctionBegin;
1729   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1730   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1731   if (isnull) PetscFunctionReturn(0);
1732 
1733   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1734   xr  += w;          yr += h;        xl = -w;     yl = -h;
1735   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1736   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1737   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1738   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1739   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1744 {
1745   PetscErrorCode ierr;
1746   PetscBool      iascii,isbinary,isdraw;
1747 
1748   PetscFunctionBegin;
1749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1752   if (iascii) {
1753     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1754   } else if (isbinary) {
1755     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1756   } else if (isdraw) {
1757     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1758   } else {
1759     Mat B;
1760     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1761     ierr = MatView(B,viewer);CHKERRQ(ierr);
1762     ierr = MatDestroy(&B);CHKERRQ(ierr);
1763   }
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1768 {
1769   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1770   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1771   PetscInt    *ai = a->i,*ailen = a->ilen;
1772   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1773   MatScalar   *ap,*aa = a->a;
1774 
1775   PetscFunctionBegin;
1776   for (k=0; k<m; k++) { /* loop over rows */
1777     row = im[k]; brow = row/bs;
1778     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1779     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1780     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1781     nrow = ailen[brow];
1782     for (l=0; l<n; l++) { /* loop over columns */
1783       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1784       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1785       col  = in[l];
1786       bcol = col/bs;
1787       cidx = col%bs;
1788       ridx = row%bs;
1789       high = nrow;
1790       low  = 0; /* assume unsorted */
1791       while (high-low > 5) {
1792         t = (low+high)/2;
1793         if (rp[t] > bcol) high = t;
1794         else             low  = t;
1795       }
1796       for (i=low; i<high; i++) {
1797         if (rp[i] > bcol) break;
1798         if (rp[i] == bcol) {
1799           *v++ = ap[bs2*i+bs*cidx+ridx];
1800           goto finished;
1801         }
1802       }
1803       *v++ = 0.0;
1804 finished:;
1805     }
1806   }
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1811 {
1812   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1813   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1814   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1815   PetscErrorCode    ierr;
1816   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1817   PetscBool         roworiented=a->roworiented;
1818   const PetscScalar *value     = v;
1819   MatScalar         *ap=NULL,*aa = a->a,*bap;
1820 
1821   PetscFunctionBegin;
1822   if (roworiented) {
1823     stepval = (n-1)*bs;
1824   } else {
1825     stepval = (m-1)*bs;
1826   }
1827   for (k=0; k<m; k++) { /* loop over added rows */
1828     row = im[k];
1829     if (row < 0) continue;
1830     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1831     rp   = aj + ai[row];
1832     if (!A->structure_only) ap = aa + bs2*ai[row];
1833     rmax = imax[row];
1834     nrow = ailen[row];
1835     low  = 0;
1836     high = nrow;
1837     for (l=0; l<n; l++) { /* loop over added columns */
1838       if (in[l] < 0) continue;
1839       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1840       col = in[l];
1841       if (!A->structure_only) {
1842         if (roworiented) {
1843           value = v + (k*(stepval+bs) + l)*bs;
1844         } else {
1845           value = v + (l*(stepval+bs) + k)*bs;
1846         }
1847       }
1848       if (col <= lastcol) low = 0;
1849       else high = nrow;
1850       lastcol = col;
1851       while (high-low > 7) {
1852         t = (low+high)/2;
1853         if (rp[t] > col) high = t;
1854         else             low  = t;
1855       }
1856       for (i=low; i<high; i++) {
1857         if (rp[i] > col) break;
1858         if (rp[i] == col) {
1859           if (A->structure_only) goto noinsert2;
1860           bap = ap +  bs2*i;
1861           if (roworiented) {
1862             if (is == ADD_VALUES) {
1863               for (ii=0; ii<bs; ii++,value+=stepval) {
1864                 for (jj=ii; jj<bs2; jj+=bs) {
1865                   bap[jj] += *value++;
1866                 }
1867               }
1868             } else {
1869               for (ii=0; ii<bs; ii++,value+=stepval) {
1870                 for (jj=ii; jj<bs2; jj+=bs) {
1871                   bap[jj] = *value++;
1872                 }
1873               }
1874             }
1875           } else {
1876             if (is == ADD_VALUES) {
1877               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1878                 for (jj=0; jj<bs; jj++) {
1879                   bap[jj] += value[jj];
1880                 }
1881                 bap += bs;
1882               }
1883             } else {
1884               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1885                 for (jj=0; jj<bs; jj++) {
1886                   bap[jj]  = value[jj];
1887                 }
1888                 bap += bs;
1889               }
1890             }
1891           }
1892           goto noinsert2;
1893         }
1894       }
1895       if (nonew == 1) goto noinsert2;
1896       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1897       if (A->structure_only) {
1898         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1899       } else {
1900         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1901       }
1902       N = nrow++ - 1; high++;
1903       /* shift up all the later entries in this row */
1904       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1905       rp[i] = col;
1906       if (!A->structure_only) {
1907         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1908         bap   = ap +  bs2*i;
1909         if (roworiented) {
1910           for (ii=0; ii<bs; ii++,value+=stepval) {
1911             for (jj=ii; jj<bs2; jj+=bs) {
1912               bap[jj] = *value++;
1913             }
1914           }
1915         } else {
1916           for (ii=0; ii<bs; ii++,value+=stepval) {
1917             for (jj=0; jj<bs; jj++) {
1918               *bap++ = *value++;
1919             }
1920           }
1921         }
1922       }
1923 noinsert2:;
1924       low = i;
1925     }
1926     ailen[row] = nrow;
1927   }
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1932 {
1933   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1934   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1935   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1936   PetscErrorCode ierr;
1937   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1938   MatScalar      *aa  = a->a,*ap;
1939   PetscReal      ratio=0.6;
1940 
1941   PetscFunctionBegin;
1942   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1943 
1944   if (m) rmax = ailen[0];
1945   for (i=1; i<mbs; i++) {
1946     /* move each row back by the amount of empty slots (fshift) before it*/
1947     fshift += imax[i-1] - ailen[i-1];
1948     rmax    = PetscMax(rmax,ailen[i]);
1949     if (fshift) {
1950       ip = aj + ai[i];
1951       ap = aa + bs2*ai[i];
1952       N  = ailen[i];
1953       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1954       if (!A->structure_only) {
1955         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1956       }
1957     }
1958     ai[i] = ai[i-1] + ailen[i-1];
1959   }
1960   if (mbs) {
1961     fshift += imax[mbs-1] - ailen[mbs-1];
1962     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1963   }
1964 
1965   /* reset ilen and imax for each row */
1966   a->nonzerorowcnt = 0;
1967   if (A->structure_only) {
1968     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1969   } else { /* !A->structure_only */
1970     for (i=0; i<mbs; i++) {
1971       ailen[i] = imax[i] = ai[i+1] - ai[i];
1972       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1973     }
1974   }
1975   a->nz = ai[mbs];
1976 
1977   /* diagonals may have moved, so kill the diagonal pointers */
1978   a->idiagvalid = PETSC_FALSE;
1979   if (fshift && a->diag) {
1980     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1981     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1982     a->diag = NULL;
1983   }
1984   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1985   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1986   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1987   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1988 
1989   A->info.mallocs    += a->reallocs;
1990   a->reallocs         = 0;
1991   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1992   a->rmax             = rmax;
1993 
1994   if (!A->structure_only) {
1995     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1996   }
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 /*
2001    This function returns an array of flags which indicate the locations of contiguous
2002    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2003    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2004    Assume: sizes should be long enough to hold all the values.
2005 */
2006 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2007 {
2008   PetscInt  i,j,k,row;
2009   PetscBool flg;
2010 
2011   PetscFunctionBegin;
2012   for (i=0,j=0; i<n; j++) {
2013     row = idx[i];
2014     if (row%bs!=0) { /* Not the beginning of a block */
2015       sizes[j] = 1;
2016       i++;
2017     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2018       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2019       i++;
2020     } else { /* Begining of the block, so check if the complete block exists */
2021       flg = PETSC_TRUE;
2022       for (k=1; k<bs; k++) {
2023         if (row+k != idx[i+k]) { /* break in the block */
2024           flg = PETSC_FALSE;
2025           break;
2026         }
2027       }
2028       if (flg) { /* No break in the bs */
2029         sizes[j] = bs;
2030         i       += bs;
2031       } else {
2032         sizes[j] = 1;
2033         i++;
2034       }
2035     }
2036   }
2037   *bs_max = j;
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2042 {
2043   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2044   PetscErrorCode    ierr;
2045   PetscInt          i,j,k,count,*rows;
2046   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2047   PetscScalar       zero = 0.0;
2048   MatScalar         *aa;
2049   const PetscScalar *xx;
2050   PetscScalar       *bb;
2051 
2052   PetscFunctionBegin;
2053   /* fix right hand side if needed */
2054   if (x && b) {
2055     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2056     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2057     for (i=0; i<is_n; i++) {
2058       bb[is_idx[i]] = diag*xx[is_idx[i]];
2059     }
2060     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2061     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2062   }
2063 
2064   /* Make a copy of the IS and  sort it */
2065   /* allocate memory for rows,sizes */
2066   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2067 
2068   /* copy IS values to rows, and sort them */
2069   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2070   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2071 
2072   if (baij->keepnonzeropattern) {
2073     for (i=0; i<is_n; i++) sizes[i] = 1;
2074     bs_max          = is_n;
2075   } else {
2076     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2077     A->nonzerostate++;
2078   }
2079 
2080   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2081     row = rows[j];
2082     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2083     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2084     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2085     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2086       if (diag != (PetscScalar)0.0) {
2087         if (baij->ilen[row/bs] > 0) {
2088           baij->ilen[row/bs]       = 1;
2089           baij->j[baij->i[row/bs]] = row/bs;
2090 
2091           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2092         }
2093         /* Now insert all the diagonal values for this bs */
2094         for (k=0; k<bs; k++) {
2095           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2096         }
2097       } else { /* (diag == 0.0) */
2098         baij->ilen[row/bs] = 0;
2099       } /* end (diag == 0.0) */
2100     } else { /* (sizes[i] != bs) */
2101       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2102       for (k=0; k<count; k++) {
2103         aa[0] =  zero;
2104         aa   += bs;
2105       }
2106       if (diag != (PetscScalar)0.0) {
2107         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2108       }
2109     }
2110   }
2111 
2112   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2113   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2118 {
2119   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2120   PetscErrorCode    ierr;
2121   PetscInt          i,j,k,count;
2122   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2123   PetscScalar       zero = 0.0;
2124   MatScalar         *aa;
2125   const PetscScalar *xx;
2126   PetscScalar       *bb;
2127   PetscBool         *zeroed,vecs = PETSC_FALSE;
2128 
2129   PetscFunctionBegin;
2130   /* fix right hand side if needed */
2131   if (x && b) {
2132     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2133     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2134     vecs = PETSC_TRUE;
2135   }
2136 
2137   /* zero the columns */
2138   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2139   for (i=0; i<is_n; i++) {
2140     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2141     zeroed[is_idx[i]] = PETSC_TRUE;
2142   }
2143   for (i=0; i<A->rmap->N; i++) {
2144     if (!zeroed[i]) {
2145       row = i/bs;
2146       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2147         for (k=0; k<bs; k++) {
2148           col = bs*baij->j[j] + k;
2149           if (zeroed[col]) {
2150             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2151             if (vecs) bb[i] -= aa[0]*xx[col];
2152             aa[0] = 0.0;
2153           }
2154         }
2155       }
2156     } else if (vecs) bb[i] = diag*xx[i];
2157   }
2158   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2159   if (vecs) {
2160     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2161     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2162   }
2163 
2164   /* zero the rows */
2165   for (i=0; i<is_n; i++) {
2166     row   = is_idx[i];
2167     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2168     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2169     for (k=0; k<count; k++) {
2170       aa[0] =  zero;
2171       aa   += bs;
2172     }
2173     if (diag != (PetscScalar)0.0) {
2174       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2175     }
2176   }
2177   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2182 {
2183   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2184   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2185   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2186   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2187   PetscErrorCode ierr;
2188   PetscInt       ridx,cidx,bs2=a->bs2;
2189   PetscBool      roworiented=a->roworiented;
2190   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2191 
2192   PetscFunctionBegin;
2193   for (k=0; k<m; k++) { /* loop over added rows */
2194     row  = im[k];
2195     brow = row/bs;
2196     if (row < 0) continue;
2197     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2198     rp   = aj + ai[brow];
2199     if (!A->structure_only) ap = aa + bs2*ai[brow];
2200     rmax = imax[brow];
2201     nrow = ailen[brow];
2202     low  = 0;
2203     high = nrow;
2204     for (l=0; l<n; l++) { /* loop over added columns */
2205       if (in[l] < 0) continue;
2206       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2207       col  = in[l]; bcol = col/bs;
2208       ridx = row % bs; cidx = col % bs;
2209       if (!A->structure_only) {
2210         if (roworiented) {
2211           value = v[l + k*n];
2212         } else {
2213           value = v[k + l*m];
2214         }
2215       }
2216       if (col <= lastcol) low = 0; else high = nrow;
2217       lastcol = col;
2218       while (high-low > 7) {
2219         t = (low+high)/2;
2220         if (rp[t] > bcol) high = t;
2221         else              low  = t;
2222       }
2223       for (i=low; i<high; i++) {
2224         if (rp[i] > bcol) break;
2225         if (rp[i] == bcol) {
2226           bap = ap +  bs2*i + bs*cidx + ridx;
2227           if (!A->structure_only) {
2228             if (is == ADD_VALUES) *bap += value;
2229             else                  *bap  = value;
2230           }
2231           goto noinsert1;
2232         }
2233       }
2234       if (nonew == 1) goto noinsert1;
2235       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2236       if (A->structure_only) {
2237         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2238       } else {
2239         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2240       }
2241       N = nrow++ - 1; high++;
2242       /* shift up all the later entries in this row */
2243       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2244       rp[i] = bcol;
2245       if (!A->structure_only) {
2246         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2247         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2248         ap[bs2*i + bs*cidx + ridx] = value;
2249       }
2250       a->nz++;
2251       A->nonzerostate++;
2252 noinsert1:;
2253       low = i;
2254     }
2255     ailen[brow] = nrow;
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2261 {
2262   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2263   Mat            outA;
2264   PetscErrorCode ierr;
2265   PetscBool      row_identity,col_identity;
2266 
2267   PetscFunctionBegin;
2268   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2269   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2270   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2271   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2272 
2273   outA            = inA;
2274   inA->factortype = MAT_FACTOR_LU;
2275   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2276   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2277 
2278   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2279 
2280   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2281   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2282   a->row = row;
2283   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2284   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2285   a->col = col;
2286 
2287   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2288   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2289   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2290   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2291 
2292   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2293   if (!a->solve_work) {
2294     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2295     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2296   }
2297   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2302 {
2303   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2304   PetscInt    i,nz,mbs;
2305 
2306   PetscFunctionBegin;
2307   nz  = baij->maxnz;
2308   mbs = baij->mbs;
2309   for (i=0; i<nz; i++) {
2310     baij->j[i] = indices[i];
2311   }
2312   baij->nz = nz;
2313   for (i=0; i<mbs; i++) {
2314     baij->ilen[i] = baij->imax[i];
2315   }
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 /*@
2320     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2321        in the matrix.
2322 
2323   Input Parameters:
2324 +  mat - the SeqBAIJ matrix
2325 -  indices - the column indices
2326 
2327   Level: advanced
2328 
2329   Notes:
2330     This can be called if you have precomputed the nonzero structure of the
2331   matrix and want to provide it to the matrix object to improve the performance
2332   of the MatSetValues() operation.
2333 
2334     You MUST have set the correct numbers of nonzeros per row in the call to
2335   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2336 
2337     MUST be called before any calls to MatSetValues();
2338 
2339 @*/
2340 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2341 {
2342   PetscErrorCode ierr;
2343 
2344   PetscFunctionBegin;
2345   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2346   PetscValidPointer(indices,2);
2347   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2352 {
2353   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2354   PetscErrorCode ierr;
2355   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2356   PetscReal      atmp;
2357   PetscScalar    *x,zero = 0.0;
2358   MatScalar      *aa;
2359   PetscInt       ncols,brow,krow,kcol;
2360 
2361   PetscFunctionBegin;
2362   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2363   bs  = A->rmap->bs;
2364   aa  = a->a;
2365   ai  = a->i;
2366   aj  = a->j;
2367   mbs = a->mbs;
2368 
2369   ierr = VecSet(v,zero);CHKERRQ(ierr);
2370   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2371   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2372   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2373   for (i=0; i<mbs; i++) {
2374     ncols = ai[1] - ai[0]; ai++;
2375     brow  = bs*i;
2376     for (j=0; j<ncols; j++) {
2377       for (kcol=0; kcol<bs; kcol++) {
2378         for (krow=0; krow<bs; krow++) {
2379           atmp = PetscAbsScalar(*aa);aa++;
2380           row  = brow + krow;   /* row index */
2381           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2382         }
2383       }
2384       aj++;
2385     }
2386   }
2387   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2392 {
2393   PetscErrorCode ierr;
2394 
2395   PetscFunctionBegin;
2396   /* If the two matrices have the same copy implementation, use fast copy. */
2397   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2398     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2399     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2400     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2401 
2402     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2403     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2404     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2405     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2406   } else {
2407     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2408   }
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2413 {
2414   PetscErrorCode ierr;
2415 
2416   PetscFunctionBegin;
2417   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2422 {
2423   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2424 
2425   PetscFunctionBegin;
2426   *array = a->a;
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2431 {
2432   PetscFunctionBegin;
2433   *array = NULL;
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2438 {
2439   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2440   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2441   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2442   PetscErrorCode ierr;
2443 
2444   PetscFunctionBegin;
2445   /* Set the number of nonzeros in the new matrix */
2446   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2451 {
2452   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2453   PetscErrorCode ierr;
2454   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2455   PetscBLASInt   one=1;
2456 
2457   PetscFunctionBegin;
2458   if (str == SAME_NONZERO_PATTERN) {
2459     PetscScalar  alpha = a;
2460     PetscBLASInt bnz;
2461     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2462     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2463     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2464   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2465     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2466   } else {
2467     Mat      B;
2468     PetscInt *nnz;
2469     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2470     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2471     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2472     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2473     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2474     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2475     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2476     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2477     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2478     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2479     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2480     ierr = PetscFree(nnz);CHKERRQ(ierr);
2481   }
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2486 {
2487 #if defined(PETSC_USE_COMPLEX)
2488   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2489   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2490   MatScalar   *aa = a->a;
2491 
2492   PetscFunctionBegin;
2493   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2494 #else
2495   PetscFunctionBegin;
2496 #endif
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2501 {
2502   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2503   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2504   MatScalar   *aa = a->a;
2505 
2506   PetscFunctionBegin;
2507   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2512 {
2513   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2514   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2515   MatScalar   *aa = a->a;
2516 
2517   PetscFunctionBegin;
2518   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 /*
2523     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2524 */
2525 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2526 {
2527   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2528   PetscErrorCode ierr;
2529   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2530   PetscInt       nz = a->i[m],row,*jj,mr,col;
2531 
2532   PetscFunctionBegin;
2533   *nn = n;
2534   if (!ia) PetscFunctionReturn(0);
2535   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2536   else {
2537     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2538     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2539     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2540     jj   = a->j;
2541     for (i=0; i<nz; i++) {
2542       collengths[jj[i]]++;
2543     }
2544     cia[0] = oshift;
2545     for (i=0; i<n; i++) {
2546       cia[i+1] = cia[i] + collengths[i];
2547     }
2548     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2549     jj   = a->j;
2550     for (row=0; row<m; row++) {
2551       mr = a->i[row+1] - a->i[row];
2552       for (i=0; i<mr; i++) {
2553         col = *jj++;
2554 
2555         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2556       }
2557     }
2558     ierr = PetscFree(collengths);CHKERRQ(ierr);
2559     *ia  = cia; *ja = cja;
2560   }
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2565 {
2566   PetscErrorCode ierr;
2567 
2568   PetscFunctionBegin;
2569   if (!ia) PetscFunctionReturn(0);
2570   ierr = PetscFree(*ia);CHKERRQ(ierr);
2571   ierr = PetscFree(*ja);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 /*
2576  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2577  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2578  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2579  */
2580 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2581 {
2582   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2583   PetscErrorCode ierr;
2584   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2585   PetscInt       nz = a->i[m],row,*jj,mr,col;
2586   PetscInt       *cspidx;
2587 
2588   PetscFunctionBegin;
2589   *nn = n;
2590   if (!ia) PetscFunctionReturn(0);
2591 
2592   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2593   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2594   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2595   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2596   jj   = a->j;
2597   for (i=0; i<nz; i++) {
2598     collengths[jj[i]]++;
2599   }
2600   cia[0] = oshift;
2601   for (i=0; i<n; i++) {
2602     cia[i+1] = cia[i] + collengths[i];
2603   }
2604   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2605   jj   = a->j;
2606   for (row=0; row<m; row++) {
2607     mr = a->i[row+1] - a->i[row];
2608     for (i=0; i<mr; i++) {
2609       col = *jj++;
2610       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2611       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2612     }
2613   }
2614   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2615   *ia    = cia;
2616   *ja    = cja;
2617   *spidx = cspidx;
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2622 {
2623   PetscErrorCode ierr;
2624 
2625   PetscFunctionBegin;
2626   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2627   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2632 {
2633   PetscErrorCode ierr;
2634   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2635 
2636   PetscFunctionBegin;
2637   if (!Y->preallocated || !aij->nz) {
2638     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2639   }
2640   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2641   PetscFunctionReturn(0);
2642 }
2643 
2644 /* -------------------------------------------------------------------*/
2645 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2646                                        MatGetRow_SeqBAIJ,
2647                                        MatRestoreRow_SeqBAIJ,
2648                                        MatMult_SeqBAIJ_N,
2649                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2650                                        MatMultTranspose_SeqBAIJ,
2651                                        MatMultTransposeAdd_SeqBAIJ,
2652                                        NULL,
2653                                        NULL,
2654                                        NULL,
2655                                /* 10*/ NULL,
2656                                        MatLUFactor_SeqBAIJ,
2657                                        NULL,
2658                                        NULL,
2659                                        MatTranspose_SeqBAIJ,
2660                                /* 15*/ MatGetInfo_SeqBAIJ,
2661                                        MatEqual_SeqBAIJ,
2662                                        MatGetDiagonal_SeqBAIJ,
2663                                        MatDiagonalScale_SeqBAIJ,
2664                                        MatNorm_SeqBAIJ,
2665                                /* 20*/ NULL,
2666                                        MatAssemblyEnd_SeqBAIJ,
2667                                        MatSetOption_SeqBAIJ,
2668                                        MatZeroEntries_SeqBAIJ,
2669                                /* 24*/ MatZeroRows_SeqBAIJ,
2670                                        NULL,
2671                                        NULL,
2672                                        NULL,
2673                                        NULL,
2674                                /* 29*/ MatSetUp_SeqBAIJ,
2675                                        NULL,
2676                                        NULL,
2677                                        NULL,
2678                                        NULL,
2679                                /* 34*/ MatDuplicate_SeqBAIJ,
2680                                        NULL,
2681                                        NULL,
2682                                        MatILUFactor_SeqBAIJ,
2683                                        NULL,
2684                                /* 39*/ MatAXPY_SeqBAIJ,
2685                                        MatCreateSubMatrices_SeqBAIJ,
2686                                        MatIncreaseOverlap_SeqBAIJ,
2687                                        MatGetValues_SeqBAIJ,
2688                                        MatCopy_SeqBAIJ,
2689                                /* 44*/ NULL,
2690                                        MatScale_SeqBAIJ,
2691                                        MatShift_SeqBAIJ,
2692                                        NULL,
2693                                        MatZeroRowsColumns_SeqBAIJ,
2694                                /* 49*/ NULL,
2695                                        MatGetRowIJ_SeqBAIJ,
2696                                        MatRestoreRowIJ_SeqBAIJ,
2697                                        MatGetColumnIJ_SeqBAIJ,
2698                                        MatRestoreColumnIJ_SeqBAIJ,
2699                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2700                                        NULL,
2701                                        NULL,
2702                                        NULL,
2703                                        MatSetValuesBlocked_SeqBAIJ,
2704                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2705                                        MatDestroy_SeqBAIJ,
2706                                        MatView_SeqBAIJ,
2707                                        NULL,
2708                                        NULL,
2709                                /* 64*/ NULL,
2710                                        NULL,
2711                                        NULL,
2712                                        NULL,
2713                                        NULL,
2714                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2715                                        NULL,
2716                                        MatConvert_Basic,
2717                                        NULL,
2718                                        NULL,
2719                                /* 74*/ NULL,
2720                                        MatFDColoringApply_BAIJ,
2721                                        NULL,
2722                                        NULL,
2723                                        NULL,
2724                                /* 79*/ NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                        MatLoad_SeqBAIJ,
2729                                /* 84*/ NULL,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                /* 89*/ NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        NULL,
2738                                        NULL,
2739                                /* 94*/ NULL,
2740                                        NULL,
2741                                        NULL,
2742                                        NULL,
2743                                        NULL,
2744                                /* 99*/ NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        MatConjugate_SeqBAIJ,
2748                                        NULL,
2749                                /*104*/ NULL,
2750                                        MatRealPart_SeqBAIJ,
2751                                        MatImaginaryPart_SeqBAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                /*109*/ NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        NULL,
2758                                        MatMissingDiagonal_SeqBAIJ,
2759                                /*114*/ NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                /*119*/ NULL,
2765                                        NULL,
2766                                        MatMultHermitianTranspose_SeqBAIJ,
2767                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2768                                        NULL,
2769                                /*124*/ NULL,
2770                                        MatGetColumnNorms_SeqBAIJ,
2771                                        MatInvertBlockDiagonal_SeqBAIJ,
2772                                        NULL,
2773                                        NULL,
2774                                /*129*/ NULL,
2775                                        NULL,
2776                                        NULL,
2777                                        NULL,
2778                                        NULL,
2779                                /*134*/ NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        NULL,
2783                                        NULL,
2784                                /*139*/ MatSetBlockSizes_Default,
2785                                        NULL,
2786                                        NULL,
2787                                        MatFDColoringSetUp_SeqXAIJ,
2788                                        NULL,
2789                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2790                                        MatDestroySubMatrices_SeqBAIJ
2791 };
2792 
2793 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2794 {
2795   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2796   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2797   PetscErrorCode ierr;
2798 
2799   PetscFunctionBegin;
2800   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2801 
2802   /* allocate space for values if not already there */
2803   if (!aij->saved_values) {
2804     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2805     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2806   }
2807 
2808   /* copy values over */
2809   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2814 {
2815   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2816   PetscErrorCode ierr;
2817   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2818 
2819   PetscFunctionBegin;
2820   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2821   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2822 
2823   /* copy values over */
2824   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2829 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2830 
2831 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2832 {
2833   Mat_SeqBAIJ    *b;
2834   PetscErrorCode ierr;
2835   PetscInt       i,mbs,nbs,bs2;
2836   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2837 
2838   PetscFunctionBegin;
2839   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2840   if (nz == MAT_SKIP_ALLOCATION) {
2841     skipallocation = PETSC_TRUE;
2842     nz             = 0;
2843   }
2844 
2845   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2846   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2847   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2848   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2849 
2850   B->preallocated = PETSC_TRUE;
2851 
2852   mbs = B->rmap->n/bs;
2853   nbs = B->cmap->n/bs;
2854   bs2 = bs*bs;
2855 
2856   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2857 
2858   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2859   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2860   if (nnz) {
2861     for (i=0; i<mbs; i++) {
2862       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2863       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2864     }
2865   }
2866 
2867   b    = (Mat_SeqBAIJ*)B->data;
2868   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2869   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2870   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2871 
2872   if (!flg) {
2873     switch (bs) {
2874     case 1:
2875       B->ops->mult    = MatMult_SeqBAIJ_1;
2876       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2877       break;
2878     case 2:
2879       B->ops->mult    = MatMult_SeqBAIJ_2;
2880       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2881       break;
2882     case 3:
2883       B->ops->mult    = MatMult_SeqBAIJ_3;
2884       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2885       break;
2886     case 4:
2887       B->ops->mult    = MatMult_SeqBAIJ_4;
2888       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2889       break;
2890     case 5:
2891       B->ops->mult    = MatMult_SeqBAIJ_5;
2892       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2893       break;
2894     case 6:
2895       B->ops->mult    = MatMult_SeqBAIJ_6;
2896       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2897       break;
2898     case 7:
2899       B->ops->mult    = MatMult_SeqBAIJ_7;
2900       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2901       break;
2902     case 9:
2903     {
2904       PetscInt version = 1;
2905       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2906       switch (version) {
2907 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2908       case 1:
2909         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2910         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2911         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2912         break;
2913 #endif
2914       default:
2915         B->ops->mult    = MatMult_SeqBAIJ_N;
2916         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2917         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2918         break;
2919       }
2920       break;
2921     }
2922     case 11:
2923       B->ops->mult    = MatMult_SeqBAIJ_11;
2924       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2925       break;
2926     case 12:
2927     {
2928       PetscInt version = 1;
2929       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2930       switch (version) {
2931       case 1:
2932         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2933         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2934         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2935         break;
2936       case 2:
2937         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2938         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2939         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2940         break;
2941 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2942       case 3:
2943         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2944         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2945         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2946         break;
2947 #endif
2948       default:
2949         B->ops->mult    = MatMult_SeqBAIJ_N;
2950         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2951         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2952         break;
2953       }
2954       break;
2955     }
2956     case 15:
2957     {
2958       PetscInt version = 1;
2959       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2960       switch (version) {
2961       case 1:
2962         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2963         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2964         break;
2965       case 2:
2966         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2967         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2968         break;
2969       case 3:
2970         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2971         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2972         break;
2973       case 4:
2974         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2975         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2976         break;
2977       default:
2978         B->ops->mult    = MatMult_SeqBAIJ_N;
2979         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2980         break;
2981       }
2982       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2983       break;
2984     }
2985     default:
2986       B->ops->mult    = MatMult_SeqBAIJ_N;
2987       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2988       ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2989       break;
2990     }
2991   }
2992   B->ops->sor = MatSOR_SeqBAIJ;
2993   b->mbs = mbs;
2994   b->nbs = nbs;
2995   if (!skipallocation) {
2996     if (!b->imax) {
2997       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2998       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2999 
3000       b->free_imax_ilen = PETSC_TRUE;
3001     }
3002     /* b->ilen will count nonzeros in each block row so far. */
3003     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3004     if (!nnz) {
3005       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3006       else if (nz < 0) nz = 1;
3007       nz = PetscMin(nz,nbs);
3008       for (i=0; i<mbs; i++) b->imax[i] = nz;
3009       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
3010     } else {
3011       PetscInt64 nz64 = 0;
3012       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3013       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
3014     }
3015 
3016     /* allocate the matrix space */
3017     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3018     if (B->structure_only) {
3019       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
3020       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
3021       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
3022     } else {
3023       PetscInt nzbs2 = 0;
3024       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
3025       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
3026       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3027       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
3028     }
3029     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
3030 
3031     if (B->structure_only) {
3032       b->singlemalloc = PETSC_FALSE;
3033       b->free_a       = PETSC_FALSE;
3034     } else {
3035       b->singlemalloc = PETSC_TRUE;
3036       b->free_a       = PETSC_TRUE;
3037     }
3038     b->free_ij = PETSC_TRUE;
3039 
3040     b->i[0] = 0;
3041     for (i=1; i<mbs+1; i++) {
3042       b->i[i] = b->i[i-1] + b->imax[i-1];
3043     }
3044 
3045   } else {
3046     b->free_a  = PETSC_FALSE;
3047     b->free_ij = PETSC_FALSE;
3048   }
3049 
3050   b->bs2              = bs2;
3051   b->mbs              = mbs;
3052   b->nz               = 0;
3053   b->maxnz            = nz;
3054   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3055   B->was_assembled    = PETSC_FALSE;
3056   B->assembled        = PETSC_FALSE;
3057   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3062 {
3063   PetscInt       i,m,nz,nz_max=0,*nnz;
3064   PetscScalar    *values=NULL;
3065   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3066   PetscErrorCode ierr;
3067 
3068   PetscFunctionBegin;
3069   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3070   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3071   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3072   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3073   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3074   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3075   m    = B->rmap->n/bs;
3076 
3077   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3078   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3079   for (i=0; i<m; i++) {
3080     nz = ii[i+1]- ii[i];
3081     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3082     nz_max = PetscMax(nz_max, nz);
3083     nnz[i] = nz;
3084   }
3085   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3086   ierr = PetscFree(nnz);CHKERRQ(ierr);
3087 
3088   values = (PetscScalar*)V;
3089   if (!values) {
3090     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3091   }
3092   for (i=0; i<m; i++) {
3093     PetscInt          ncols  = ii[i+1] - ii[i];
3094     const PetscInt    *icols = jj + ii[i];
3095     if (bs == 1 || !roworiented) {
3096       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3097       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3098     } else {
3099       PetscInt j;
3100       for (j=0; j<ncols; j++) {
3101         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3102         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3103       }
3104     }
3105   }
3106   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3107   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3108   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3109   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3110   PetscFunctionReturn(0);
3111 }
3112 
3113 /*@C
3114    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3115 
3116    Not Collective
3117 
3118    Input Parameter:
3119 .  mat - a MATSEQBAIJ matrix
3120 
3121    Output Parameter:
3122 .   array - pointer to the data
3123 
3124    Level: intermediate
3125 
3126 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3127 @*/
3128 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3129 {
3130   PetscErrorCode ierr;
3131 
3132   PetscFunctionBegin;
3133   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3134   PetscFunctionReturn(0);
3135 }
3136 
3137 /*@C
3138    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3139 
3140    Not Collective
3141 
3142    Input Parameters:
3143 +  mat - a MATSEQBAIJ matrix
3144 -  array - pointer to the data
3145 
3146    Level: intermediate
3147 
3148 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3149 @*/
3150 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3151 {
3152   PetscErrorCode ierr;
3153 
3154   PetscFunctionBegin;
3155   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3156   PetscFunctionReturn(0);
3157 }
3158 
3159 /*MC
3160    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3161    block sparse compressed row format.
3162 
3163    Options Database Keys:
3164 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3165 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3166 
3167    Level: beginner
3168 
3169    Notes:
3170     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3171     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3172 
3173    Run with -info to see what version of the matrix-vector product is being used
3174 
3175 .seealso: MatCreateSeqBAIJ()
3176 M*/
3177 
3178 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3179 
3180 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3181 {
3182   PetscErrorCode ierr;
3183   PetscMPIInt    size;
3184   Mat_SeqBAIJ    *b;
3185 
3186   PetscFunctionBegin;
3187   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3188   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3189 
3190   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3191   B->data = (void*)b;
3192   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3193 
3194   b->row          = NULL;
3195   b->col          = NULL;
3196   b->icol         = NULL;
3197   b->reallocs     = 0;
3198   b->saved_values = NULL;
3199 
3200   b->roworiented        = PETSC_TRUE;
3201   b->nonew              = 0;
3202   b->diag               = NULL;
3203   B->spptr              = NULL;
3204   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3205   b->keepnonzeropattern = PETSC_FALSE;
3206 
3207   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3208   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3209   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3210   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3211   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3212   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3213   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3214   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3215   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3216   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3217 #if defined(PETSC_HAVE_HYPRE)
3218   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3219 #endif
3220   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3221   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3226 {
3227   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3228   PetscErrorCode ierr;
3229   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3230 
3231   PetscFunctionBegin;
3232   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3233 
3234   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3235     c->imax           = a->imax;
3236     c->ilen           = a->ilen;
3237     c->free_imax_ilen = PETSC_FALSE;
3238   } else {
3239     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3240     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3241     for (i=0; i<mbs; i++) {
3242       c->imax[i] = a->imax[i];
3243       c->ilen[i] = a->ilen[i];
3244     }
3245     c->free_imax_ilen = PETSC_TRUE;
3246   }
3247 
3248   /* allocate the matrix space */
3249   if (mallocmatspace) {
3250     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3251       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3252       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3253 
3254       c->i            = a->i;
3255       c->j            = a->j;
3256       c->singlemalloc = PETSC_FALSE;
3257       c->free_a       = PETSC_TRUE;
3258       c->free_ij      = PETSC_FALSE;
3259       c->parent       = A;
3260       C->preallocated = PETSC_TRUE;
3261       C->assembled    = PETSC_TRUE;
3262 
3263       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3264       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3265       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3266     } else {
3267       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3268       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3269 
3270       c->singlemalloc = PETSC_TRUE;
3271       c->free_a       = PETSC_TRUE;
3272       c->free_ij      = PETSC_TRUE;
3273 
3274       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3275       if (mbs > 0) {
3276         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3277         if (cpvalues == MAT_COPY_VALUES) {
3278           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3279         } else {
3280           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3281         }
3282       }
3283       C->preallocated = PETSC_TRUE;
3284       C->assembled    = PETSC_TRUE;
3285     }
3286   }
3287 
3288   c->roworiented = a->roworiented;
3289   c->nonew       = a->nonew;
3290 
3291   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3292   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3293 
3294   c->bs2         = a->bs2;
3295   c->mbs         = a->mbs;
3296   c->nbs         = a->nbs;
3297 
3298   if (a->diag) {
3299     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3300       c->diag      = a->diag;
3301       c->free_diag = PETSC_FALSE;
3302     } else {
3303       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3304       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3305       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3306       c->free_diag = PETSC_TRUE;
3307     }
3308   } else c->diag = NULL;
3309 
3310   c->nz         = a->nz;
3311   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3312   c->solve_work = NULL;
3313   c->mult_work  = NULL;
3314   c->sor_workt  = NULL;
3315   c->sor_work   = NULL;
3316 
3317   c->compressedrow.use   = a->compressedrow.use;
3318   c->compressedrow.nrows = a->compressedrow.nrows;
3319   if (a->compressedrow.use) {
3320     i    = a->compressedrow.nrows;
3321     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3322     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3323     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3324     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3325   } else {
3326     c->compressedrow.use    = PETSC_FALSE;
3327     c->compressedrow.i      = NULL;
3328     c->compressedrow.rindex = NULL;
3329   }
3330   C->nonzerostate = A->nonzerostate;
3331 
3332   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3337 {
3338   PetscErrorCode ierr;
3339 
3340   PetscFunctionBegin;
3341   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3342   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3343   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3344   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3345   PetscFunctionReturn(0);
3346 }
3347 
3348 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3349 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3350 {
3351   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3352   PetscInt       *rowidxs,*colidxs;
3353   PetscScalar    *matvals;
3354   PetscErrorCode ierr;
3355 
3356   PetscFunctionBegin;
3357   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3358 
3359   /* read matrix header */
3360   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3361   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3362   M = header[1]; N = header[2]; nz = header[3];
3363   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3364   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3365   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3366 
3367   /* set block sizes from the viewer's .info file */
3368   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3369   /* set local and global sizes if not set already */
3370   if (mat->rmap->n < 0) mat->rmap->n = M;
3371   if (mat->cmap->n < 0) mat->cmap->n = N;
3372   if (mat->rmap->N < 0) mat->rmap->N = M;
3373   if (mat->cmap->N < 0) mat->cmap->N = N;
3374   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3375   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3376 
3377   /* check if the matrix sizes are correct */
3378   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3379   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3380   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3381   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3382   mbs = m/bs; nbs = n/bs;
3383 
3384   /* read in row lengths, column indices and nonzero values */
3385   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3386   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3387   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3388   sum = rowidxs[m];
3389   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3390 
3391   /* read in column indices and nonzero values */
3392   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3393   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3394   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3395 
3396   { /* preallocate matrix storage */
3397     PetscBT   bt; /* helper bit set to count nonzeros */
3398     PetscInt  *nnz;
3399     PetscBool sbaij;
3400 
3401     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3402     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3403     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3404     for (i=0; i<mbs; i++) {
3405       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3406       for (k=0; k<bs; k++) {
3407         PetscInt row = bs*i + k;
3408         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3409           PetscInt col = colidxs[j];
3410           if (!sbaij || col >= row)
3411             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3412         }
3413       }
3414     }
3415     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3416     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3417     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3418     ierr = PetscFree(nnz);CHKERRQ(ierr);
3419   }
3420 
3421   /* store matrix values */
3422   for (i=0; i<m; i++) {
3423     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3424     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3425   }
3426 
3427   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3428   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3429   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3430   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3431   PetscFunctionReturn(0);
3432 }
3433 
3434 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3435 {
3436   PetscErrorCode ierr;
3437   PetscBool      isbinary;
3438 
3439   PetscFunctionBegin;
3440   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3441   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3442   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3443   PetscFunctionReturn(0);
3444 }
3445 
3446 /*@C
3447    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3448    compressed row) format.  For good matrix assembly performance the
3449    user should preallocate the matrix storage by setting the parameter nz
3450    (or the array nnz).  By setting these parameters accurately, performance
3451    during matrix assembly can be increased by more than a factor of 50.
3452 
3453    Collective
3454 
3455    Input Parameters:
3456 +  comm - MPI communicator, set to PETSC_COMM_SELF
3457 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3458           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3459 .  m - number of rows
3460 .  n - number of columns
3461 .  nz - number of nonzero blocks  per block row (same for all rows)
3462 -  nnz - array containing the number of nonzero blocks in the various block rows
3463          (possibly different for each block row) or NULL
3464 
3465    Output Parameter:
3466 .  A - the matrix
3467 
3468    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3469    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3470    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3471 
3472    Options Database Keys:
3473 +   -mat_no_unroll - uses code that does not unroll the loops in the
3474                      block calculations (much slower)
3475 -    -mat_block_size - size of the blocks to use
3476 
3477    Level: intermediate
3478 
3479    Notes:
3480    The number of rows and columns must be divisible by blocksize.
3481 
3482    If the nnz parameter is given then the nz parameter is ignored
3483 
3484    A nonzero block is any block that as 1 or more nonzeros in it
3485 
3486    The block AIJ format is fully compatible with standard Fortran 77
3487    storage.  That is, the stored row and column indices can begin at
3488    either one (as in Fortran) or zero.  See the users' manual for details.
3489 
3490    Specify the preallocated storage with either nz or nnz (not both).
3491    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3492    allocation.  See Users-Manual: ch_mat for details.
3493    matrices.
3494 
3495 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3496 @*/
3497 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3498 {
3499   PetscErrorCode ierr;
3500 
3501   PetscFunctionBegin;
3502   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3503   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3504   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3505   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3506   PetscFunctionReturn(0);
3507 }
3508 
3509 /*@C
3510    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3511    per row in the matrix. For good matrix assembly performance the
3512    user should preallocate the matrix storage by setting the parameter nz
3513    (or the array nnz).  By setting these parameters accurately, performance
3514    during matrix assembly can be increased by more than a factor of 50.
3515 
3516    Collective
3517 
3518    Input Parameters:
3519 +  B - the matrix
3520 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3521           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3522 .  nz - number of block nonzeros per block row (same for all rows)
3523 -  nnz - array containing the number of block nonzeros in the various block rows
3524          (possibly different for each block row) or NULL
3525 
3526    Options Database Keys:
3527 +   -mat_no_unroll - uses code that does not unroll the loops in the
3528                      block calculations (much slower)
3529 -   -mat_block_size - size of the blocks to use
3530 
3531    Level: intermediate
3532 
3533    Notes:
3534    If the nnz parameter is given then the nz parameter is ignored
3535 
3536    You can call MatGetInfo() to get information on how effective the preallocation was;
3537    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3538    You can also run with the option -info and look for messages with the string
3539    malloc in them to see if additional memory allocation was needed.
3540 
3541    The block AIJ format is fully compatible with standard Fortran 77
3542    storage.  That is, the stored row and column indices can begin at
3543    either one (as in Fortran) or zero.  See the users' manual for details.
3544 
3545    Specify the preallocated storage with either nz or nnz (not both).
3546    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3547    allocation.  See Users-Manual: ch_mat for details.
3548 
3549 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3550 @*/
3551 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3552 {
3553   PetscErrorCode ierr;
3554 
3555   PetscFunctionBegin;
3556   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3557   PetscValidType(B,1);
3558   PetscValidLogicalCollectiveInt(B,bs,2);
3559   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3560   PetscFunctionReturn(0);
3561 }
3562 
3563 /*@C
3564    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3565 
3566    Collective
3567 
3568    Input Parameters:
3569 +  B - the matrix
3570 .  i - the indices into j for the start of each local row (starts with zero)
3571 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3572 -  v - optional values in the matrix
3573 
3574    Level: advanced
3575 
3576    Notes:
3577    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3578    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3579    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3580    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3581    block column and the second index is over columns within a block.
3582 
3583    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3584 
3585 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3586 @*/
3587 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3588 {
3589   PetscErrorCode ierr;
3590 
3591   PetscFunctionBegin;
3592   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3593   PetscValidType(B,1);
3594   PetscValidLogicalCollectiveInt(B,bs,2);
3595   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3596   PetscFunctionReturn(0);
3597 }
3598 
3599 /*@
3600      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3601 
3602      Collective
3603 
3604    Input Parameters:
3605 +  comm - must be an MPI communicator of size 1
3606 .  bs - size of block
3607 .  m - number of rows
3608 .  n - number of columns
3609 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3610 .  j - column indices
3611 -  a - matrix values
3612 
3613    Output Parameter:
3614 .  mat - the matrix
3615 
3616    Level: advanced
3617 
3618    Notes:
3619        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3620     once the matrix is destroyed
3621 
3622        You cannot set new nonzero locations into this matrix, that will generate an error.
3623 
3624        The i and j indices are 0 based
3625 
3626        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3627 
3628       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3629       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3630       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3631       with column-major ordering within blocks.
3632 
3633 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3634 
3635 @*/
3636 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3637 {
3638   PetscErrorCode ierr;
3639   PetscInt       ii;
3640   Mat_SeqBAIJ    *baij;
3641 
3642   PetscFunctionBegin;
3643   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3644   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3645 
3646   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3647   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3648   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3649   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3650   baij = (Mat_SeqBAIJ*)(*mat)->data;
3651   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3652   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3653 
3654   baij->i = i;
3655   baij->j = j;
3656   baij->a = a;
3657 
3658   baij->singlemalloc = PETSC_FALSE;
3659   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3660   baij->free_a       = PETSC_FALSE;
3661   baij->free_ij      = PETSC_FALSE;
3662 
3663   for (ii=0; ii<m; ii++) {
3664     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3665     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3666   }
3667   if (PetscDefined(USE_DEBUG)) {
3668     for (ii=0; ii<baij->i[m]; ii++) {
3669       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3670       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3671     }
3672   }
3673 
3674   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3675   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3676   PetscFunctionReturn(0);
3677 }
3678 
3679 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3680 {
3681   PetscErrorCode ierr;
3682   PetscMPIInt    size;
3683 
3684   PetscFunctionBegin;
3685   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3686   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3687     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3688   } else {
3689     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3690   }
3691   PetscFunctionReturn(0);
3692 }
3693