xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9463ebdafcaa1297f0cc5126b9b2ebb9458fdb8c)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnNorms_SeqBAIJ(Mat A,NormType type,PetscReal *norms)
21 {
22   PetscErrorCode ierr;
23   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
24   PetscInt       N,i;
25   PetscInt       ib,jb,bs = A->rmap->bs;
26   MatScalar      *a_val = a_aij->a;
27 
28   PetscFunctionBegin;
29   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
30   for (i=0; i<N; i++) norms[i] = 0.0;
31   if (type == NORM_2) {
32     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
33       for (jb=0; jb<bs; jb++) {
34         for (ib=0; ib<bs; ib++) {
35           norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
36           a_val++;
37         }
38       }
39     }
40   } else if (type == NORM_1) {
41     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
42       for (jb=0; jb<bs; jb++) {
43         for (ib=0; ib<bs; ib++) {
44           norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
45           a_val++;
46         }
47       }
48     }
49   } else if (type == NORM_INFINITY) {
50     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
51       for (jb=0; jb<bs; jb++) {
52         for (ib=0; ib<bs; ib++) {
53           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
54           norms[col] = PetscMax(PetscAbsScalar(*a_val), norms[col]);
55           a_val++;
56         }
57       }
58     }
59   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
60   if (type == NORM_2) {
61     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
62   }
63   PetscFunctionReturn(0);
64 }
65 
66 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
67 {
68   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
69   PetscErrorCode ierr;
70   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
71   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
72   PetscReal      shift = 0.0;
73   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
74 
75   PetscFunctionBegin;
76   allowzeropivot = PetscNot(A->erroriffailure);
77 
78   if (a->idiagvalid) {
79     if (values) *values = a->idiag;
80     PetscFunctionReturn(0);
81   }
82   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
83   diag_offset = a->diag;
84   if (!a->idiag) {
85     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
86     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
87   }
88   diag  = a->idiag;
89   if (values) *values = a->idiag;
90   /* factor and invert each block */
91   switch (bs) {
92   case 1:
93     for (i=0; i<mbs; i++) {
94       odiag    = v + 1*diag_offset[i];
95       diag[0]  = odiag[0];
96 
97       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
98         if (allowzeropivot) {
99           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
101           A->factorerror_zeropivot_row   = i;
102           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
103         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
104       }
105 
106       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
107       diag    += 1;
108     }
109     break;
110   case 2:
111     for (i=0; i<mbs; i++) {
112       odiag    = v + 4*diag_offset[i];
113       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
114       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
115       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116       diag    += 4;
117     }
118     break;
119   case 3:
120     for (i=0; i<mbs; i++) {
121       odiag    = v + 9*diag_offset[i];
122       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
123       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
124       diag[8]  = odiag[8];
125       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
126       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127       diag    += 9;
128     }
129     break;
130   case 4:
131     for (i=0; i<mbs; i++) {
132       odiag  = v + 16*diag_offset[i];
133       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
134       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag  += 16;
137     }
138     break;
139   case 5:
140     for (i=0; i<mbs; i++) {
141       odiag  = v + 25*diag_offset[i];
142       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
143       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
144       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145       diag  += 25;
146     }
147     break;
148   case 6:
149     for (i=0; i<mbs; i++) {
150       odiag  = v + 36*diag_offset[i];
151       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
152       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
153       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
154       diag  += 36;
155     }
156     break;
157   case 7:
158     for (i=0; i<mbs; i++) {
159       odiag  = v + 49*diag_offset[i];
160       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
161       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag  += 49;
164     }
165     break;
166   default:
167     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
168     for (i=0; i<mbs; i++) {
169       odiag  = v + bs2*diag_offset[i];
170       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
171       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
172       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
173       diag  += bs2;
174     }
175     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
176   }
177   a->idiagvalid = PETSC_TRUE;
178   PetscFunctionReturn(0);
179 }
180 
181 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
182 {
183   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
184   PetscScalar       *x,*work,*w,*workt,*t;
185   const MatScalar   *v,*aa = a->a, *idiag;
186   const PetscScalar *b,*xb;
187   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
188   PetscErrorCode    ierr;
189   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
190   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
191 
192   PetscFunctionBegin;
193   its = its*lits;
194   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
195   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
196   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
197   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
198   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
199 
200   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
201 
202   if (!m) PetscFunctionReturn(0);
203   diag  = a->diag;
204   idiag = a->idiag;
205   k    = PetscMax(A->rmap->n,A->cmap->n);
206   if (!a->mult_work) {
207     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
208   }
209   if (!a->sor_workt) {
210     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
211   }
212   if (!a->sor_work) {
213     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
214   }
215   work = a->mult_work;
216   t    = a->sor_workt;
217   w    = a->sor_work;
218 
219   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
220   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
221 
222   if (flag & SOR_ZERO_INITIAL_GUESS) {
223     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
224       switch (bs) {
225       case 1:
226         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
227         t[0] = b[0];
228         i2     = 1;
229         idiag += 1;
230         for (i=1; i<m; i++) {
231           v  = aa + ai[i];
232           vi = aj + ai[i];
233           nz = diag[i] - ai[i];
234           s[0] = b[i2];
235           for (j=0; j<nz; j++) {
236             xw[0] = x[vi[j]];
237             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
238           }
239           t[i2] = s[0];
240           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
241           x[i2]  = xw[0];
242           idiag += 1;
243           i2    += 1;
244         }
245         break;
246       case 2:
247         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
248         t[0] = b[0]; t[1] = b[1];
249         i2     = 2;
250         idiag += 4;
251         for (i=1; i<m; i++) {
252           v  = aa + 4*ai[i];
253           vi = aj + ai[i];
254           nz = diag[i] - ai[i];
255           s[0] = b[i2]; s[1] = b[i2+1];
256           for (j=0; j<nz; j++) {
257             idx = 2*vi[j];
258             it  = 4*j;
259             xw[0] = x[idx]; xw[1] = x[1+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
261           }
262           t[i2] = s[0]; t[i2+1] = s[1];
263           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
264           x[i2]   = xw[0]; x[i2+1] = xw[1];
265           idiag  += 4;
266           i2     += 2;
267         }
268         break;
269       case 3:
270         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
271         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
272         i2     = 3;
273         idiag += 9;
274         for (i=1; i<m; i++) {
275           v  = aa + 9*ai[i];
276           vi = aj + ai[i];
277           nz = diag[i] - ai[i];
278           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
279           while (nz--) {
280             idx = 3*(*vi++);
281             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
282             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
283             v  += 9;
284           }
285           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
286           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
287           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
288           idiag  += 9;
289           i2     += 3;
290         }
291         break;
292       case 4:
293         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
294         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
295         i2     = 4;
296         idiag += 16;
297         for (i=1; i<m; i++) {
298           v  = aa + 16*ai[i];
299           vi = aj + ai[i];
300           nz = diag[i] - ai[i];
301           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
302           while (nz--) {
303             idx = 4*(*vi++);
304             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
305             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
306             v  += 16;
307           }
308           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
309           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
310           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
311           idiag  += 16;
312           i2     += 4;
313         }
314         break;
315       case 5:
316         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
317         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
318         i2     = 5;
319         idiag += 25;
320         for (i=1; i<m; i++) {
321           v  = aa + 25*ai[i];
322           vi = aj + ai[i];
323           nz = diag[i] - ai[i];
324           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
325           while (nz--) {
326             idx = 5*(*vi++);
327             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
328             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
329             v  += 25;
330           }
331           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
332           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
333           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
334           idiag  += 25;
335           i2     += 5;
336         }
337         break;
338       case 6:
339         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
340         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
341         i2     = 6;
342         idiag += 36;
343         for (i=1; i<m; i++) {
344           v  = aa + 36*ai[i];
345           vi = aj + ai[i];
346           nz = diag[i] - ai[i];
347           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
348           while (nz--) {
349             idx = 6*(*vi++);
350             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
351             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
352             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
353             v  += 36;
354           }
355           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
356           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
357           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
358           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
359           idiag  += 36;
360           i2     += 6;
361         }
362         break;
363       case 7:
364         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
365         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
366         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
367         i2     = 7;
368         idiag += 49;
369         for (i=1; i<m; i++) {
370           v  = aa + 49*ai[i];
371           vi = aj + ai[i];
372           nz = diag[i] - ai[i];
373           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
374           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
375           while (nz--) {
376             idx = 7*(*vi++);
377             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
378             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
379             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
380             v  += 49;
381           }
382           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
383           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
384           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
385           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
386           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
387           idiag  += 49;
388           i2     += 7;
389         }
390         break;
391       default:
392         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
393         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
394         i2     = bs;
395         idiag += bs2;
396         for (i=1; i<m; i++) {
397           v  = aa + bs2*ai[i];
398           vi = aj + ai[i];
399           nz = diag[i] - ai[i];
400 
401           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
402           /* copy all rows of x that are needed into contiguous space */
403           workt = work;
404           for (j=0; j<nz; j++) {
405             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
406             workt += bs;
407           }
408           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
409           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
410           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
411 
412           idiag += bs2;
413           i2    += bs;
414         }
415         break;
416       }
417       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
418       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
419       xb = t;
420     }
421     else xb = b;
422     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
423       idiag = a->idiag+bs2*(a->mbs-1);
424       i2 = bs * (m-1);
425       switch (bs) {
426       case 1:
427         s[0]  = xb[i2];
428         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
429         x[i2] = xw[0];
430         i2   -= 1;
431         for (i=m-2; i>=0; i--) {
432           v  = aa + (diag[i]+1);
433           vi = aj + diag[i] + 1;
434           nz = ai[i+1] - diag[i] - 1;
435           s[0] = xb[i2];
436           for (j=0; j<nz; j++) {
437             xw[0] = x[vi[j]];
438             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
439           }
440           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
441           x[i2]  = xw[0];
442           idiag -= 1;
443           i2    -= 1;
444         }
445         break;
446       case 2:
447         s[0]  = xb[i2]; s[1] = xb[i2+1];
448         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
449         x[i2] = xw[0]; x[i2+1] = xw[1];
450         i2    -= 2;
451         idiag -= 4;
452         for (i=m-2; i>=0; i--) {
453           v  = aa + 4*(diag[i] + 1);
454           vi = aj + diag[i] + 1;
455           nz = ai[i+1] - diag[i] - 1;
456           s[0] = xb[i2]; s[1] = xb[i2+1];
457           for (j=0; j<nz; j++) {
458             idx = 2*vi[j];
459             it  = 4*j;
460             xw[0] = x[idx]; xw[1] = x[1+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
462           }
463           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
464           x[i2]   = xw[0]; x[i2+1] = xw[1];
465           idiag  -= 4;
466           i2     -= 2;
467         }
468         break;
469       case 3:
470         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
471         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
472         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
473         i2    -= 3;
474         idiag -= 9;
475         for (i=m-2; i>=0; i--) {
476           v  = aa + 9*(diag[i]+1);
477           vi = aj + diag[i] + 1;
478           nz = ai[i+1] - diag[i] - 1;
479           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
480           while (nz--) {
481             idx = 3*(*vi++);
482             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
483             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
484             v  += 9;
485           }
486           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
487           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
488           idiag  -= 9;
489           i2     -= 3;
490         }
491         break;
492       case 4:
493         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
494         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
495         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
496         i2    -= 4;
497         idiag -= 16;
498         for (i=m-2; i>=0; i--) {
499           v  = aa + 16*(diag[i]+1);
500           vi = aj + diag[i] + 1;
501           nz = ai[i+1] - diag[i] - 1;
502           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
503           while (nz--) {
504             idx = 4*(*vi++);
505             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
506             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
507             v  += 16;
508           }
509           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
510           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
511           idiag  -= 16;
512           i2     -= 4;
513         }
514         break;
515       case 5:
516         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
517         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
518         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
519         i2    -= 5;
520         idiag -= 25;
521         for (i=m-2; i>=0; i--) {
522           v  = aa + 25*(diag[i]+1);
523           vi = aj + diag[i] + 1;
524           nz = ai[i+1] - diag[i] - 1;
525           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
526           while (nz--) {
527             idx = 5*(*vi++);
528             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
529             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
530             v  += 25;
531           }
532           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
533           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
534           idiag  -= 25;
535           i2     -= 5;
536         }
537         break;
538       case 6:
539         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
540         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
541         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
542         i2    -= 6;
543         idiag -= 36;
544         for (i=m-2; i>=0; i--) {
545           v  = aa + 36*(diag[i]+1);
546           vi = aj + diag[i] + 1;
547           nz = ai[i+1] - diag[i] - 1;
548           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
549           while (nz--) {
550             idx = 6*(*vi++);
551             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
552             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
553             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
554             v  += 36;
555           }
556           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
557           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
558           idiag  -= 36;
559           i2     -= 6;
560         }
561         break;
562       case 7:
563         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
564         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
565         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
566         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
567         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
568         i2    -= 7;
569         idiag -= 49;
570         for (i=m-2; i>=0; i--) {
571           v  = aa + 49*(diag[i]+1);
572           vi = aj + diag[i] + 1;
573           nz = ai[i+1] - diag[i] - 1;
574           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
575           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
576           while (nz--) {
577             idx = 7*(*vi++);
578             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
579             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
580             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
581             v  += 49;
582           }
583           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
584           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
585           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
586           idiag  -= 49;
587           i2     -= 7;
588         }
589         break;
590       default:
591         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
592         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
593         i2    -= bs;
594         idiag -= bs2;
595         for (i=m-2; i>=0; i--) {
596           v  = aa + bs2*(diag[i]+1);
597           vi = aj + diag[i] + 1;
598           nz = ai[i+1] - diag[i] - 1;
599 
600           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
601           /* copy all rows of x that are needed into contiguous space */
602           workt = work;
603           for (j=0; j<nz; j++) {
604             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
605             workt += bs;
606           }
607           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
608           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
609 
610           idiag -= bs2;
611           i2    -= bs;
612         }
613         break;
614       }
615       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
616     }
617     its--;
618   }
619   while (its--) {
620     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
621       idiag = a->idiag;
622       i2 = 0;
623       switch (bs) {
624       case 1:
625         for (i=0; i<m; i++) {
626           v  = aa + ai[i];
627           vi = aj + ai[i];
628           nz = ai[i+1] - ai[i];
629           s[0] = b[i2];
630           for (j=0; j<nz; j++) {
631             xw[0] = x[vi[j]];
632             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
633           }
634           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
635           x[i2] += xw[0];
636           idiag += 1;
637           i2    += 1;
638         }
639         break;
640       case 2:
641         for (i=0; i<m; i++) {
642           v  = aa + 4*ai[i];
643           vi = aj + ai[i];
644           nz = ai[i+1] - ai[i];
645           s[0] = b[i2]; s[1] = b[i2+1];
646           for (j=0; j<nz; j++) {
647             idx = 2*vi[j];
648             it  = 4*j;
649             xw[0] = x[idx]; xw[1] = x[1+idx];
650             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
651           }
652           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
653           x[i2]  += xw[0]; x[i2+1] += xw[1];
654           idiag  += 4;
655           i2     += 2;
656         }
657         break;
658       case 3:
659         for (i=0; i<m; i++) {
660           v  = aa + 9*ai[i];
661           vi = aj + ai[i];
662           nz = ai[i+1] - ai[i];
663           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
664           while (nz--) {
665             idx = 3*(*vi++);
666             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
668             v  += 9;
669           }
670           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           idiag  += 9;
673           i2     += 3;
674         }
675         break;
676       case 4:
677         for (i=0; i<m; i++) {
678           v  = aa + 16*ai[i];
679           vi = aj + ai[i];
680           nz = ai[i+1] - ai[i];
681           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
682           while (nz--) {
683             idx = 4*(*vi++);
684             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
685             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
686             v  += 16;
687           }
688           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
689           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
690           idiag  += 16;
691           i2     += 4;
692         }
693         break;
694       case 5:
695         for (i=0; i<m; i++) {
696           v  = aa + 25*ai[i];
697           vi = aj + ai[i];
698           nz = ai[i+1] - ai[i];
699           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
700           while (nz--) {
701             idx = 5*(*vi++);
702             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
703             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
704             v  += 25;
705           }
706           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
707           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
708           idiag  += 25;
709           i2     += 5;
710         }
711         break;
712       case 6:
713         for (i=0; i<m; i++) {
714           v  = aa + 36*ai[i];
715           vi = aj + ai[i];
716           nz = ai[i+1] - ai[i];
717           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
718           while (nz--) {
719             idx = 6*(*vi++);
720             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
721             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
722             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
723             v  += 36;
724           }
725           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
726           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
727           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
728           idiag  += 36;
729           i2     += 6;
730         }
731         break;
732       case 7:
733         for (i=0; i<m; i++) {
734           v  = aa + 49*ai[i];
735           vi = aj + ai[i];
736           nz = ai[i+1] - ai[i];
737           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
738           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
739           while (nz--) {
740             idx = 7*(*vi++);
741             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
742             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
743             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
744             v  += 49;
745           }
746           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
747           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
748           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
749           idiag  += 49;
750           i2     += 7;
751         }
752         break;
753       default:
754         for (i=0; i<m; i++) {
755           v  = aa + bs2*ai[i];
756           vi = aj + ai[i];
757           nz = ai[i+1] - ai[i];
758 
759           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
760           /* copy all rows of x that are needed into contiguous space */
761           workt = work;
762           for (j=0; j<nz; j++) {
763             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
764             workt += bs;
765           }
766           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
767           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
768 
769           idiag += bs2;
770           i2    += bs;
771         }
772         break;
773       }
774       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
775     }
776     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
777       idiag = a->idiag+bs2*(a->mbs-1);
778       i2 = bs * (m-1);
779       switch (bs) {
780       case 1:
781         for (i=m-1; i>=0; i--) {
782           v  = aa + ai[i];
783           vi = aj + ai[i];
784           nz = ai[i+1] - ai[i];
785           s[0] = b[i2];
786           for (j=0; j<nz; j++) {
787             xw[0] = x[vi[j]];
788             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
789           }
790           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
791           x[i2] += xw[0];
792           idiag -= 1;
793           i2    -= 1;
794         }
795         break;
796       case 2:
797         for (i=m-1; i>=0; i--) {
798           v  = aa + 4*ai[i];
799           vi = aj + ai[i];
800           nz = ai[i+1] - ai[i];
801           s[0] = b[i2]; s[1] = b[i2+1];
802           for (j=0; j<nz; j++) {
803             idx = 2*vi[j];
804             it  = 4*j;
805             xw[0] = x[idx]; xw[1] = x[1+idx];
806             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
807           }
808           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
809           x[i2]  += xw[0]; x[i2+1] += xw[1];
810           idiag  -= 4;
811           i2     -= 2;
812         }
813         break;
814       case 3:
815         for (i=m-1; i>=0; i--) {
816           v  = aa + 9*ai[i];
817           vi = aj + ai[i];
818           nz = ai[i+1] - ai[i];
819           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
820           while (nz--) {
821             idx = 3*(*vi++);
822             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
824             v  += 9;
825           }
826           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           idiag  -= 9;
829           i2     -= 3;
830         }
831         break;
832       case 4:
833         for (i=m-1; i>=0; i--) {
834           v  = aa + 16*ai[i];
835           vi = aj + ai[i];
836           nz = ai[i+1] - ai[i];
837           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
838           while (nz--) {
839             idx = 4*(*vi++);
840             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
841             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
842             v  += 16;
843           }
844           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
845           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
846           idiag  -= 16;
847           i2     -= 4;
848         }
849         break;
850       case 5:
851         for (i=m-1; i>=0; i--) {
852           v  = aa + 25*ai[i];
853           vi = aj + ai[i];
854           nz = ai[i+1] - ai[i];
855           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
856           while (nz--) {
857             idx = 5*(*vi++);
858             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
859             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
860             v  += 25;
861           }
862           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
863           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
864           idiag  -= 25;
865           i2     -= 5;
866         }
867         break;
868       case 6:
869         for (i=m-1; i>=0; i--) {
870           v  = aa + 36*ai[i];
871           vi = aj + ai[i];
872           nz = ai[i+1] - ai[i];
873           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
874           while (nz--) {
875             idx = 6*(*vi++);
876             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
877             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
878             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
879             v  += 36;
880           }
881           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
882           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
883           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
884           idiag  -= 36;
885           i2     -= 6;
886         }
887         break;
888       case 7:
889         for (i=m-1; i>=0; i--) {
890           v  = aa + 49*ai[i];
891           vi = aj + ai[i];
892           nz = ai[i+1] - ai[i];
893           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
894           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
895           while (nz--) {
896             idx = 7*(*vi++);
897             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
898             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
899             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
900             v  += 49;
901           }
902           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
903           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
904           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
905           idiag  -= 49;
906           i2     -= 7;
907         }
908         break;
909       default:
910         for (i=m-1; i>=0; i--) {
911           v  = aa + bs2*ai[i];
912           vi = aj + ai[i];
913           nz = ai[i+1] - ai[i];
914 
915           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
916           /* copy all rows of x that are needed into contiguous space */
917           workt = work;
918           for (j=0; j<nz; j++) {
919             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
920             workt += bs;
921           }
922           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
923           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
924 
925           idiag -= bs2;
926           i2    -= bs;
927         }
928         break;
929       }
930       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
931     }
932   }
933   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
934   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
935   PetscFunctionReturn(0);
936 }
937 
938 /*
939     Special version for direct calls from Fortran (Used in PETSc-fun3d)
940 */
941 #if defined(PETSC_HAVE_FORTRAN_CAPS)
942 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
943 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
944 #define matsetvaluesblocked4_ matsetvaluesblocked4
945 #endif
946 
947 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
948 {
949   Mat               A  = *AA;
950   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
951   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
952   PetscInt          *ai    =a->i,*ailen=a->ilen;
953   PetscInt          *aj    =a->j,stepval,lastcol = -1;
954   const PetscScalar *value = v;
955   MatScalar         *ap,*aa = a->a,*bap;
956   PetscErrorCode    ierr;
957 
958   PetscFunctionBegin;
959   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
960   stepval = (n-1)*4;
961   for (k=0; k<m; k++) { /* loop over added rows */
962     row  = im[k];
963     rp   = aj + ai[row];
964     ap   = aa + 16*ai[row];
965     nrow = ailen[row];
966     low  = 0;
967     high = nrow;
968     for (l=0; l<n; l++) { /* loop over added columns */
969       col = in[l];
970       if (col <= lastcol)  low = 0;
971       else                high = nrow;
972       lastcol = col;
973       value   = v + k*(stepval+4 + l)*4;
974       while (high-low > 7) {
975         t = (low+high)/2;
976         if (rp[t] > col) high = t;
977         else             low  = t;
978       }
979       for (i=low; i<high; i++) {
980         if (rp[i] > col) break;
981         if (rp[i] == col) {
982           bap = ap +  16*i;
983           for (ii=0; ii<4; ii++,value+=stepval) {
984             for (jj=ii; jj<16; jj+=4) {
985               bap[jj] += *value++;
986             }
987           }
988           goto noinsert2;
989         }
990       }
991       N = nrow++ - 1;
992       high++; /* added new column index thus must search to one higher than before */
993       /* shift up all the later entries in this row */
994       for (ii=N; ii>=i; ii--) {
995         rp[ii+1] = rp[ii];
996         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
997       }
998       if (N >= i) {
999         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1000       }
1001       rp[i] = col;
1002       bap   = ap +  16*i;
1003       for (ii=0; ii<4; ii++,value+=stepval) {
1004         for (jj=ii; jj<16; jj+=4) {
1005           bap[jj] = *value++;
1006         }
1007       }
1008       noinsert2:;
1009       low = i;
1010     }
1011     ailen[row] = nrow;
1012   }
1013   PetscFunctionReturnVoid();
1014 }
1015 
1016 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1017 #define matsetvalues4_ MATSETVALUES4
1018 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1019 #define matsetvalues4_ matsetvalues4
1020 #endif
1021 
1022 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1023 {
1024   Mat         A  = *AA;
1025   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1026   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1027   PetscInt    *ai=a->i,*ailen=a->ilen;
1028   PetscInt    *aj=a->j,brow,bcol;
1029   PetscInt    ridx,cidx,lastcol = -1;
1030   MatScalar   *ap,value,*aa=a->a,*bap;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   for (k=0; k<m; k++) { /* loop over added rows */
1035     row  = im[k]; brow = row/4;
1036     rp   = aj + ai[brow];
1037     ap   = aa + 16*ai[brow];
1038     nrow = ailen[brow];
1039     low  = 0;
1040     high = nrow;
1041     for (l=0; l<n; l++) { /* loop over added columns */
1042       col   = in[l]; bcol = col/4;
1043       ridx  = row % 4; cidx = col % 4;
1044       value = v[l + k*n];
1045       if (col <= lastcol)  low = 0;
1046       else                high = nrow;
1047       lastcol = col;
1048       while (high-low > 7) {
1049         t = (low+high)/2;
1050         if (rp[t] > bcol) high = t;
1051         else              low  = t;
1052       }
1053       for (i=low; i<high; i++) {
1054         if (rp[i] > bcol) break;
1055         if (rp[i] == bcol) {
1056           bap   = ap +  16*i + 4*cidx + ridx;
1057           *bap += value;
1058           goto noinsert1;
1059         }
1060       }
1061       N = nrow++ - 1;
1062       high++; /* added new column thus must search to one higher than before */
1063       /* shift up all the later entries in this row */
1064       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1065       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1066       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1067       rp[i]                    = bcol;
1068       ap[16*i + 4*cidx + ridx] = value;
1069 noinsert1:;
1070       low = i;
1071     }
1072     ailen[brow] = nrow;
1073   }
1074   PetscFunctionReturnVoid();
1075 }
1076 
1077 /*
1078      Checks for missing diagonals
1079 */
1080 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1081 {
1082   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       *diag,*ii = a->i,i;
1085 
1086   PetscFunctionBegin;
1087   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1088   *missing = PETSC_FALSE;
1089   if (A->rmap->n > 0 && !ii) {
1090     *missing = PETSC_TRUE;
1091     if (d) *d = 0;
1092     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1093   } else {
1094     PetscInt n;
1095     n = PetscMin(a->mbs, a->nbs);
1096     diag = a->diag;
1097     for (i=0; i<n; i++) {
1098       if (diag[i] >= ii[i+1]) {
1099         *missing = PETSC_TRUE;
1100         if (d) *d = i;
1101         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1102         break;
1103       }
1104     }
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1110 {
1111   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113   PetscInt       i,j,m = a->mbs;
1114 
1115   PetscFunctionBegin;
1116   if (!a->diag) {
1117     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1118     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1119     a->free_diag = PETSC_TRUE;
1120   }
1121   for (i=0; i<m; i++) {
1122     a->diag[i] = a->i[i+1];
1123     for (j=a->i[i]; j<a->i[i+1]; j++) {
1124       if (a->j[j] == i) {
1125         a->diag[i] = j;
1126         break;
1127       }
1128     }
1129   }
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1134 {
1135   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1136   PetscErrorCode ierr;
1137   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1138   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1139 
1140   PetscFunctionBegin;
1141   *nn = n;
1142   if (!ia) PetscFunctionReturn(0);
1143   if (symmetric) {
1144     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1145     nz   = tia[n];
1146   } else {
1147     tia = a->i; tja = a->j;
1148   }
1149 
1150   if (!blockcompressed && bs > 1) {
1151     (*nn) *= bs;
1152     /* malloc & create the natural set of indices */
1153     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1154     if (n) {
1155       (*ia)[0] = oshift;
1156       for (j=1; j<bs; j++) {
1157         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1158       }
1159     }
1160 
1161     for (i=1; i<n; i++) {
1162       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1163       for (j=1; j<bs; j++) {
1164         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1165       }
1166     }
1167     if (n) {
1168       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1169     }
1170 
1171     if (inja) {
1172       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1173       cnt = 0;
1174       for (i=0; i<n; i++) {
1175         for (j=0; j<bs; j++) {
1176           for (k=tia[i]; k<tia[i+1]; k++) {
1177             for (l=0; l<bs; l++) {
1178               (*ja)[cnt++] = bs*tja[k] + l;
1179             }
1180           }
1181         }
1182       }
1183     }
1184 
1185     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1186       ierr = PetscFree(tia);CHKERRQ(ierr);
1187       ierr = PetscFree(tja);CHKERRQ(ierr);
1188     }
1189   } else if (oshift == 1) {
1190     if (symmetric) {
1191       nz = tia[A->rmap->n/bs];
1192       /*  add 1 to i and j indices */
1193       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1194       *ia = tia;
1195       if (ja) {
1196         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1197         *ja = tja;
1198       }
1199     } else {
1200       nz = a->i[A->rmap->n/bs];
1201       /* malloc space and  add 1 to i and j indices */
1202       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1203       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1204       if (ja) {
1205         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1206         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1207       }
1208     }
1209   } else {
1210     *ia = tia;
1211     if (ja) *ja = tja;
1212   }
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1217 {
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1230 {
1231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235 #if defined(PETSC_USE_LOG)
1236   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1237 #endif
1238   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1239   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1240   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1241   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1242   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1243   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1244   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1245   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1246   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1247   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1248   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1249   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1250   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1251 
1252   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1253   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1254   ierr = PetscFree(A->data);CHKERRQ(ierr);
1255 
1256   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1257   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1258   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1259   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1260   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1261   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1262   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1263   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1264   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1265   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1266   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1269 #if defined(PETSC_HAVE_HYPRE)
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1271 #endif
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1277 {
1278   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282   switch (op) {
1283   case MAT_ROW_ORIENTED:
1284     a->roworiented = flg;
1285     break;
1286   case MAT_KEEP_NONZERO_PATTERN:
1287     a->keepnonzeropattern = flg;
1288     break;
1289   case MAT_NEW_NONZERO_LOCATIONS:
1290     a->nonew = (flg ? 0 : 1);
1291     break;
1292   case MAT_NEW_NONZERO_LOCATION_ERR:
1293     a->nonew = (flg ? -1 : 0);
1294     break;
1295   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1296     a->nonew = (flg ? -2 : 0);
1297     break;
1298   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1299     a->nounused = (flg ? -1 : 0);
1300     break;
1301   case MAT_FORCE_DIAGONAL_ENTRIES:
1302   case MAT_IGNORE_OFF_PROC_ENTRIES:
1303   case MAT_USE_HASH_TABLE:
1304   case MAT_SORTED_FULL:
1305     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1306     break;
1307   case MAT_SPD:
1308   case MAT_SYMMETRIC:
1309   case MAT_STRUCTURALLY_SYMMETRIC:
1310   case MAT_HERMITIAN:
1311   case MAT_SYMMETRY_ETERNAL:
1312   case MAT_SUBMAT_SINGLEIS:
1313   case MAT_STRUCTURE_ONLY:
1314     /* These options are handled directly by MatSetOption() */
1315     break;
1316   default:
1317     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1323 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1324 {
1325   PetscErrorCode ierr;
1326   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1327   MatScalar      *aa_i;
1328   PetscScalar    *v_i;
1329 
1330   PetscFunctionBegin;
1331   bs  = A->rmap->bs;
1332   bs2 = bs*bs;
1333   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1334 
1335   bn  = row/bs;   /* Block number */
1336   bp  = row % bs; /* Block Position */
1337   M   = ai[bn+1] - ai[bn];
1338   *nz = bs*M;
1339 
1340   if (v) {
1341     *v = NULL;
1342     if (*nz) {
1343       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1344       for (i=0; i<M; i++) { /* for each block in the block row */
1345         v_i  = *v + i*bs;
1346         aa_i = aa + bs2*(ai[bn] + i);
1347         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1348       }
1349     }
1350   }
1351 
1352   if (idx) {
1353     *idx = NULL;
1354     if (*nz) {
1355       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1356       for (i=0; i<M; i++) { /* for each block in the block row */
1357         idx_i = *idx + i*bs;
1358         itmp  = bs*aj[ai[bn] + i];
1359         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1360       }
1361     }
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1367 {
1368   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1377 {
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   if (nz)  *nz = 0;
1382   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1383   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1388 {
1389   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1390   Mat            C;
1391   PetscErrorCode ierr;
1392   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1393   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1394   MatScalar      *ata,*aa=a->a;
1395 
1396   PetscFunctionBegin;
1397   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1398   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1399     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1400 
1401     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1402     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1403     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1404     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1405 
1406     at  = (Mat_SeqBAIJ*)C->data;
1407     ati = at->i;
1408     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1409   } else {
1410     C = *B;
1411     at = (Mat_SeqBAIJ*)C->data;
1412     ati = at->i;
1413   }
1414 
1415   atj = at->j;
1416   ata = at->a;
1417 
1418   /* Copy ati into atfill so we have locations of the next free space in atj */
1419   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1420 
1421   /* Walk through A row-wise and mark nonzero entries of A^T. */
1422   for (i=0; i<mbs; i++) {
1423     anzj = ai[i+1] - ai[i];
1424     for (j=0; j<anzj; j++) {
1425       atj[atfill[*aj]] = i;
1426       for (kr=0; kr<bs; kr++) {
1427         for (k=0; k<bs; k++) {
1428           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1429         }
1430       }
1431       atfill[*aj++] += 1;
1432     }
1433   }
1434   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436 
1437   /* Clean up temporary space and complete requests. */
1438   ierr = PetscFree(atfill);CHKERRQ(ierr);
1439 
1440   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1441     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1442     *B = C;
1443   } else {
1444     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1445   }
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1450 {
1451   PetscErrorCode ierr;
1452   Mat            Btrans;
1453 
1454   PetscFunctionBegin;
1455   *f   = PETSC_FALSE;
1456   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1457   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1458   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1463 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1464 {
1465   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1466   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1467   PetscInt       *rowlens,*colidxs;
1468   PetscScalar    *matvals;
1469   PetscErrorCode ierr;
1470 
1471   PetscFunctionBegin;
1472   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1473 
1474   M  = mat->rmap->N;
1475   N  = mat->cmap->N;
1476   m  = mat->rmap->n;
1477   bs = mat->rmap->bs;
1478   nz = bs*bs*A->nz;
1479 
1480   /* write matrix header */
1481   header[0] = MAT_FILE_CLASSID;
1482   header[1] = M; header[2] = N; header[3] = nz;
1483   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1484 
1485   /* store row lengths */
1486   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1487   for (cnt=0, i=0; i<A->mbs; i++)
1488     for (j=0; j<bs; j++)
1489       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1490   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1491   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1492 
1493   /* store column indices  */
1494   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1495   for (cnt=0, i=0; i<A->mbs; i++)
1496     for (k=0; k<bs; k++)
1497       for (j=A->i[i]; j<A->i[i+1]; j++)
1498         for (l=0; l<bs; l++)
1499           colidxs[cnt++] = bs*A->j[j] + l;
1500   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1501   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1502   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1503 
1504   /* store nonzero values */
1505   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1506   for (cnt=0, i=0; i<A->mbs; i++)
1507     for (k=0; k<bs; k++)
1508       for (j=A->i[i]; j<A->i[i+1]; j++)
1509         for (l=0; l<bs; l++)
1510           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1511   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1512   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1513   ierr = PetscFree(matvals);CHKERRQ(ierr);
1514 
1515   /* write block size option to the viewer's .info file */
1516   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1521 {
1522   PetscErrorCode ierr;
1523   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1524   PetscInt       i,bs = A->rmap->bs,k;
1525 
1526   PetscFunctionBegin;
1527   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1528   for (i=0; i<a->mbs; i++) {
1529     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1530     for (k=a->i[i]; k<a->i[i+1]; k++) {
1531       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1532     }
1533     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1534   }
1535   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1540 {
1541   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1542   PetscErrorCode    ierr;
1543   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1544   PetscViewerFormat format;
1545 
1546   PetscFunctionBegin;
1547   if (A->structure_only) {
1548     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1549     PetscFunctionReturn(0);
1550   }
1551 
1552   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1553   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1554     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1555   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1556     const char *matname;
1557     Mat        aij;
1558     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1559     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1560     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1561     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1562     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1563   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1564       PetscFunctionReturn(0);
1565   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1566     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1567     for (i=0; i<a->mbs; i++) {
1568       for (j=0; j<bs; j++) {
1569         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1570         for (k=a->i[i]; k<a->i[i+1]; k++) {
1571           for (l=0; l<bs; l++) {
1572 #if defined(PETSC_USE_COMPLEX)
1573             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1574               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1575                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1576             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1578                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1579             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1580               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1581             }
1582 #else
1583             if (a->a[bs2*k + l*bs + j] != 0.0) {
1584               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1585             }
1586 #endif
1587           }
1588         }
1589         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1590       }
1591     }
1592     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1593   } else {
1594     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1595     for (i=0; i<a->mbs; i++) {
1596       for (j=0; j<bs; j++) {
1597         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1598         for (k=a->i[i]; k<a->i[i+1]; k++) {
1599           for (l=0; l<bs; l++) {
1600 #if defined(PETSC_USE_COMPLEX)
1601             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1602               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1603                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1604             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1605               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1606                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1607             } else {
1608               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1609             }
1610 #else
1611             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1612 #endif
1613           }
1614         }
1615         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1616       }
1617     }
1618     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1619   }
1620   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 #include <petscdraw.h>
1625 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1626 {
1627   Mat               A = (Mat) Aa;
1628   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1629   PetscErrorCode    ierr;
1630   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1631   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1632   MatScalar         *aa;
1633   PetscViewer       viewer;
1634   PetscViewerFormat format;
1635 
1636   PetscFunctionBegin;
1637   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1638   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1639   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1640 
1641   /* loop over matrix elements drawing boxes */
1642 
1643   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1644     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1645     /* Blue for negative, Cyan for zero and  Red for positive */
1646     color = PETSC_DRAW_BLUE;
1647     for (i=0,row=0; i<mbs; i++,row+=bs) {
1648       for (j=a->i[i]; j<a->i[i+1]; j++) {
1649         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1650         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1651         aa  = a->a + j*bs2;
1652         for (k=0; k<bs; k++) {
1653           for (l=0; l<bs; l++) {
1654             if (PetscRealPart(*aa++) >=  0.) continue;
1655             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1656           }
1657         }
1658       }
1659     }
1660     color = PETSC_DRAW_CYAN;
1661     for (i=0,row=0; i<mbs; i++,row+=bs) {
1662       for (j=a->i[i]; j<a->i[i+1]; j++) {
1663         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1664         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1665         aa  = a->a + j*bs2;
1666         for (k=0; k<bs; k++) {
1667           for (l=0; l<bs; l++) {
1668             if (PetscRealPart(*aa++) != 0.) continue;
1669             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1670           }
1671         }
1672       }
1673     }
1674     color = PETSC_DRAW_RED;
1675     for (i=0,row=0; i<mbs; i++,row+=bs) {
1676       for (j=a->i[i]; j<a->i[i+1]; j++) {
1677         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1678         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1679         aa  = a->a + j*bs2;
1680         for (k=0; k<bs; k++) {
1681           for (l=0; l<bs; l++) {
1682             if (PetscRealPart(*aa++) <= 0.) continue;
1683             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1684           }
1685         }
1686       }
1687     }
1688     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1689   } else {
1690     /* use contour shading to indicate magnitude of values */
1691     /* first determine max of all nonzero values */
1692     PetscReal minv = 0.0, maxv = 0.0;
1693     PetscDraw popup;
1694 
1695     for (i=0; i<a->nz*a->bs2; i++) {
1696       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1697     }
1698     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1699     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1700     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1701 
1702     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1703     for (i=0,row=0; i<mbs; i++,row+=bs) {
1704       for (j=a->i[i]; j<a->i[i+1]; j++) {
1705         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1706         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1707         aa  = a->a + j*bs2;
1708         for (k=0; k<bs; k++) {
1709           for (l=0; l<bs; l++) {
1710             MatScalar v = *aa++;
1711             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1712             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1713           }
1714         }
1715       }
1716     }
1717     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1718   }
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1723 {
1724   PetscErrorCode ierr;
1725   PetscReal      xl,yl,xr,yr,w,h;
1726   PetscDraw      draw;
1727   PetscBool      isnull;
1728 
1729   PetscFunctionBegin;
1730   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1731   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1732   if (isnull) PetscFunctionReturn(0);
1733 
1734   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1735   xr  += w;          yr += h;        xl = -w;     yl = -h;
1736   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1737   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1738   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1739   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1740   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1745 {
1746   PetscErrorCode ierr;
1747   PetscBool      iascii,isbinary,isdraw;
1748 
1749   PetscFunctionBegin;
1750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1753   if (iascii) {
1754     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1755   } else if (isbinary) {
1756     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1757   } else if (isdraw) {
1758     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1759   } else {
1760     Mat B;
1761     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1762     ierr = MatView(B,viewer);CHKERRQ(ierr);
1763     ierr = MatDestroy(&B);CHKERRQ(ierr);
1764   }
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1769 {
1770   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1771   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1772   PetscInt    *ai = a->i,*ailen = a->ilen;
1773   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1774   MatScalar   *ap,*aa = a->a;
1775 
1776   PetscFunctionBegin;
1777   for (k=0; k<m; k++) { /* loop over rows */
1778     row = im[k]; brow = row/bs;
1779     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1780     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1781     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1782     nrow = ailen[brow];
1783     for (l=0; l<n; l++) { /* loop over columns */
1784       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1785       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1786       col  = in[l];
1787       bcol = col/bs;
1788       cidx = col%bs;
1789       ridx = row%bs;
1790       high = nrow;
1791       low  = 0; /* assume unsorted */
1792       while (high-low > 5) {
1793         t = (low+high)/2;
1794         if (rp[t] > bcol) high = t;
1795         else             low  = t;
1796       }
1797       for (i=low; i<high; i++) {
1798         if (rp[i] > bcol) break;
1799         if (rp[i] == bcol) {
1800           *v++ = ap[bs2*i+bs*cidx+ridx];
1801           goto finished;
1802         }
1803       }
1804       *v++ = 0.0;
1805 finished:;
1806     }
1807   }
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1812 {
1813   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1814   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1815   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1816   PetscErrorCode    ierr;
1817   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1818   PetscBool         roworiented=a->roworiented;
1819   const PetscScalar *value     = v;
1820   MatScalar         *ap=NULL,*aa = a->a,*bap;
1821 
1822   PetscFunctionBegin;
1823   if (roworiented) {
1824     stepval = (n-1)*bs;
1825   } else {
1826     stepval = (m-1)*bs;
1827   }
1828   for (k=0; k<m; k++) { /* loop over added rows */
1829     row = im[k];
1830     if (row < 0) continue;
1831     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1832     rp   = aj + ai[row];
1833     if (!A->structure_only) ap = aa + bs2*ai[row];
1834     rmax = imax[row];
1835     nrow = ailen[row];
1836     low  = 0;
1837     high = nrow;
1838     for (l=0; l<n; l++) { /* loop over added columns */
1839       if (in[l] < 0) continue;
1840       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1841       col = in[l];
1842       if (!A->structure_only) {
1843         if (roworiented) {
1844           value = v + (k*(stepval+bs) + l)*bs;
1845         } else {
1846           value = v + (l*(stepval+bs) + k)*bs;
1847         }
1848       }
1849       if (col <= lastcol) low = 0;
1850       else high = nrow;
1851       lastcol = col;
1852       while (high-low > 7) {
1853         t = (low+high)/2;
1854         if (rp[t] > col) high = t;
1855         else             low  = t;
1856       }
1857       for (i=low; i<high; i++) {
1858         if (rp[i] > col) break;
1859         if (rp[i] == col) {
1860           if (A->structure_only) goto noinsert2;
1861           bap = ap +  bs2*i;
1862           if (roworiented) {
1863             if (is == ADD_VALUES) {
1864               for (ii=0; ii<bs; ii++,value+=stepval) {
1865                 for (jj=ii; jj<bs2; jj+=bs) {
1866                   bap[jj] += *value++;
1867                 }
1868               }
1869             } else {
1870               for (ii=0; ii<bs; ii++,value+=stepval) {
1871                 for (jj=ii; jj<bs2; jj+=bs) {
1872                   bap[jj] = *value++;
1873                 }
1874               }
1875             }
1876           } else {
1877             if (is == ADD_VALUES) {
1878               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1879                 for (jj=0; jj<bs; jj++) {
1880                   bap[jj] += value[jj];
1881                 }
1882                 bap += bs;
1883               }
1884             } else {
1885               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1886                 for (jj=0; jj<bs; jj++) {
1887                   bap[jj]  = value[jj];
1888                 }
1889                 bap += bs;
1890               }
1891             }
1892           }
1893           goto noinsert2;
1894         }
1895       }
1896       if (nonew == 1) goto noinsert2;
1897       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1898       if (A->structure_only) {
1899         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1900       } else {
1901         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1902       }
1903       N = nrow++ - 1; high++;
1904       /* shift up all the later entries in this row */
1905       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1906       rp[i] = col;
1907       if (!A->structure_only) {
1908         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1909         bap   = ap +  bs2*i;
1910         if (roworiented) {
1911           for (ii=0; ii<bs; ii++,value+=stepval) {
1912             for (jj=ii; jj<bs2; jj+=bs) {
1913               bap[jj] = *value++;
1914             }
1915           }
1916         } else {
1917           for (ii=0; ii<bs; ii++,value+=stepval) {
1918             for (jj=0; jj<bs; jj++) {
1919               *bap++ = *value++;
1920             }
1921           }
1922         }
1923       }
1924 noinsert2:;
1925       low = i;
1926     }
1927     ailen[row] = nrow;
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1933 {
1934   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1935   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1936   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1937   PetscErrorCode ierr;
1938   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1939   MatScalar      *aa  = a->a,*ap;
1940   PetscReal      ratio=0.6;
1941 
1942   PetscFunctionBegin;
1943   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1944 
1945   if (m) rmax = ailen[0];
1946   for (i=1; i<mbs; i++) {
1947     /* move each row back by the amount of empty slots (fshift) before it*/
1948     fshift += imax[i-1] - ailen[i-1];
1949     rmax    = PetscMax(rmax,ailen[i]);
1950     if (fshift) {
1951       ip = aj + ai[i];
1952       ap = aa + bs2*ai[i];
1953       N  = ailen[i];
1954       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1955       if (!A->structure_only) {
1956         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1957       }
1958     }
1959     ai[i] = ai[i-1] + ailen[i-1];
1960   }
1961   if (mbs) {
1962     fshift += imax[mbs-1] - ailen[mbs-1];
1963     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1964   }
1965 
1966   /* reset ilen and imax for each row */
1967   a->nonzerorowcnt = 0;
1968   if (A->structure_only) {
1969     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1970   } else { /* !A->structure_only */
1971     for (i=0; i<mbs; i++) {
1972       ailen[i] = imax[i] = ai[i+1] - ai[i];
1973       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1974     }
1975   }
1976   a->nz = ai[mbs];
1977 
1978   /* diagonals may have moved, so kill the diagonal pointers */
1979   a->idiagvalid = PETSC_FALSE;
1980   if (fshift && a->diag) {
1981     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1982     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1983     a->diag = NULL;
1984   }
1985   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1986   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1987   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1988   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1989 
1990   A->info.mallocs    += a->reallocs;
1991   a->reallocs         = 0;
1992   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1993   a->rmax             = rmax;
1994 
1995   if (!A->structure_only) {
1996     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1997   }
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 /*
2002    This function returns an array of flags which indicate the locations of contiguous
2003    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2004    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2005    Assume: sizes should be long enough to hold all the values.
2006 */
2007 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2008 {
2009   PetscInt  i,j,k,row;
2010   PetscBool flg;
2011 
2012   PetscFunctionBegin;
2013   for (i=0,j=0; i<n; j++) {
2014     row = idx[i];
2015     if (row%bs!=0) { /* Not the begining of a block */
2016       sizes[j] = 1;
2017       i++;
2018     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2019       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2020       i++;
2021     } else { /* Begining of the block, so check if the complete block exists */
2022       flg = PETSC_TRUE;
2023       for (k=1; k<bs; k++) {
2024         if (row+k != idx[i+k]) { /* break in the block */
2025           flg = PETSC_FALSE;
2026           break;
2027         }
2028       }
2029       if (flg) { /* No break in the bs */
2030         sizes[j] = bs;
2031         i       += bs;
2032       } else {
2033         sizes[j] = 1;
2034         i++;
2035       }
2036     }
2037   }
2038   *bs_max = j;
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2043 {
2044   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2045   PetscErrorCode    ierr;
2046   PetscInt          i,j,k,count,*rows;
2047   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2048   PetscScalar       zero = 0.0;
2049   MatScalar         *aa;
2050   const PetscScalar *xx;
2051   PetscScalar       *bb;
2052 
2053   PetscFunctionBegin;
2054   /* fix right hand side if needed */
2055   if (x && b) {
2056     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2057     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2058     for (i=0; i<is_n; i++) {
2059       bb[is_idx[i]] = diag*xx[is_idx[i]];
2060     }
2061     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2062     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2063   }
2064 
2065   /* Make a copy of the IS and  sort it */
2066   /* allocate memory for rows,sizes */
2067   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2068 
2069   /* copy IS values to rows, and sort them */
2070   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2071   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2072 
2073   if (baij->keepnonzeropattern) {
2074     for (i=0; i<is_n; i++) sizes[i] = 1;
2075     bs_max          = is_n;
2076   } else {
2077     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2078     A->nonzerostate++;
2079   }
2080 
2081   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2082     row = rows[j];
2083     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2084     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2085     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2086     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2087       if (diag != (PetscScalar)0.0) {
2088         if (baij->ilen[row/bs] > 0) {
2089           baij->ilen[row/bs]       = 1;
2090           baij->j[baij->i[row/bs]] = row/bs;
2091 
2092           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2093         }
2094         /* Now insert all the diagonal values for this bs */
2095         for (k=0; k<bs; k++) {
2096           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2097         }
2098       } else { /* (diag == 0.0) */
2099         baij->ilen[row/bs] = 0;
2100       } /* end (diag == 0.0) */
2101     } else { /* (sizes[i] != bs) */
2102       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2103       for (k=0; k<count; k++) {
2104         aa[0] =  zero;
2105         aa   += bs;
2106       }
2107       if (diag != (PetscScalar)0.0) {
2108         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2109       }
2110     }
2111   }
2112 
2113   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2114   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2119 {
2120   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2121   PetscErrorCode    ierr;
2122   PetscInt          i,j,k,count;
2123   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2124   PetscScalar       zero = 0.0;
2125   MatScalar         *aa;
2126   const PetscScalar *xx;
2127   PetscScalar       *bb;
2128   PetscBool         *zeroed,vecs = PETSC_FALSE;
2129 
2130   PetscFunctionBegin;
2131   /* fix right hand side if needed */
2132   if (x && b) {
2133     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2134     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2135     vecs = PETSC_TRUE;
2136   }
2137 
2138   /* zero the columns */
2139   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2140   for (i=0; i<is_n; i++) {
2141     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2142     zeroed[is_idx[i]] = PETSC_TRUE;
2143   }
2144   for (i=0; i<A->rmap->N; i++) {
2145     if (!zeroed[i]) {
2146       row = i/bs;
2147       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2148         for (k=0; k<bs; k++) {
2149           col = bs*baij->j[j] + k;
2150           if (zeroed[col]) {
2151             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2152             if (vecs) bb[i] -= aa[0]*xx[col];
2153             aa[0] = 0.0;
2154           }
2155         }
2156       }
2157     } else if (vecs) bb[i] = diag*xx[i];
2158   }
2159   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2160   if (vecs) {
2161     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2162     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2163   }
2164 
2165   /* zero the rows */
2166   for (i=0; i<is_n; i++) {
2167     row   = is_idx[i];
2168     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2169     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2170     for (k=0; k<count; k++) {
2171       aa[0] =  zero;
2172       aa   += bs;
2173     }
2174     if (diag != (PetscScalar)0.0) {
2175       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2176     }
2177   }
2178   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2183 {
2184   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2185   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2186   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2187   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2188   PetscErrorCode ierr;
2189   PetscInt       ridx,cidx,bs2=a->bs2;
2190   PetscBool      roworiented=a->roworiented;
2191   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2192 
2193   PetscFunctionBegin;
2194   for (k=0; k<m; k++) { /* loop over added rows */
2195     row  = im[k];
2196     brow = row/bs;
2197     if (row < 0) continue;
2198     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2199     rp   = aj + ai[brow];
2200     if (!A->structure_only) ap = aa + bs2*ai[brow];
2201     rmax = imax[brow];
2202     nrow = ailen[brow];
2203     low  = 0;
2204     high = nrow;
2205     for (l=0; l<n; l++) { /* loop over added columns */
2206       if (in[l] < 0) continue;
2207       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2208       col  = in[l]; bcol = col/bs;
2209       ridx = row % bs; cidx = col % bs;
2210       if (!A->structure_only) {
2211         if (roworiented) {
2212           value = v[l + k*n];
2213         } else {
2214           value = v[k + l*m];
2215         }
2216       }
2217       if (col <= lastcol) low = 0; else high = nrow;
2218       lastcol = col;
2219       while (high-low > 7) {
2220         t = (low+high)/2;
2221         if (rp[t] > bcol) high = t;
2222         else              low  = t;
2223       }
2224       for (i=low; i<high; i++) {
2225         if (rp[i] > bcol) break;
2226         if (rp[i] == bcol) {
2227           bap = ap +  bs2*i + bs*cidx + ridx;
2228           if (!A->structure_only) {
2229             if (is == ADD_VALUES) *bap += value;
2230             else                  *bap  = value;
2231           }
2232           goto noinsert1;
2233         }
2234       }
2235       if (nonew == 1) goto noinsert1;
2236       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2237       if (A->structure_only) {
2238         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2239       } else {
2240         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2241       }
2242       N = nrow++ - 1; high++;
2243       /* shift up all the later entries in this row */
2244       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2245       rp[i] = bcol;
2246       if (!A->structure_only) {
2247         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2248         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2249         ap[bs2*i + bs*cidx + ridx] = value;
2250       }
2251       a->nz++;
2252       A->nonzerostate++;
2253 noinsert1:;
2254       low = i;
2255     }
2256     ailen[brow] = nrow;
2257   }
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2262 {
2263   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2264   Mat            outA;
2265   PetscErrorCode ierr;
2266   PetscBool      row_identity,col_identity;
2267 
2268   PetscFunctionBegin;
2269   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2270   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2271   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2272   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2273 
2274   outA            = inA;
2275   inA->factortype = MAT_FACTOR_LU;
2276   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2277   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2278 
2279   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2280 
2281   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2282   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2283   a->row = row;
2284   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2285   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2286   a->col = col;
2287 
2288   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2289   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2290   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2291   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2292 
2293   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2294   if (!a->solve_work) {
2295     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2296     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2297   }
2298   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2303 {
2304   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2305   PetscInt    i,nz,mbs;
2306 
2307   PetscFunctionBegin;
2308   nz  = baij->maxnz;
2309   mbs = baij->mbs;
2310   for (i=0; i<nz; i++) {
2311     baij->j[i] = indices[i];
2312   }
2313   baij->nz = nz;
2314   for (i=0; i<mbs; i++) {
2315     baij->ilen[i] = baij->imax[i];
2316   }
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 /*@
2321     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2322        in the matrix.
2323 
2324   Input Parameters:
2325 +  mat - the SeqBAIJ matrix
2326 -  indices - the column indices
2327 
2328   Level: advanced
2329 
2330   Notes:
2331     This can be called if you have precomputed the nonzero structure of the
2332   matrix and want to provide it to the matrix object to improve the performance
2333   of the MatSetValues() operation.
2334 
2335     You MUST have set the correct numbers of nonzeros per row in the call to
2336   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2337 
2338     MUST be called before any calls to MatSetValues();
2339 
2340 @*/
2341 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2342 {
2343   PetscErrorCode ierr;
2344 
2345   PetscFunctionBegin;
2346   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2347   PetscValidPointer(indices,2);
2348   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2353 {
2354   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2355   PetscErrorCode ierr;
2356   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2357   PetscReal      atmp;
2358   PetscScalar    *x,zero = 0.0;
2359   MatScalar      *aa;
2360   PetscInt       ncols,brow,krow,kcol;
2361 
2362   PetscFunctionBegin;
2363   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2364   bs  = A->rmap->bs;
2365   aa  = a->a;
2366   ai  = a->i;
2367   aj  = a->j;
2368   mbs = a->mbs;
2369 
2370   ierr = VecSet(v,zero);CHKERRQ(ierr);
2371   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2372   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2373   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2374   for (i=0; i<mbs; i++) {
2375     ncols = ai[1] - ai[0]; ai++;
2376     brow  = bs*i;
2377     for (j=0; j<ncols; j++) {
2378       for (kcol=0; kcol<bs; kcol++) {
2379         for (krow=0; krow<bs; krow++) {
2380           atmp = PetscAbsScalar(*aa);aa++;
2381           row  = brow + krow;   /* row index */
2382           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2383         }
2384       }
2385       aj++;
2386     }
2387   }
2388   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2393 {
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   /* If the two matrices have the same copy implementation, use fast copy. */
2398   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2399     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2400     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2401     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2402 
2403     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2404     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2405     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2406     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2407   } else {
2408     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2409   }
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2414 {
2415   PetscErrorCode ierr;
2416 
2417   PetscFunctionBegin;
2418   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2423 {
2424   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2425 
2426   PetscFunctionBegin;
2427   *array = a->a;
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2432 {
2433   PetscFunctionBegin;
2434   *array = NULL;
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2439 {
2440   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2441   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2442   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2443   PetscErrorCode ierr;
2444 
2445   PetscFunctionBegin;
2446   /* Set the number of nonzeros in the new matrix */
2447   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2452 {
2453   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2454   PetscErrorCode ierr;
2455   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2456   PetscBLASInt   one=1;
2457 
2458   PetscFunctionBegin;
2459   if (str == SAME_NONZERO_PATTERN) {
2460     PetscScalar  alpha = a;
2461     PetscBLASInt bnz;
2462     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2463     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2464     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2465   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2466     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2467   } else {
2468     Mat      B;
2469     PetscInt *nnz;
2470     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2471     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2472     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2473     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2474     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2475     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2476     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2477     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2478     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2479     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2480     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2481     ierr = PetscFree(nnz);CHKERRQ(ierr);
2482   }
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2487 {
2488 #if defined(PETSC_USE_COMPLEX)
2489   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2490   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2491   MatScalar   *aa = a->a;
2492 
2493   PetscFunctionBegin;
2494   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2495 #else
2496   PetscFunctionBegin;
2497 #endif
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2502 {
2503   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2504   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2505   MatScalar   *aa = a->a;
2506 
2507   PetscFunctionBegin;
2508   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2513 {
2514   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2515   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2516   MatScalar   *aa = a->a;
2517 
2518   PetscFunctionBegin;
2519   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 /*
2524     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2525 */
2526 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2527 {
2528   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2529   PetscErrorCode ierr;
2530   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2531   PetscInt       nz = a->i[m],row,*jj,mr,col;
2532 
2533   PetscFunctionBegin;
2534   *nn = n;
2535   if (!ia) PetscFunctionReturn(0);
2536   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2537   else {
2538     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2539     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2540     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2541     jj   = a->j;
2542     for (i=0; i<nz; i++) {
2543       collengths[jj[i]]++;
2544     }
2545     cia[0] = oshift;
2546     for (i=0; i<n; i++) {
2547       cia[i+1] = cia[i] + collengths[i];
2548     }
2549     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2550     jj   = a->j;
2551     for (row=0; row<m; row++) {
2552       mr = a->i[row+1] - a->i[row];
2553       for (i=0; i<mr; i++) {
2554         col = *jj++;
2555 
2556         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2557       }
2558     }
2559     ierr = PetscFree(collengths);CHKERRQ(ierr);
2560     *ia  = cia; *ja = cja;
2561   }
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2566 {
2567   PetscErrorCode ierr;
2568 
2569   PetscFunctionBegin;
2570   if (!ia) PetscFunctionReturn(0);
2571   ierr = PetscFree(*ia);CHKERRQ(ierr);
2572   ierr = PetscFree(*ja);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 /*
2577  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2578  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2579  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2580  */
2581 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2582 {
2583   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2584   PetscErrorCode ierr;
2585   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2586   PetscInt       nz = a->i[m],row,*jj,mr,col;
2587   PetscInt       *cspidx;
2588 
2589   PetscFunctionBegin;
2590   *nn = n;
2591   if (!ia) PetscFunctionReturn(0);
2592 
2593   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2594   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2595   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2596   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2597   jj   = a->j;
2598   for (i=0; i<nz; i++) {
2599     collengths[jj[i]]++;
2600   }
2601   cia[0] = oshift;
2602   for (i=0; i<n; i++) {
2603     cia[i+1] = cia[i] + collengths[i];
2604   }
2605   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2606   jj   = a->j;
2607   for (row=0; row<m; row++) {
2608     mr = a->i[row+1] - a->i[row];
2609     for (i=0; i<mr; i++) {
2610       col = *jj++;
2611       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2612       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2613     }
2614   }
2615   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2616   *ia    = cia;
2617   *ja    = cja;
2618   *spidx = cspidx;
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2623 {
2624   PetscErrorCode ierr;
2625 
2626   PetscFunctionBegin;
2627   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2628   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2633 {
2634   PetscErrorCode ierr;
2635   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2636 
2637   PetscFunctionBegin;
2638   if (!Y->preallocated || !aij->nz) {
2639     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2640   }
2641   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 /* -------------------------------------------------------------------*/
2646 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2647                                        MatGetRow_SeqBAIJ,
2648                                        MatRestoreRow_SeqBAIJ,
2649                                        MatMult_SeqBAIJ_N,
2650                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2651                                        MatMultTranspose_SeqBAIJ,
2652                                        MatMultTransposeAdd_SeqBAIJ,
2653                                        NULL,
2654                                        NULL,
2655                                        NULL,
2656                                /* 10*/ NULL,
2657                                        MatLUFactor_SeqBAIJ,
2658                                        NULL,
2659                                        NULL,
2660                                        MatTranspose_SeqBAIJ,
2661                                /* 15*/ MatGetInfo_SeqBAIJ,
2662                                        MatEqual_SeqBAIJ,
2663                                        MatGetDiagonal_SeqBAIJ,
2664                                        MatDiagonalScale_SeqBAIJ,
2665                                        MatNorm_SeqBAIJ,
2666                                /* 20*/ NULL,
2667                                        MatAssemblyEnd_SeqBAIJ,
2668                                        MatSetOption_SeqBAIJ,
2669                                        MatZeroEntries_SeqBAIJ,
2670                                /* 24*/ MatZeroRows_SeqBAIJ,
2671                                        NULL,
2672                                        NULL,
2673                                        NULL,
2674                                        NULL,
2675                                /* 29*/ MatSetUp_SeqBAIJ,
2676                                        NULL,
2677                                        NULL,
2678                                        NULL,
2679                                        NULL,
2680                                /* 34*/ MatDuplicate_SeqBAIJ,
2681                                        NULL,
2682                                        NULL,
2683                                        MatILUFactor_SeqBAIJ,
2684                                        NULL,
2685                                /* 39*/ MatAXPY_SeqBAIJ,
2686                                        MatCreateSubMatrices_SeqBAIJ,
2687                                        MatIncreaseOverlap_SeqBAIJ,
2688                                        MatGetValues_SeqBAIJ,
2689                                        MatCopy_SeqBAIJ,
2690                                /* 44*/ NULL,
2691                                        MatScale_SeqBAIJ,
2692                                        MatShift_SeqBAIJ,
2693                                        NULL,
2694                                        MatZeroRowsColumns_SeqBAIJ,
2695                                /* 49*/ NULL,
2696                                        MatGetRowIJ_SeqBAIJ,
2697                                        MatRestoreRowIJ_SeqBAIJ,
2698                                        MatGetColumnIJ_SeqBAIJ,
2699                                        MatRestoreColumnIJ_SeqBAIJ,
2700                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2701                                        NULL,
2702                                        NULL,
2703                                        NULL,
2704                                        MatSetValuesBlocked_SeqBAIJ,
2705                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2706                                        MatDestroy_SeqBAIJ,
2707                                        MatView_SeqBAIJ,
2708                                        NULL,
2709                                        NULL,
2710                                /* 64*/ NULL,
2711                                        NULL,
2712                                        NULL,
2713                                        NULL,
2714                                        NULL,
2715                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2716                                        NULL,
2717                                        MatConvert_Basic,
2718                                        NULL,
2719                                        NULL,
2720                                /* 74*/ NULL,
2721                                        MatFDColoringApply_BAIJ,
2722                                        NULL,
2723                                        NULL,
2724                                        NULL,
2725                                /* 79*/ NULL,
2726                                        NULL,
2727                                        NULL,
2728                                        NULL,
2729                                        MatLoad_SeqBAIJ,
2730                                /* 84*/ NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        NULL,
2735                                /* 89*/ NULL,
2736                                        NULL,
2737                                        NULL,
2738                                        NULL,
2739                                        NULL,
2740                                /* 94*/ NULL,
2741                                        NULL,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                /* 99*/ NULL,
2746                                        NULL,
2747                                        NULL,
2748                                        MatConjugate_SeqBAIJ,
2749                                        NULL,
2750                                /*104*/ NULL,
2751                                        MatRealPart_SeqBAIJ,
2752                                        MatImaginaryPart_SeqBAIJ,
2753                                        NULL,
2754                                        NULL,
2755                                /*109*/ NULL,
2756                                        NULL,
2757                                        NULL,
2758                                        NULL,
2759                                        MatMissingDiagonal_SeqBAIJ,
2760                                /*114*/ NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                        NULL,
2765                                /*119*/ NULL,
2766                                        NULL,
2767                                        MatMultHermitianTranspose_SeqBAIJ,
2768                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2769                                        NULL,
2770                                /*124*/ NULL,
2771                                        MatGetColumnNorms_SeqBAIJ,
2772                                        MatInvertBlockDiagonal_SeqBAIJ,
2773                                        NULL,
2774                                        NULL,
2775                                /*129*/ NULL,
2776                                        NULL,
2777                                        NULL,
2778                                        NULL,
2779                                        NULL,
2780                                /*134*/ NULL,
2781                                        NULL,
2782                                        NULL,
2783                                        NULL,
2784                                        NULL,
2785                                /*139*/ MatSetBlockSizes_Default,
2786                                        NULL,
2787                                        NULL,
2788                                        MatFDColoringSetUp_SeqXAIJ,
2789                                        NULL,
2790                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2791                                        MatDestroySubMatrices_SeqBAIJ
2792 };
2793 
2794 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2795 {
2796   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2797   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2798   PetscErrorCode ierr;
2799 
2800   PetscFunctionBegin;
2801   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2802 
2803   /* allocate space for values if not already there */
2804   if (!aij->saved_values) {
2805     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2806     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2807   }
2808 
2809   /* copy values over */
2810   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2815 {
2816   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2817   PetscErrorCode ierr;
2818   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2819 
2820   PetscFunctionBegin;
2821   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2822   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2823 
2824   /* copy values over */
2825   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2830 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2831 
2832 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2833 {
2834   Mat_SeqBAIJ    *b;
2835   PetscErrorCode ierr;
2836   PetscInt       i,mbs,nbs,bs2;
2837   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2838 
2839   PetscFunctionBegin;
2840   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2841   if (nz == MAT_SKIP_ALLOCATION) {
2842     skipallocation = PETSC_TRUE;
2843     nz             = 0;
2844   }
2845 
2846   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2847   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2848   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2849   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2850 
2851   B->preallocated = PETSC_TRUE;
2852 
2853   mbs = B->rmap->n/bs;
2854   nbs = B->cmap->n/bs;
2855   bs2 = bs*bs;
2856 
2857   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2858 
2859   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2860   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2861   if (nnz) {
2862     for (i=0; i<mbs; i++) {
2863       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2864       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2865     }
2866   }
2867 
2868   b    = (Mat_SeqBAIJ*)B->data;
2869   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2870   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2871   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2872 
2873   if (!flg) {
2874     switch (bs) {
2875     case 1:
2876       B->ops->mult    = MatMult_SeqBAIJ_1;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2878       break;
2879     case 2:
2880       B->ops->mult    = MatMult_SeqBAIJ_2;
2881       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2882       break;
2883     case 3:
2884       B->ops->mult    = MatMult_SeqBAIJ_3;
2885       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2886       break;
2887     case 4:
2888       B->ops->mult    = MatMult_SeqBAIJ_4;
2889       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2890       break;
2891     case 5:
2892       B->ops->mult    = MatMult_SeqBAIJ_5;
2893       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2894       break;
2895     case 6:
2896       B->ops->mult    = MatMult_SeqBAIJ_6;
2897       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2898       break;
2899     case 7:
2900       B->ops->mult    = MatMult_SeqBAIJ_7;
2901       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2902       break;
2903     case 9:
2904     {
2905       PetscInt version = 1;
2906       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2907       switch (version) {
2908 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2909       case 1:
2910         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2911         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2912         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2913         break;
2914 #endif
2915       default:
2916         B->ops->mult    = MatMult_SeqBAIJ_N;
2917         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2918         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2919         break;
2920       }
2921       break;
2922     }
2923     case 11:
2924       B->ops->mult    = MatMult_SeqBAIJ_11;
2925       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2926       break;
2927     case 12:
2928     {
2929       PetscInt version = 1;
2930       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2931       switch (version) {
2932       case 1:
2933         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2934         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2935         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2936         break;
2937       case 2:
2938         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2939         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2940         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2941         break;
2942 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2943       case 3:
2944         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2945         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2946         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2947         break;
2948 #endif
2949       default:
2950         B->ops->mult    = MatMult_SeqBAIJ_N;
2951         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2952         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2953         break;
2954       }
2955       break;
2956     }
2957     case 15:
2958     {
2959       PetscInt version = 1;
2960       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2961       switch (version) {
2962       case 1:
2963         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2964         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2965         break;
2966       case 2:
2967         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2968         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2969         break;
2970       case 3:
2971         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2972         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2973         break;
2974       case 4:
2975         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2976         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2977         break;
2978       default:
2979         B->ops->mult    = MatMult_SeqBAIJ_N;
2980         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2981         break;
2982       }
2983       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2984       break;
2985     }
2986     default:
2987       B->ops->mult    = MatMult_SeqBAIJ_N;
2988       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2989       ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2990       break;
2991     }
2992   }
2993   B->ops->sor = MatSOR_SeqBAIJ;
2994   b->mbs = mbs;
2995   b->nbs = nbs;
2996   if (!skipallocation) {
2997     if (!b->imax) {
2998       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2999       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3000 
3001       b->free_imax_ilen = PETSC_TRUE;
3002     }
3003     /* b->ilen will count nonzeros in each block row so far. */
3004     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3005     if (!nnz) {
3006       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3007       else if (nz < 0) nz = 1;
3008       nz = PetscMin(nz,nbs);
3009       for (i=0; i<mbs; i++) b->imax[i] = nz;
3010       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
3011     } else {
3012       PetscInt64 nz64 = 0;
3013       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3014       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
3015     }
3016 
3017     /* allocate the matrix space */
3018     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3019     if (B->structure_only) {
3020       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
3021       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
3022       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
3023     } else {
3024       PetscInt nzbs2 = 0;
3025       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
3026       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
3027       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3028       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
3029     }
3030     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
3031 
3032     if (B->structure_only) {
3033       b->singlemalloc = PETSC_FALSE;
3034       b->free_a       = PETSC_FALSE;
3035     } else {
3036       b->singlemalloc = PETSC_TRUE;
3037       b->free_a       = PETSC_TRUE;
3038     }
3039     b->free_ij = PETSC_TRUE;
3040 
3041     b->i[0] = 0;
3042     for (i=1; i<mbs+1; i++) {
3043       b->i[i] = b->i[i-1] + b->imax[i-1];
3044     }
3045 
3046   } else {
3047     b->free_a  = PETSC_FALSE;
3048     b->free_ij = PETSC_FALSE;
3049   }
3050 
3051   b->bs2              = bs2;
3052   b->mbs              = mbs;
3053   b->nz               = 0;
3054   b->maxnz            = nz;
3055   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3056   B->was_assembled    = PETSC_FALSE;
3057   B->assembled        = PETSC_FALSE;
3058   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3063 {
3064   PetscInt       i,m,nz,nz_max=0,*nnz;
3065   PetscScalar    *values=NULL;
3066   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3067   PetscErrorCode ierr;
3068 
3069   PetscFunctionBegin;
3070   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3071   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3072   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3073   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3074   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3075   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3076   m    = B->rmap->n/bs;
3077 
3078   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3079   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3080   for (i=0; i<m; i++) {
3081     nz = ii[i+1]- ii[i];
3082     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3083     nz_max = PetscMax(nz_max, nz);
3084     nnz[i] = nz;
3085   }
3086   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3087   ierr = PetscFree(nnz);CHKERRQ(ierr);
3088 
3089   values = (PetscScalar*)V;
3090   if (!values) {
3091     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3092   }
3093   for (i=0; i<m; i++) {
3094     PetscInt          ncols  = ii[i+1] - ii[i];
3095     const PetscInt    *icols = jj + ii[i];
3096     if (bs == 1 || !roworiented) {
3097       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3098       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3099     } else {
3100       PetscInt j;
3101       for (j=0; j<ncols; j++) {
3102         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3103         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3104       }
3105     }
3106   }
3107   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3108   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3109   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 /*@C
3115    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3116 
3117    Not Collective
3118 
3119    Input Parameter:
3120 .  mat - a MATSEQBAIJ matrix
3121 
3122    Output Parameter:
3123 .   array - pointer to the data
3124 
3125    Level: intermediate
3126 
3127 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3128 @*/
3129 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3130 {
3131   PetscErrorCode ierr;
3132 
3133   PetscFunctionBegin;
3134   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
3137 
3138 /*@C
3139    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3140 
3141    Not Collective
3142 
3143    Input Parameters:
3144 +  mat - a MATSEQBAIJ matrix
3145 -  array - pointer to the data
3146 
3147    Level: intermediate
3148 
3149 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3150 @*/
3151 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3152 {
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 /*MC
3161    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3162    block sparse compressed row format.
3163 
3164    Options Database Keys:
3165 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3166 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3167 
3168    Level: beginner
3169 
3170    Notes:
3171     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3172     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3173 
3174    Run with -info to see what version of the matrix-vector product is being used
3175 
3176 .seealso: MatCreateSeqBAIJ()
3177 M*/
3178 
3179 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3180 
3181 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3182 {
3183   PetscErrorCode ierr;
3184   PetscMPIInt    size;
3185   Mat_SeqBAIJ    *b;
3186 
3187   PetscFunctionBegin;
3188   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3189   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3190 
3191   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3192   B->data = (void*)b;
3193   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3194 
3195   b->row          = NULL;
3196   b->col          = NULL;
3197   b->icol         = NULL;
3198   b->reallocs     = 0;
3199   b->saved_values = NULL;
3200 
3201   b->roworiented        = PETSC_TRUE;
3202   b->nonew              = 0;
3203   b->diag               = NULL;
3204   B->spptr              = NULL;
3205   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3206   b->keepnonzeropattern = PETSC_FALSE;
3207 
3208   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3209   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3210   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3211   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3212   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3213   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3214   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3215   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3216   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3217   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3218   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3219 #if defined(PETSC_HAVE_HYPRE)
3220   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3221 #endif
3222   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3223   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3228 {
3229   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3230   PetscErrorCode ierr;
3231   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3232 
3233   PetscFunctionBegin;
3234   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3235 
3236   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3237     c->imax           = a->imax;
3238     c->ilen           = a->ilen;
3239     c->free_imax_ilen = PETSC_FALSE;
3240   } else {
3241     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3242     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3243     for (i=0; i<mbs; i++) {
3244       c->imax[i] = a->imax[i];
3245       c->ilen[i] = a->ilen[i];
3246     }
3247     c->free_imax_ilen = PETSC_TRUE;
3248   }
3249 
3250   /* allocate the matrix space */
3251   if (mallocmatspace) {
3252     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3253       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3254       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3255 
3256       c->i            = a->i;
3257       c->j            = a->j;
3258       c->singlemalloc = PETSC_FALSE;
3259       c->free_a       = PETSC_TRUE;
3260       c->free_ij      = PETSC_FALSE;
3261       c->parent       = A;
3262       C->preallocated = PETSC_TRUE;
3263       C->assembled    = PETSC_TRUE;
3264 
3265       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3266       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3267       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3268     } else {
3269       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3270       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3271 
3272       c->singlemalloc = PETSC_TRUE;
3273       c->free_a       = PETSC_TRUE;
3274       c->free_ij      = PETSC_TRUE;
3275 
3276       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3277       if (mbs > 0) {
3278         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3279         if (cpvalues == MAT_COPY_VALUES) {
3280           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3281         } else {
3282           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3283         }
3284       }
3285       C->preallocated = PETSC_TRUE;
3286       C->assembled    = PETSC_TRUE;
3287     }
3288   }
3289 
3290   c->roworiented = a->roworiented;
3291   c->nonew       = a->nonew;
3292 
3293   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3294   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3295 
3296   c->bs2         = a->bs2;
3297   c->mbs         = a->mbs;
3298   c->nbs         = a->nbs;
3299 
3300   if (a->diag) {
3301     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3302       c->diag      = a->diag;
3303       c->free_diag = PETSC_FALSE;
3304     } else {
3305       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3306       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3307       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3308       c->free_diag = PETSC_TRUE;
3309     }
3310   } else c->diag = NULL;
3311 
3312   c->nz         = a->nz;
3313   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3314   c->solve_work = NULL;
3315   c->mult_work  = NULL;
3316   c->sor_workt  = NULL;
3317   c->sor_work   = NULL;
3318 
3319   c->compressedrow.use   = a->compressedrow.use;
3320   c->compressedrow.nrows = a->compressedrow.nrows;
3321   if (a->compressedrow.use) {
3322     i    = a->compressedrow.nrows;
3323     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3324     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3325     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3326     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3327   } else {
3328     c->compressedrow.use    = PETSC_FALSE;
3329     c->compressedrow.i      = NULL;
3330     c->compressedrow.rindex = NULL;
3331   }
3332   C->nonzerostate = A->nonzerostate;
3333 
3334   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3335   PetscFunctionReturn(0);
3336 }
3337 
3338 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3339 {
3340   PetscErrorCode ierr;
3341 
3342   PetscFunctionBegin;
3343   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3344   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3345   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3346   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3347   PetscFunctionReturn(0);
3348 }
3349 
3350 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3351 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3352 {
3353   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3354   PetscInt       *rowidxs,*colidxs;
3355   PetscScalar    *matvals;
3356   PetscErrorCode ierr;
3357 
3358   PetscFunctionBegin;
3359   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3360 
3361   /* read matrix header */
3362   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3363   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3364   M = header[1]; N = header[2]; nz = header[3];
3365   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3366   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3367   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3368 
3369   /* set block sizes from the viewer's .info file */
3370   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3371   /* set local and global sizes if not set already */
3372   if (mat->rmap->n < 0) mat->rmap->n = M;
3373   if (mat->cmap->n < 0) mat->cmap->n = N;
3374   if (mat->rmap->N < 0) mat->rmap->N = M;
3375   if (mat->cmap->N < 0) mat->cmap->N = N;
3376   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3377   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3378 
3379   /* check if the matrix sizes are correct */
3380   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3381   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3382   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3383   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3384   mbs = m/bs; nbs = n/bs;
3385 
3386   /* read in row lengths, column indices and nonzero values */
3387   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3388   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3389   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3390   sum = rowidxs[m];
3391   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3392 
3393   /* read in column indices and nonzero values */
3394   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3395   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3396   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3397 
3398   { /* preallocate matrix storage */
3399     PetscBT   bt; /* helper bit set to count nonzeros */
3400     PetscInt  *nnz;
3401     PetscBool sbaij;
3402 
3403     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3404     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3405     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3406     for (i=0; i<mbs; i++) {
3407       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3408       for (k=0; k<bs; k++) {
3409         PetscInt row = bs*i + k;
3410         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3411           PetscInt col = colidxs[j];
3412           if (!sbaij || col >= row)
3413             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3414         }
3415       }
3416     }
3417     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3418     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3419     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3420     ierr = PetscFree(nnz);CHKERRQ(ierr);
3421   }
3422 
3423   /* store matrix values */
3424   for (i=0; i<m; i++) {
3425     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3426     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3427   }
3428 
3429   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3430   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3431   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3432   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3433   PetscFunctionReturn(0);
3434 }
3435 
3436 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3437 {
3438   PetscErrorCode ierr;
3439   PetscBool      isbinary;
3440 
3441   PetscFunctionBegin;
3442   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3443   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3444   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3445   PetscFunctionReturn(0);
3446 }
3447 
3448 /*@C
3449    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3450    compressed row) format.  For good matrix assembly performance the
3451    user should preallocate the matrix storage by setting the parameter nz
3452    (or the array nnz).  By setting these parameters accurately, performance
3453    during matrix assembly can be increased by more than a factor of 50.
3454 
3455    Collective
3456 
3457    Input Parameters:
3458 +  comm - MPI communicator, set to PETSC_COMM_SELF
3459 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3460           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3461 .  m - number of rows
3462 .  n - number of columns
3463 .  nz - number of nonzero blocks  per block row (same for all rows)
3464 -  nnz - array containing the number of nonzero blocks in the various block rows
3465          (possibly different for each block row) or NULL
3466 
3467    Output Parameter:
3468 .  A - the matrix
3469 
3470    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3471    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3472    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3473 
3474    Options Database Keys:
3475 +   -mat_no_unroll - uses code that does not unroll the loops in the
3476                      block calculations (much slower)
3477 -    -mat_block_size - size of the blocks to use
3478 
3479    Level: intermediate
3480 
3481    Notes:
3482    The number of rows and columns must be divisible by blocksize.
3483 
3484    If the nnz parameter is given then the nz parameter is ignored
3485 
3486    A nonzero block is any block that as 1 or more nonzeros in it
3487 
3488    The block AIJ format is fully compatible with standard Fortran 77
3489    storage.  That is, the stored row and column indices can begin at
3490    either one (as in Fortran) or zero.  See the users' manual for details.
3491 
3492    Specify the preallocated storage with either nz or nnz (not both).
3493    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3494    allocation.  See Users-Manual: ch_mat for details.
3495    matrices.
3496 
3497 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3498 @*/
3499 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3500 {
3501   PetscErrorCode ierr;
3502 
3503   PetscFunctionBegin;
3504   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3505   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3506   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3507   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3508   PetscFunctionReturn(0);
3509 }
3510 
3511 /*@C
3512    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3513    per row in the matrix. For good matrix assembly performance the
3514    user should preallocate the matrix storage by setting the parameter nz
3515    (or the array nnz).  By setting these parameters accurately, performance
3516    during matrix assembly can be increased by more than a factor of 50.
3517 
3518    Collective
3519 
3520    Input Parameters:
3521 +  B - the matrix
3522 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3523           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3524 .  nz - number of block nonzeros per block row (same for all rows)
3525 -  nnz - array containing the number of block nonzeros in the various block rows
3526          (possibly different for each block row) or NULL
3527 
3528    Options Database Keys:
3529 +   -mat_no_unroll - uses code that does not unroll the loops in the
3530                      block calculations (much slower)
3531 -   -mat_block_size - size of the blocks to use
3532 
3533    Level: intermediate
3534 
3535    Notes:
3536    If the nnz parameter is given then the nz parameter is ignored
3537 
3538    You can call MatGetInfo() to get information on how effective the preallocation was;
3539    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3540    You can also run with the option -info and look for messages with the string
3541    malloc in them to see if additional memory allocation was needed.
3542 
3543    The block AIJ format is fully compatible with standard Fortran 77
3544    storage.  That is, the stored row and column indices can begin at
3545    either one (as in Fortran) or zero.  See the users' manual for details.
3546 
3547    Specify the preallocated storage with either nz or nnz (not both).
3548    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3549    allocation.  See Users-Manual: ch_mat for details.
3550 
3551 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3552 @*/
3553 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3554 {
3555   PetscErrorCode ierr;
3556 
3557   PetscFunctionBegin;
3558   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3559   PetscValidType(B,1);
3560   PetscValidLogicalCollectiveInt(B,bs,2);
3561   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3562   PetscFunctionReturn(0);
3563 }
3564 
3565 /*@C
3566    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3567 
3568    Collective
3569 
3570    Input Parameters:
3571 +  B - the matrix
3572 .  i - the indices into j for the start of each local row (starts with zero)
3573 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3574 -  v - optional values in the matrix
3575 
3576    Level: advanced
3577 
3578    Notes:
3579    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3580    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3581    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3582    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3583    block column and the second index is over columns within a block.
3584 
3585    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3586 
3587 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3588 @*/
3589 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3590 {
3591   PetscErrorCode ierr;
3592 
3593   PetscFunctionBegin;
3594   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3595   PetscValidType(B,1);
3596   PetscValidLogicalCollectiveInt(B,bs,2);
3597   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3598   PetscFunctionReturn(0);
3599 }
3600 
3601 /*@
3602      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3603 
3604      Collective
3605 
3606    Input Parameters:
3607 +  comm - must be an MPI communicator of size 1
3608 .  bs - size of block
3609 .  m - number of rows
3610 .  n - number of columns
3611 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3612 .  j - column indices
3613 -  a - matrix values
3614 
3615    Output Parameter:
3616 .  mat - the matrix
3617 
3618    Level: advanced
3619 
3620    Notes:
3621        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3622     once the matrix is destroyed
3623 
3624        You cannot set new nonzero locations into this matrix, that will generate an error.
3625 
3626        The i and j indices are 0 based
3627 
3628        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3629 
3630       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3631       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3632       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3633       with column-major ordering within blocks.
3634 
3635 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3636 
3637 @*/
3638 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3639 {
3640   PetscErrorCode ierr;
3641   PetscInt       ii;
3642   Mat_SeqBAIJ    *baij;
3643 
3644   PetscFunctionBegin;
3645   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3646   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3647 
3648   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3649   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3650   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3651   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3652   baij = (Mat_SeqBAIJ*)(*mat)->data;
3653   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3654   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3655 
3656   baij->i = i;
3657   baij->j = j;
3658   baij->a = a;
3659 
3660   baij->singlemalloc = PETSC_FALSE;
3661   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3662   baij->free_a       = PETSC_FALSE;
3663   baij->free_ij      = PETSC_FALSE;
3664 
3665   for (ii=0; ii<m; ii++) {
3666     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3667     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3668   }
3669   if (PetscDefined(USE_DEBUG)) {
3670     for (ii=0; ii<baij->i[m]; ii++) {
3671       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3672       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3673     }
3674   }
3675 
3676   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3677   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3678   PetscFunctionReturn(0);
3679 }
3680 
3681 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3682 {
3683   PetscErrorCode ierr;
3684   PetscMPIInt    size;
3685 
3686   PetscFunctionBegin;
3687   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3688   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3689     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3690   } else {
3691     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3692   }
3693   PetscFunctionReturn(0);
3694 }
3695