xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7c565772b3ecde0805329682aa5f6f26229c86ae)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
156   }
157   work = a->mult_work;
158   t = work + k+1;
159   if (!a->sor_work) {
160     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
161   }
162   w = a->sor_work;
163 
164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
165   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
166 
167   if (flag & SOR_ZERO_INITIAL_GUESS) {
168     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169       switch (bs) {
170       case 1:
171         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172         t[0] = b[0];
173         i2     = 1;
174         idiag += 1;
175         for (i=1; i<m; i++) {
176           v  = aa + ai[i];
177           vi = aj + ai[i];
178           nz = diag[i] - ai[i];
179           s[0] = b[i2];
180           for (j=0; j<nz; j++) {
181             xw[0] = x[vi[j]];
182             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183           }
184           t[i2] = s[0];
185           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186           x[i2]  = xw[0];
187           idiag += 1;
188           i2    += 1;
189         }
190         break;
191       case 2:
192         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193         t[0] = b[0]; t[1] = b[1];
194         i2     = 2;
195         idiag += 4;
196         for (i=1; i<m; i++) {
197           v  = aa + 4*ai[i];
198           vi = aj + ai[i];
199           nz = diag[i] - ai[i];
200           s[0] = b[i2]; s[1] = b[i2+1];
201           for (j=0; j<nz; j++) {
202             idx = 2*vi[j];
203             it  = 4*j;
204             xw[0] = x[idx]; xw[1] = x[1+idx];
205             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206           }
207           t[i2] = s[0]; t[i2+1] = s[1];
208           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209           x[i2]   = xw[0]; x[i2+1] = xw[1];
210           idiag  += 4;
211           i2     += 2;
212         }
213         break;
214       case 3:
215         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217         i2     = 3;
218         idiag += 9;
219         for (i=1; i<m; i++) {
220           v  = aa + 9*ai[i];
221           vi = aj + ai[i];
222           nz = diag[i] - ai[i];
223           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224           while (nz--) {
225             idx = 3*(*vi++);
226             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228             v  += 9;
229           }
230           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233           idiag  += 9;
234           i2     += 3;
235         }
236         break;
237       case 4:
238         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240         i2     = 4;
241         idiag += 16;
242         for (i=1; i<m; i++) {
243           v  = aa + 16*ai[i];
244           vi = aj + ai[i];
245           nz = diag[i] - ai[i];
246           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247           while (nz--) {
248             idx = 4*(*vi++);
249             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251             v  += 16;
252           }
253           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256           idiag  += 16;
257           i2     += 4;
258         }
259         break;
260       case 5:
261         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263         i2     = 5;
264         idiag += 25;
265         for (i=1; i<m; i++) {
266           v  = aa + 25*ai[i];
267           vi = aj + ai[i];
268           nz = diag[i] - ai[i];
269           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270           while (nz--) {
271             idx = 5*(*vi++);
272             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274             v  += 25;
275           }
276           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279           idiag  += 25;
280           i2     += 5;
281         }
282         break;
283       case 6:
284         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286         i2     = 6;
287         idiag += 36;
288         for (i=1; i<m; i++) {
289           v  = aa + 36*ai[i];
290           vi = aj + ai[i];
291           nz = diag[i] - ai[i];
292           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293           while (nz--) {
294             idx = 6*(*vi++);
295             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
296             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298             v  += 36;
299           }
300           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304           idiag  += 36;
305           i2     += 6;
306         }
307         break;
308       case 7:
309         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312         i2     = 7;
313         idiag += 49;
314         for (i=1; i<m; i++) {
315           v  = aa + 49*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
319           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320           while (nz--) {
321             idx = 7*(*vi++);
322             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
323             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325             v  += 49;
326           }
327           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332           idiag  += 49;
333           i2     += 7;
334         }
335         break;
336       default:
337         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
339         i2     = bs;
340         idiag += bs2;
341         for (i=1; i<m; i++) {
342           v  = aa + bs2*ai[i];
343           vi = aj + ai[i];
344           nz = diag[i] - ai[i];
345 
346           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
347           /* copy all rows of x that are needed into contiguous space */
348           workt = work;
349           for (j=0; j<nz; j++) {
350             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
351             workt += bs;
352           }
353           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
356 
357           idiag += bs2;
358           i2    += bs;
359         }
360         break;
361       }
362       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
364       xb = t;
365     }
366     else xb = b;
367     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368       idiag = a->idiag+bs2*(a->mbs-1);
369       i2 = bs * (m-1);
370       switch (bs) {
371       case 1:
372         s[0]  = xb[i2];
373         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374         x[i2] = xw[0];
375         i2   -= 1;
376         for (i=m-2; i>=0; i--) {
377           v  = aa + (diag[i]+1);
378           vi = aj + diag[i] + 1;
379           nz = ai[i+1] - diag[i] - 1;
380           s[0] = xb[i2];
381           for (j=0; j<nz; j++) {
382             xw[0] = x[vi[j]];
383             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384           }
385           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386           x[i2]  = xw[0];
387           idiag -= 1;
388           i2    -= 1;
389         }
390         break;
391       case 2:
392         s[0]  = xb[i2]; s[1] = xb[i2+1];
393         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394         x[i2] = xw[0]; x[i2+1] = xw[1];
395         i2    -= 2;
396         idiag -= 4;
397         for (i=m-2; i>=0; i--) {
398           v  = aa + 4*(diag[i] + 1);
399           vi = aj + diag[i] + 1;
400           nz = ai[i+1] - diag[i] - 1;
401           s[0] = xb[i2]; s[1] = xb[i2+1];
402           for (j=0; j<nz; j++) {
403             idx = 2*vi[j];
404             it  = 4*j;
405             xw[0] = x[idx]; xw[1] = x[1+idx];
406             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407           }
408           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409           x[i2]   = xw[0]; x[i2+1] = xw[1];
410           idiag  -= 4;
411           i2     -= 2;
412         }
413         break;
414       case 3:
415         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418         i2    -= 3;
419         idiag -= 9;
420         for (i=m-2; i>=0; i--) {
421           v  = aa + 9*(diag[i]+1);
422           vi = aj + diag[i] + 1;
423           nz = ai[i+1] - diag[i] - 1;
424           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425           while (nz--) {
426             idx = 3*(*vi++);
427             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429             v  += 9;
430           }
431           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433           idiag  -= 9;
434           i2     -= 3;
435         }
436         break;
437       case 4:
438         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441         i2    -= 4;
442         idiag -= 16;
443         for (i=m-2; i>=0; i--) {
444           v  = aa + 16*(diag[i]+1);
445           vi = aj + diag[i] + 1;
446           nz = ai[i+1] - diag[i] - 1;
447           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448           while (nz--) {
449             idx = 4*(*vi++);
450             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452             v  += 16;
453           }
454           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456           idiag  -= 16;
457           i2     -= 4;
458         }
459         break;
460       case 5:
461         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464         i2    -= 5;
465         idiag -= 25;
466         for (i=m-2; i>=0; i--) {
467           v  = aa + 25*(diag[i]+1);
468           vi = aj + diag[i] + 1;
469           nz = ai[i+1] - diag[i] - 1;
470           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471           while (nz--) {
472             idx = 5*(*vi++);
473             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475             v  += 25;
476           }
477           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479           idiag  -= 25;
480           i2     -= 5;
481         }
482         break;
483       case 6:
484         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487         i2    -= 6;
488         idiag -= 36;
489         for (i=m-2; i>=0; i--) {
490           v  = aa + 36*(diag[i]+1);
491           vi = aj + diag[i] + 1;
492           nz = ai[i+1] - diag[i] - 1;
493           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494           while (nz--) {
495             idx = 6*(*vi++);
496             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
497             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499             v  += 36;
500           }
501           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503           idiag  -= 36;
504           i2     -= 6;
505         }
506         break;
507       case 7:
508         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
509         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513         i2    -= 7;
514         idiag -= 49;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 49*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521           while (nz--) {
522             idx = 7*(*vi++);
523             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
524             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526             v  += 49;
527           }
528           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531           idiag  -= 49;
532           i2     -= 7;
533         }
534         break;
535       default:
536         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
537         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538         i2    -= bs;
539         idiag -= bs2;
540         for (i=m-2; i>=0; i--) {
541           v  = aa + bs2*(diag[i]+1);
542           vi = aj + diag[i] + 1;
543           nz = ai[i+1] - diag[i] - 1;
544 
545           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546           /* copy all rows of x that are needed into contiguous space */
547           workt = work;
548           for (j=0; j<nz; j++) {
549             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
550             workt += bs;
551           }
552           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
554 
555           idiag -= bs2;
556           i2    -= bs;
557         }
558         break;
559       }
560       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
561     }
562     its--;
563   }
564   while (its--) {
565     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566       idiag = a->idiag;
567       i2 = 0;
568       switch (bs) {
569       case 1:
570         for (i=0; i<m; i++) {
571           v  = aa + ai[i];
572           vi = aj + ai[i];
573           nz = ai[i+1] - ai[i];
574           s[0] = b[i2];
575           for (j=0; j<nz; j++) {
576             xw[0] = x[vi[j]];
577             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578           }
579           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580           x[i2] += xw[0];
581           idiag += 1;
582           i2    += 1;
583         }
584         break;
585       case 2:
586         for (i=0; i<m; i++) {
587           v  = aa + 4*ai[i];
588           vi = aj + ai[i];
589           nz = ai[i+1] - ai[i];
590           s[0] = b[i2]; s[1] = b[i2+1];
591           for (j=0; j<nz; j++) {
592             idx = 2*vi[j];
593             it  = 4*j;
594             xw[0] = x[idx]; xw[1] = x[1+idx];
595             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596           }
597           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598           x[i2]  += xw[0]; x[i2+1] += xw[1];
599           idiag  += 4;
600           i2     += 2;
601         }
602         break;
603       case 3:
604         for (i=0; i<m; i++) {
605           v  = aa + 9*ai[i];
606           vi = aj + ai[i];
607           nz = ai[i+1] - ai[i];
608           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609           while (nz--) {
610             idx = 3*(*vi++);
611             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613             v  += 9;
614           }
615           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617           idiag  += 9;
618           i2     += 3;
619         }
620         break;
621       case 4:
622         for (i=0; i<m; i++) {
623           v  = aa + 16*ai[i];
624           vi = aj + ai[i];
625           nz = ai[i+1] - ai[i];
626           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627           while (nz--) {
628             idx = 4*(*vi++);
629             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631             v  += 16;
632           }
633           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635           idiag  += 16;
636           i2     += 4;
637         }
638         break;
639       case 5:
640         for (i=0; i<m; i++) {
641           v  = aa + 25*ai[i];
642           vi = aj + ai[i];
643           nz = ai[i+1] - ai[i];
644           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645           while (nz--) {
646             idx = 5*(*vi++);
647             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649             v  += 25;
650           }
651           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653           idiag  += 25;
654           i2     += 5;
655         }
656         break;
657       case 6:
658         for (i=0; i<m; i++) {
659           v  = aa + 36*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663           while (nz--) {
664             idx = 6*(*vi++);
665             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
666             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668             v  += 36;
669           }
670           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673           idiag  += 36;
674           i2     += 6;
675         }
676         break;
677       case 7:
678         for (i=0; i<m; i++) {
679           v  = aa + 49*ai[i];
680           vi = aj + ai[i];
681           nz = ai[i+1] - ai[i];
682           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
683           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684           while (nz--) {
685             idx = 7*(*vi++);
686             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689             v  += 49;
690           }
691           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694           idiag  += 49;
695           i2     += 7;
696         }
697         break;
698       default:
699         for (i=0; i<m; i++) {
700           v  = aa + bs2*ai[i];
701           vi = aj + ai[i];
702           nz = ai[i+1] - ai[i];
703 
704           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
705           /* copy all rows of x that are needed into contiguous space */
706           workt = work;
707           for (j=0; j<nz; j++) {
708             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
709             workt += bs;
710           }
711           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
713 
714           idiag += bs2;
715           i2    += bs;
716         }
717         break;
718       }
719       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
720     }
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = a->idiag+bs2*(a->mbs-1);
723       i2 = bs * (m-1);
724       switch (bs) {
725       case 1:
726         for (i=m-1; i>=0; i--) {
727           v  = aa + ai[i];
728           vi = aj + ai[i];
729           nz = ai[i+1] - ai[i];
730           s[0] = b[i2];
731           for (j=0; j<nz; j++) {
732             xw[0] = x[vi[j]];
733             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734           }
735           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736           x[i2] += xw[0];
737           idiag -= 1;
738           i2    -= 1;
739         }
740         break;
741       case 2:
742         for (i=m-1; i>=0; i--) {
743           v  = aa + 4*ai[i];
744           vi = aj + ai[i];
745           nz = ai[i+1] - ai[i];
746           s[0] = b[i2]; s[1] = b[i2+1];
747           for (j=0; j<nz; j++) {
748             idx = 2*vi[j];
749             it  = 4*j;
750             xw[0] = x[idx]; xw[1] = x[1+idx];
751             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752           }
753           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754           x[i2]  += xw[0]; x[i2+1] += xw[1];
755           idiag  -= 4;
756           i2     -= 2;
757         }
758         break;
759       case 3:
760         for (i=m-1; i>=0; i--) {
761           v  = aa + 9*ai[i];
762           vi = aj + ai[i];
763           nz = ai[i+1] - ai[i];
764           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765           while (nz--) {
766             idx = 3*(*vi++);
767             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769             v  += 9;
770           }
771           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773           idiag  -= 9;
774           i2     -= 3;
775         }
776         break;
777       case 4:
778         for (i=m-1; i>=0; i--) {
779           v  = aa + 16*ai[i];
780           vi = aj + ai[i];
781           nz = ai[i+1] - ai[i];
782           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783           while (nz--) {
784             idx = 4*(*vi++);
785             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787             v  += 16;
788           }
789           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791           idiag  -= 16;
792           i2     -= 4;
793         }
794         break;
795       case 5:
796         for (i=m-1; i>=0; i--) {
797           v  = aa + 25*ai[i];
798           vi = aj + ai[i];
799           nz = ai[i+1] - ai[i];
800           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801           while (nz--) {
802             idx = 5*(*vi++);
803             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805             v  += 25;
806           }
807           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809           idiag  -= 25;
810           i2     -= 5;
811         }
812         break;
813       case 6:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 36*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819           while (nz--) {
820             idx = 6*(*vi++);
821             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
822             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824             v  += 36;
825           }
826           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829           idiag  -= 36;
830           i2     -= 6;
831         }
832         break;
833       case 7:
834         for (i=m-1; i>=0; i--) {
835           v  = aa + 49*ai[i];
836           vi = aj + ai[i];
837           nz = ai[i+1] - ai[i];
838           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
839           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840           while (nz--) {
841             idx = 7*(*vi++);
842             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845             v  += 49;
846           }
847           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850           idiag  -= 49;
851           i2     -= 7;
852         }
853         break;
854       default:
855         for (i=m-1; i>=0; i--) {
856           v  = aa + bs2*ai[i];
857           vi = aj + ai[i];
858           nz = ai[i+1] - ai[i];
859 
860           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
861           /* copy all rows of x that are needed into contiguous space */
862           workt = work;
863           for (j=0; j<nz; j++) {
864             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
865             workt += bs;
866           }
867           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
869 
870           idiag -= bs2;
871           i2    -= bs;
872         }
873         break;
874       }
875       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
876     }
877   }
878   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 
884 /*
885     Special version for direct calls from Fortran (Used in PETSc-fun3d)
886 */
887 #if defined(PETSC_HAVE_FORTRAN_CAPS)
888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890 #define matsetvaluesblocked4_ matsetvaluesblocked4
891 #endif
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "matsetvaluesblocked4_"
895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896 {
897   Mat               A  = *AA;
898   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
899   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900   PetscInt          *ai    =a->i,*ailen=a->ilen;
901   PetscInt          *aj    =a->j,stepval,lastcol = -1;
902   const PetscScalar *value = v;
903   MatScalar         *ap,*aa = a->a,*bap;
904 
905   PetscFunctionBegin;
906   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907   stepval = (n-1)*4;
908   for (k=0; k<m; k++) { /* loop over added rows */
909     row  = im[k];
910     rp   = aj + ai[row];
911     ap   = aa + 16*ai[row];
912     nrow = ailen[row];
913     low  = 0;
914     high = nrow;
915     for (l=0; l<n; l++) { /* loop over added columns */
916       col = in[l];
917       if (col <= lastcol)  low = 0;
918       else                high = nrow;
919       lastcol = col;
920       value   = v + k*(stepval+4 + l)*4;
921       while (high-low > 7) {
922         t = (low+high)/2;
923         if (rp[t] > col) high = t;
924         else             low  = t;
925       }
926       for (i=low; i<high; i++) {
927         if (rp[i] > col) break;
928         if (rp[i] == col) {
929           bap = ap +  16*i;
930           for (ii=0; ii<4; ii++,value+=stepval) {
931             for (jj=ii; jj<16; jj+=4) {
932               bap[jj] += *value++;
933             }
934           }
935           goto noinsert2;
936         }
937       }
938       N = nrow++ - 1;
939       high++; /* added new column index thus must search to one higher than before */
940       /* shift up all the later entries in this row */
941       for (ii=N; ii>=i; ii--) {
942         rp[ii+1] = rp[ii];
943         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944       }
945       if (N >= i) {
946         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947       }
948       rp[i] = col;
949       bap   = ap +  16*i;
950       for (ii=0; ii<4; ii++,value+=stepval) {
951         for (jj=ii; jj<16; jj+=4) {
952           bap[jj] = *value++;
953         }
954       }
955       noinsert2:;
956       low = i;
957     }
958     ailen[row] = nrow;
959   }
960   PetscFunctionReturnVoid();
961 }
962 
963 #if defined(PETSC_HAVE_FORTRAN_CAPS)
964 #define matsetvalues4_ MATSETVALUES4
965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966 #define matsetvalues4_ matsetvalues4
967 #endif
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSetValues4_"
971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972 {
973   Mat         A  = *AA;
974   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976   PetscInt    *ai=a->i,*ailen=a->ilen;
977   PetscInt    *aj=a->j,brow,bcol;
978   PetscInt    ridx,cidx,lastcol = -1;
979   MatScalar   *ap,value,*aa=a->a,*bap;
980 
981   PetscFunctionBegin;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k]; brow = row/4;
984     rp   = aj + ai[brow];
985     ap   = aa + 16*ai[brow];
986     nrow = ailen[brow];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col   = in[l]; bcol = col/4;
991       ridx  = row % 4; cidx = col % 4;
992       value = v[l + k*n];
993       if (col <= lastcol)  low = 0;
994       else                high = nrow;
995       lastcol = col;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for (i=low; i<high; i++) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap   = ap +  16*i + 4*cidx + ridx;
1005           *bap += value;
1006           goto noinsert1;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N>=i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i]                    = bcol;
1020       ap[16*i + 4*cidx + ridx] = value;
1021 noinsert1:;
1022       low = i;
1023     }
1024     ailen[brow] = nrow;
1025   }
1026   PetscFunctionReturnVoid();
1027 }
1028 
1029 /*
1030      Checks for missing diagonals
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*jj = a->j,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !jj) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047   } else {
1048     diag = a->diag;
1049     for (i=0; i<a->mbs; i++) {
1050       if (jj[diag[i]] != i) {
1051         *missing = PETSC_TRUE;
1052         if (d) *d = i;
1053         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054         break;
1055       }
1056     }
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = 0;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176 {
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (!ia) PetscFunctionReturn(0);
1181   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182     ierr = PetscFree(*ia);CHKERRQ(ierr);
1183     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191 {
1192   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198 #endif
1199   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1200   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1201   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1202   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1203   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1204   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1205   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1206   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1207   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1208   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1209   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1210   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1211   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1212 
1213   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1214   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1215   ierr = PetscFree(A->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1234 {
1235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   switch (op) {
1240   case MAT_ROW_ORIENTED:
1241     a->roworiented = flg;
1242     break;
1243   case MAT_KEEP_NONZERO_PATTERN:
1244     a->keepnonzeropattern = flg;
1245     break;
1246   case MAT_NEW_NONZERO_LOCATIONS:
1247     a->nonew = (flg ? 0 : 1);
1248     break;
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     a->nonew = (flg ? -1 : 0);
1251     break;
1252   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1253     a->nonew = (flg ? -2 : 0);
1254     break;
1255   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1256     a->nounused = (flg ? -1 : 0);
1257     break;
1258   case MAT_CHECK_COMPRESSED_ROW:
1259     a->compressedrow.check = flg;
1260     break;
1261   case MAT_NEW_DIAGONALS:
1262   case MAT_IGNORE_OFF_PROC_ENTRIES:
1263   case MAT_USE_HASH_TABLE:
1264     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1265     break;
1266   case MAT_SPD:
1267   case MAT_SYMMETRIC:
1268   case MAT_STRUCTURALLY_SYMMETRIC:
1269   case MAT_HERMITIAN:
1270   case MAT_SYMMETRY_ETERNAL:
1271     /* These options are handled directly by MatSetOption() */
1272     break;
1273   default:
1274     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1281 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1282 {
1283   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1284   PetscErrorCode ierr;
1285   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1286   MatScalar      *aa,*aa_i;
1287   PetscScalar    *v_i;
1288 
1289   PetscFunctionBegin;
1290   bs  = A->rmap->bs;
1291   ai  = a->i;
1292   aj  = a->j;
1293   aa  = a->a;
1294   bs2 = a->bs2;
1295 
1296   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1297 
1298   bn  = row/bs;   /* Block number */
1299   bp  = row % bs; /* Block Position */
1300   M   = ai[bn+1] - ai[bn];
1301   *nz = bs*M;
1302 
1303   if (v) {
1304     *v = 0;
1305     if (*nz) {
1306       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1307       for (i=0; i<M; i++) { /* for each block in the block row */
1308         v_i  = *v + i*bs;
1309         aa_i = aa + bs2*(ai[bn] + i);
1310         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1311       }
1312     }
1313   }
1314 
1315   if (idx) {
1316     *idx = 0;
1317     if (*nz) {
1318       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1319       for (i=0; i<M; i++) { /* for each block in the block row */
1320         idx_i = *idx + i*bs;
1321         itmp  = bs*aj[ai[bn] + i];
1322         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1323       }
1324     }
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1331 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332 {
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1337   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1345 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1346 {
1347   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1348   Mat            C;
1349   PetscErrorCode ierr;
1350   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1351   PetscInt       *rows,*cols,bs2=a->bs2;
1352   MatScalar      *array;
1353 
1354   PetscFunctionBegin;
1355   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1356   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1357     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1358     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1359 
1360     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1361     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1362     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1363     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1364     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1365     ierr = PetscFree(col);CHKERRQ(ierr);
1366   } else {
1367     C = *B;
1368   }
1369 
1370   array = a->a;
1371   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1372   for (i=0; i<mbs; i++) {
1373     cols[0] = i*bs;
1374     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1375     len = ai[i+1] - ai[i];
1376     for (j=0; j<len; j++) {
1377       rows[0] = (*aj++)*bs;
1378       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1379       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1380       array += bs2;
1381     }
1382   }
1383   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1384 
1385   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1386   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1387 
1388   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1389     *B = C;
1390   } else {
1391     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1392   }
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1398 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1399 {
1400   PetscErrorCode ierr;
1401   Mat            Btrans;
1402 
1403   PetscFunctionBegin;
1404   *f   = PETSC_FALSE;
1405   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1406   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1407   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1413 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1414 {
1415   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1416   PetscErrorCode ierr;
1417   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1418   int            fd;
1419   PetscScalar    *aa;
1420   FILE           *file;
1421 
1422   PetscFunctionBegin;
1423   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1424   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1425   col_lens[0] = MAT_FILE_CLASSID;
1426 
1427   col_lens[1] = A->rmap->N;
1428   col_lens[2] = A->cmap->n;
1429   col_lens[3] = a->nz*bs2;
1430 
1431   /* store lengths of each row and write (including header) to file */
1432   count = 0;
1433   for (i=0; i<a->mbs; i++) {
1434     for (j=0; j<bs; j++) {
1435       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1436     }
1437   }
1438   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1439   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1440 
1441   /* store column indices (zero start index) */
1442   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1443   count = 0;
1444   for (i=0; i<a->mbs; i++) {
1445     for (j=0; j<bs; j++) {
1446       for (k=a->i[i]; k<a->i[i+1]; k++) {
1447         for (l=0; l<bs; l++) {
1448           jj[count++] = bs*a->j[k] + l;
1449         }
1450       }
1451     }
1452   }
1453   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1454   ierr = PetscFree(jj);CHKERRQ(ierr);
1455 
1456   /* store nonzero values */
1457   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1458   count = 0;
1459   for (i=0; i<a->mbs; i++) {
1460     for (j=0; j<bs; j++) {
1461       for (k=a->i[i]; k<a->i[i+1]; k++) {
1462         for (l=0; l<bs; l++) {
1463           aa[count++] = a->a[bs2*k + l*bs + j];
1464         }
1465       }
1466     }
1467   }
1468   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1469   ierr = PetscFree(aa);CHKERRQ(ierr);
1470 
1471   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1472   if (file) {
1473     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1474   }
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1480 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1481 {
1482   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1483   PetscErrorCode    ierr;
1484   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1485   PetscViewerFormat format;
1486 
1487   PetscFunctionBegin;
1488   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1489   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1490     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1491   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1492     Mat aij;
1493     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1494     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1495     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1496   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1497       PetscFunctionReturn(0);
1498   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1499     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1500     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr);
1501     for (i=0; i<a->mbs; i++) {
1502       for (j=0; j<bs; j++) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1504         for (k=a->i[i]; k<a->i[i+1]; k++) {
1505           for (l=0; l<bs; l++) {
1506 #if defined(PETSC_USE_COMPLEX)
1507             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1508               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1509                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1510             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1511               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1512                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1513             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1514               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1515             }
1516 #else
1517             if (a->a[bs2*k + l*bs + j] != 0.0) {
1518               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1519             }
1520 #endif
1521           }
1522         }
1523         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1524       }
1525     }
1526     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1527   } else {
1528     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1529     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr);
1530     for (i=0; i<a->mbs; i++) {
1531       for (j=0; j<bs; j++) {
1532         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1533         for (k=a->i[i]; k<a->i[i+1]; k++) {
1534           for (l=0; l<bs; l++) {
1535 #if defined(PETSC_USE_COMPLEX)
1536             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1538                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1539             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1541                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1542             } else {
1543               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1544             }
1545 #else
1546             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1547 #endif
1548           }
1549         }
1550         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1551       }
1552     }
1553     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1554   }
1555   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #include <petscdraw.h>
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1562 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1563 {
1564   Mat               A = (Mat) Aa;
1565   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1566   PetscErrorCode    ierr;
1567   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1568   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1569   MatScalar         *aa;
1570   PetscViewer       viewer;
1571   PetscViewerFormat format;
1572 
1573   PetscFunctionBegin;
1574   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1575   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1576 
1577   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1578 
1579   /* loop over matrix elements drawing boxes */
1580 
1581   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1582     color = PETSC_DRAW_BLUE;
1583     for (i=0,row=0; i<mbs; i++,row+=bs) {
1584       for (j=a->i[i]; j<a->i[i+1]; j++) {
1585         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1586         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1587         aa  = a->a + j*bs2;
1588         for (k=0; k<bs; k++) {
1589           for (l=0; l<bs; l++) {
1590             if (PetscRealPart(*aa++) >=  0.) continue;
1591             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1592           }
1593         }
1594       }
1595     }
1596     color = PETSC_DRAW_CYAN;
1597     for (i=0,row=0; i<mbs; i++,row+=bs) {
1598       for (j=a->i[i]; j<a->i[i+1]; j++) {
1599         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1600         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1601         aa  = a->a + j*bs2;
1602         for (k=0; k<bs; k++) {
1603           for (l=0; l<bs; l++) {
1604             if (PetscRealPart(*aa++) != 0.) continue;
1605             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1606           }
1607         }
1608       }
1609     }
1610     color = PETSC_DRAW_RED;
1611     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615         aa  = a->a + j*bs2;
1616         for (k=0; k<bs; k++) {
1617           for (l=0; l<bs; l++) {
1618             if (PetscRealPart(*aa++) <= 0.) continue;
1619             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1620           }
1621         }
1622       }
1623     }
1624   } else {
1625     /* use contour shading to indicate magnitude of values */
1626     /* first determine max of all nonzero values */
1627     PetscDraw popup;
1628     PetscReal scale,maxv = 0.0;
1629 
1630     for (i=0; i<a->nz*a->bs2; i++) {
1631       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1632     }
1633     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1634     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1635     if (popup) {
1636       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1637     }
1638     for (i=0,row=0; i<mbs; i++,row+=bs) {
1639       for (j=a->i[i]; j<a->i[i+1]; j++) {
1640         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642         aa  = a->a + j*bs2;
1643         for (k=0; k<bs; k++) {
1644           for (l=0; l<bs; l++) {
1645             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1646             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647           }
1648         }
1649       }
1650     }
1651   }
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1657 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1658 {
1659   PetscErrorCode ierr;
1660   PetscReal      xl,yl,xr,yr,w,h;
1661   PetscDraw      draw;
1662   PetscBool      isnull;
1663 
1664   PetscFunctionBegin;
1665   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1666   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1667 
1668   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1669   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1670   xr  += w;    yr += h;  xl = -w;     yl = -h;
1671   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1672   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1673   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatView_SeqBAIJ"
1679 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1680 {
1681   PetscErrorCode ierr;
1682   PetscBool      iascii,isbinary,isdraw;
1683 
1684   PetscFunctionBegin;
1685   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1686   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1687   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1688   if (iascii) {
1689     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1690   } else if (isbinary) {
1691     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1692   } else if (isdraw) {
1693     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1694   } else {
1695     Mat B;
1696     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1697     ierr = MatView(B,viewer);CHKERRQ(ierr);
1698     ierr = MatDestroy(&B);CHKERRQ(ierr);
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1706 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1707 {
1708   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1709   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1710   PetscInt    *ai = a->i,*ailen = a->ilen;
1711   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1712   MatScalar   *ap,*aa = a->a;
1713 
1714   PetscFunctionBegin;
1715   for (k=0; k<m; k++) { /* loop over rows */
1716     row = im[k]; brow = row/bs;
1717     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1718     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1719     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1720     nrow = ailen[brow];
1721     for (l=0; l<n; l++) { /* loop over columns */
1722       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1723       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1724       col  = in[l];
1725       bcol = col/bs;
1726       cidx = col%bs;
1727       ridx = row%bs;
1728       high = nrow;
1729       low  = 0; /* assume unsorted */
1730       while (high-low > 5) {
1731         t = (low+high)/2;
1732         if (rp[t] > bcol) high = t;
1733         else             low  = t;
1734       }
1735       for (i=low; i<high; i++) {
1736         if (rp[i] > bcol) break;
1737         if (rp[i] == bcol) {
1738           *v++ = ap[bs2*i+bs*cidx+ridx];
1739           goto finished;
1740         }
1741       }
1742       *v++ = 0.0;
1743 finished:;
1744     }
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1751 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1752 {
1753   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1754   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1755   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1756   PetscErrorCode    ierr;
1757   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1758   PetscBool         roworiented=a->roworiented;
1759   const PetscScalar *value     = v;
1760   MatScalar         *ap,*aa = a->a,*bap;
1761 
1762   PetscFunctionBegin;
1763   if (roworiented) {
1764     stepval = (n-1)*bs;
1765   } else {
1766     stepval = (m-1)*bs;
1767   }
1768   for (k=0; k<m; k++) { /* loop over added rows */
1769     row = im[k];
1770     if (row < 0) continue;
1771 #if defined(PETSC_USE_DEBUG)
1772     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1773 #endif
1774     rp   = aj + ai[row];
1775     ap   = aa + bs2*ai[row];
1776     rmax = imax[row];
1777     nrow = ailen[row];
1778     low  = 0;
1779     high = nrow;
1780     for (l=0; l<n; l++) { /* loop over added columns */
1781       if (in[l] < 0) continue;
1782 #if defined(PETSC_USE_DEBUG)
1783       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1784 #endif
1785       col = in[l];
1786       if (roworiented) {
1787         value = v + (k*(stepval+bs) + l)*bs;
1788       } else {
1789         value = v + (l*(stepval+bs) + k)*bs;
1790       }
1791       if (col <= lastcol) low = 0;
1792       else high = nrow;
1793       lastcol = col;
1794       while (high-low > 7) {
1795         t = (low+high)/2;
1796         if (rp[t] > col) high = t;
1797         else             low  = t;
1798       }
1799       for (i=low; i<high; i++) {
1800         if (rp[i] > col) break;
1801         if (rp[i] == col) {
1802           bap = ap +  bs2*i;
1803           if (roworiented) {
1804             if (is == ADD_VALUES) {
1805               for (ii=0; ii<bs; ii++,value+=stepval) {
1806                 for (jj=ii; jj<bs2; jj+=bs) {
1807                   bap[jj] += *value++;
1808                 }
1809               }
1810             } else {
1811               for (ii=0; ii<bs; ii++,value+=stepval) {
1812                 for (jj=ii; jj<bs2; jj+=bs) {
1813                   bap[jj] = *value++;
1814                 }
1815               }
1816             }
1817           } else {
1818             if (is == ADD_VALUES) {
1819               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1820                 for (jj=0; jj<bs; jj++) {
1821                   bap[jj] += value[jj];
1822                 }
1823                 bap += bs;
1824               }
1825             } else {
1826               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1827                 for (jj=0; jj<bs; jj++) {
1828                   bap[jj]  = value[jj];
1829                 }
1830                 bap += bs;
1831               }
1832             }
1833           }
1834           goto noinsert2;
1835         }
1836       }
1837       if (nonew == 1) goto noinsert2;
1838       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1839       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1840       N = nrow++ - 1; high++;
1841       /* shift up all the later entries in this row */
1842       for (ii=N; ii>=i; ii--) {
1843         rp[ii+1] = rp[ii];
1844         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1845       }
1846       if (N >= i) {
1847         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1848       }
1849       rp[i] = col;
1850       bap   = ap +  bs2*i;
1851       if (roworiented) {
1852         for (ii=0; ii<bs; ii++,value+=stepval) {
1853           for (jj=ii; jj<bs2; jj+=bs) {
1854             bap[jj] = *value++;
1855           }
1856         }
1857       } else {
1858         for (ii=0; ii<bs; ii++,value+=stepval) {
1859           for (jj=0; jj<bs; jj++) {
1860             *bap++ = *value++;
1861           }
1862         }
1863       }
1864 noinsert2:;
1865       low = i;
1866     }
1867     ailen[row] = nrow;
1868   }
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1874 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1875 {
1876   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1877   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1878   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1879   PetscErrorCode ierr;
1880   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1881   MatScalar      *aa  = a->a,*ap;
1882   PetscReal      ratio=0.6;
1883 
1884   PetscFunctionBegin;
1885   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1886 
1887   if (m) rmax = ailen[0];
1888   for (i=1; i<mbs; i++) {
1889     /* move each row back by the amount of empty slots (fshift) before it*/
1890     fshift += imax[i-1] - ailen[i-1];
1891     rmax    = PetscMax(rmax,ailen[i]);
1892     if (fshift) {
1893       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1894       N  = ailen[i];
1895       for (j=0; j<N; j++) {
1896         ip[j-fshift] = ip[j];
1897 
1898         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1899       }
1900     }
1901     ai[i] = ai[i-1] + ailen[i-1];
1902   }
1903   if (mbs) {
1904     fshift += imax[mbs-1] - ailen[mbs-1];
1905     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1906   }
1907 
1908   /* reset ilen and imax for each row */
1909   a->nonzerorowcnt = 0;
1910   for (i=0; i<mbs; i++) {
1911     ailen[i] = imax[i] = ai[i+1] - ai[i];
1912     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1913   }
1914   a->nz = ai[mbs];
1915 
1916   /* diagonals may have moved, so kill the diagonal pointers */
1917   a->idiagvalid = PETSC_FALSE;
1918   if (fshift && a->diag) {
1919     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1920     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1921     a->diag = 0;
1922   }
1923   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1924   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1925   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1926   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1927 
1928   A->info.mallocs    += a->reallocs;
1929   a->reallocs         = 0;
1930   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1931 
1932   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1933 
1934   A->same_nonzero = PETSC_TRUE;
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 /*
1939    This function returns an array of flags which indicate the locations of contiguous
1940    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1941    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1942    Assume: sizes should be long enough to hold all the values.
1943 */
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1946 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1947 {
1948   PetscInt  i,j,k,row;
1949   PetscBool flg;
1950 
1951   PetscFunctionBegin;
1952   for (i=0,j=0; i<n; j++) {
1953     row = idx[i];
1954     if (row%bs!=0) { /* Not the begining of a block */
1955       sizes[j] = 1;
1956       i++;
1957     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1958       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1959       i++;
1960     } else { /* Begining of the block, so check if the complete block exists */
1961       flg = PETSC_TRUE;
1962       for (k=1; k<bs; k++) {
1963         if (row+k != idx[i+k]) { /* break in the block */
1964           flg = PETSC_FALSE;
1965           break;
1966         }
1967       }
1968       if (flg) { /* No break in the bs */
1969         sizes[j] = bs;
1970         i       += bs;
1971       } else {
1972         sizes[j] = 1;
1973         i++;
1974       }
1975     }
1976   }
1977   *bs_max = j;
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1983 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1984 {
1985   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1986   PetscErrorCode    ierr;
1987   PetscInt          i,j,k,count,*rows;
1988   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1989   PetscScalar       zero = 0.0;
1990   MatScalar         *aa;
1991   const PetscScalar *xx;
1992   PetscScalar       *bb;
1993 
1994   PetscFunctionBegin;
1995   /* fix right hand side if needed */
1996   if (x && b) {
1997     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1998     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1999     for (i=0; i<is_n; i++) {
2000       bb[is_idx[i]] = diag*xx[is_idx[i]];
2001     }
2002     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2003     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2004   }
2005 
2006   /* Make a copy of the IS and  sort it */
2007   /* allocate memory for rows,sizes */
2008   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2009 
2010   /* copy IS values to rows, and sort them */
2011   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2012   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2013 
2014   if (baij->keepnonzeropattern) {
2015     for (i=0; i<is_n; i++) sizes[i] = 1;
2016     bs_max          = is_n;
2017     A->same_nonzero = PETSC_TRUE;
2018   } else {
2019     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2020 
2021     A->same_nonzero = PETSC_FALSE;
2022   }
2023 
2024   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2025     row = rows[j];
2026     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2027     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2028     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2029     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2030       if (diag != (PetscScalar)0.0) {
2031         if (baij->ilen[row/bs] > 0) {
2032           baij->ilen[row/bs]       = 1;
2033           baij->j[baij->i[row/bs]] = row/bs;
2034 
2035           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2036         }
2037         /* Now insert all the diagonal values for this bs */
2038         for (k=0; k<bs; k++) {
2039           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2040         }
2041       } else { /* (diag == 0.0) */
2042         baij->ilen[row/bs] = 0;
2043       } /* end (diag == 0.0) */
2044     } else { /* (sizes[i] != bs) */
2045 #if defined(PETSC_USE_DEBUG)
2046       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2047 #endif
2048       for (k=0; k<count; k++) {
2049         aa[0] =  zero;
2050         aa   += bs;
2051       }
2052       if (diag != (PetscScalar)0.0) {
2053         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2054       }
2055     }
2056   }
2057 
2058   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2059   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2065 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2066 {
2067   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2068   PetscErrorCode    ierr;
2069   PetscInt          i,j,k,count;
2070   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2071   PetscScalar       zero = 0.0;
2072   MatScalar         *aa;
2073   const PetscScalar *xx;
2074   PetscScalar       *bb;
2075   PetscBool         *zeroed,vecs = PETSC_FALSE;
2076 
2077   PetscFunctionBegin;
2078   /* fix right hand side if needed */
2079   if (x && b) {
2080     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2081     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2082     vecs = PETSC_TRUE;
2083   }
2084   A->same_nonzero = PETSC_TRUE;
2085 
2086   /* zero the columns */
2087   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2088   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2089   for (i=0; i<is_n; i++) {
2090     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2091     zeroed[is_idx[i]] = PETSC_TRUE;
2092   }
2093   for (i=0; i<A->rmap->N; i++) {
2094     if (!zeroed[i]) {
2095       row = i/bs;
2096       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2097         for (k=0; k<bs; k++) {
2098           col = bs*baij->j[j] + k;
2099           if (zeroed[col]) {
2100             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2101             if (vecs) bb[i] -= aa[0]*xx[col];
2102             aa[0] = 0.0;
2103           }
2104         }
2105       }
2106     } else if (vecs) bb[i] = diag*xx[i];
2107   }
2108   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2109   if (vecs) {
2110     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2111     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2112   }
2113 
2114   /* zero the rows */
2115   for (i=0; i<is_n; i++) {
2116     row   = is_idx[i];
2117     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2118     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2119     for (k=0; k<count; k++) {
2120       aa[0] =  zero;
2121       aa   += bs;
2122     }
2123     if (diag != (PetscScalar)0.0) {
2124       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2125     }
2126   }
2127   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2133 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2134 {
2135   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2136   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2137   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2138   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2139   PetscErrorCode ierr;
2140   PetscInt       ridx,cidx,bs2=a->bs2;
2141   PetscBool      roworiented=a->roworiented;
2142   MatScalar      *ap,value,*aa=a->a,*bap;
2143 
2144   PetscFunctionBegin;
2145   if (v) PetscValidScalarPointer(v,6);
2146   for (k=0; k<m; k++) { /* loop over added rows */
2147     row  = im[k];
2148     brow = row/bs;
2149     if (row < 0) continue;
2150 #if defined(PETSC_USE_DEBUG)
2151     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2152 #endif
2153     rp   = aj + ai[brow];
2154     ap   = aa + bs2*ai[brow];
2155     rmax = imax[brow];
2156     nrow = ailen[brow];
2157     low  = 0;
2158     high = nrow;
2159     for (l=0; l<n; l++) { /* loop over added columns */
2160       if (in[l] < 0) continue;
2161 #if defined(PETSC_USE_DEBUG)
2162       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2163 #endif
2164       col  = in[l]; bcol = col/bs;
2165       ridx = row % bs; cidx = col % bs;
2166       if (roworiented) {
2167         value = v[l + k*n];
2168       } else {
2169         value = v[k + l*m];
2170       }
2171       if (col <= lastcol) low = 0; else high = nrow;
2172       lastcol = col;
2173       while (high-low > 7) {
2174         t = (low+high)/2;
2175         if (rp[t] > bcol) high = t;
2176         else              low  = t;
2177       }
2178       for (i=low; i<high; i++) {
2179         if (rp[i] > bcol) break;
2180         if (rp[i] == bcol) {
2181           bap = ap +  bs2*i + bs*cidx + ridx;
2182           if (is == ADD_VALUES) *bap += value;
2183           else                  *bap  = value;
2184           goto noinsert1;
2185         }
2186       }
2187       if (nonew == 1) goto noinsert1;
2188       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2189       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2190       N = nrow++ - 1; high++;
2191       /* shift up all the later entries in this row */
2192       for (ii=N; ii>=i; ii--) {
2193         rp[ii+1] = rp[ii];
2194         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2195       }
2196       if (N>=i) {
2197         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2198       }
2199       rp[i]                      = bcol;
2200       ap[bs2*i + bs*cidx + ridx] = value;
2201       a->nz++;
2202 noinsert1:;
2203       low = i;
2204     }
2205     ailen[brow] = nrow;
2206   }
2207   A->same_nonzero = PETSC_FALSE;
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 #undef __FUNCT__
2212 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2213 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2214 {
2215   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2216   Mat            outA;
2217   PetscErrorCode ierr;
2218   PetscBool      row_identity,col_identity;
2219 
2220   PetscFunctionBegin;
2221   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2222   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2223   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2224   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2225 
2226   outA            = inA;
2227   inA->factortype = MAT_FACTOR_LU;
2228 
2229   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2230 
2231   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2232   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2233   a->row = row;
2234   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2235   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2236   a->col = col;
2237 
2238   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2239   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2240   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2241   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2242 
2243   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2244   if (!a->solve_work) {
2245     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2246     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2247   }
2248   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2254 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2255 {
2256   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2257   PetscInt    i,nz,mbs;
2258 
2259   PetscFunctionBegin;
2260   nz  = baij->maxnz;
2261   mbs = baij->mbs;
2262   for (i=0; i<nz; i++) {
2263     baij->j[i] = indices[i];
2264   }
2265   baij->nz = nz;
2266   for (i=0; i<mbs; i++) {
2267     baij->ilen[i] = baij->imax[i];
2268   }
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2274 /*@
2275     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2276        in the matrix.
2277 
2278   Input Parameters:
2279 +  mat - the SeqBAIJ matrix
2280 -  indices - the column indices
2281 
2282   Level: advanced
2283 
2284   Notes:
2285     This can be called if you have precomputed the nonzero structure of the
2286   matrix and want to provide it to the matrix object to improve the performance
2287   of the MatSetValues() operation.
2288 
2289     You MUST have set the correct numbers of nonzeros per row in the call to
2290   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2291 
2292     MUST be called before any calls to MatSetValues();
2293 
2294 @*/
2295 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2296 {
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2301   PetscValidPointer(indices,2);
2302   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2308 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2309 {
2310   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2311   PetscErrorCode ierr;
2312   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2313   PetscReal      atmp;
2314   PetscScalar    *x,zero = 0.0;
2315   MatScalar      *aa;
2316   PetscInt       ncols,brow,krow,kcol;
2317 
2318   PetscFunctionBegin;
2319   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2320   bs  = A->rmap->bs;
2321   aa  = a->a;
2322   ai  = a->i;
2323   aj  = a->j;
2324   mbs = a->mbs;
2325 
2326   ierr = VecSet(v,zero);CHKERRQ(ierr);
2327   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2328   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2329   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2330   for (i=0; i<mbs; i++) {
2331     ncols = ai[1] - ai[0]; ai++;
2332     brow  = bs*i;
2333     for (j=0; j<ncols; j++) {
2334       for (kcol=0; kcol<bs; kcol++) {
2335         for (krow=0; krow<bs; krow++) {
2336           atmp = PetscAbsScalar(*aa);aa++;
2337           row  = brow + krow;   /* row index */
2338           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2339           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2340         }
2341       }
2342       aj++;
2343     }
2344   }
2345   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatCopy_SeqBAIJ"
2351 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2352 {
2353   PetscErrorCode ierr;
2354 
2355   PetscFunctionBegin;
2356   /* If the two matrices have the same copy implementation, use fast copy. */
2357   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2358     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2359     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2360     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2361 
2362     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2363     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2364     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2365   } else {
2366     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2367   }
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2373 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2374 {
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2384 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2385 {
2386   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2387 
2388   PetscFunctionBegin;
2389   *array = a->a;
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2395 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2396 {
2397   PetscFunctionBegin;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2403 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2404 {
2405   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2408   PetscBLASInt   one=1;
2409 
2410   PetscFunctionBegin;
2411   if (str == SAME_NONZERO_PATTERN) {
2412     PetscScalar  alpha = a;
2413     PetscBLASInt bnz;
2414     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2415     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2416   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2417     if (y->xtoy && y->XtoY != X) {
2418       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2419       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2420     }
2421     if (!y->xtoy) { /* get xtoy */
2422       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2423       y->XtoY = X;
2424       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2425     }
2426     for (i=0; i<x->nz; i++) {
2427       j = 0;
2428       while (j < bs2) {
2429         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2430         j++;
2431       }
2432     }
2433     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2434   } else {
2435     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2436   }
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2442 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2443 {
2444   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2445   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2446   MatScalar   *aa = a->a;
2447 
2448   PetscFunctionBegin;
2449   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2455 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2456 {
2457   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2458   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2459   MatScalar   *aa = a->a;
2460 
2461   PetscFunctionBegin;
2462   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2470 /*
2471     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2472 */
2473 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2474 {
2475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2476   PetscErrorCode ierr;
2477   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2478   PetscInt       nz = a->i[m],row,*jj,mr,col;
2479 
2480   PetscFunctionBegin;
2481   *nn = n;
2482   if (!ia) PetscFunctionReturn(0);
2483   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2484   else {
2485     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2486     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2487     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2488     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2489     jj   = a->j;
2490     for (i=0; i<nz; i++) {
2491       collengths[jj[i]]++;
2492     }
2493     cia[0] = oshift;
2494     for (i=0; i<n; i++) {
2495       cia[i+1] = cia[i] + collengths[i];
2496     }
2497     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2498     jj   = a->j;
2499     for (row=0; row<m; row++) {
2500       mr = a->i[row+1] - a->i[row];
2501       for (i=0; i<mr; i++) {
2502         col = *jj++;
2503 
2504         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2505       }
2506     }
2507     ierr = PetscFree(collengths);CHKERRQ(ierr);
2508     *ia  = cia; *ja = cja;
2509   }
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 #undef __FUNCT__
2514 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2515 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2516 {
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   if (!ia) PetscFunctionReturn(0);
2521   ierr = PetscFree(*ia);CHKERRQ(ierr);
2522   ierr = PetscFree(*ja);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2528 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2529 {
2530   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2531   PetscErrorCode ierr;
2532   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2533   PetscScalar    dx,*y,*xx,*w3_array;
2534   PetscScalar    *vscale_array;
2535   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2536   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2537   void           *fctx   = coloring->fctx;
2538   PetscBool      flg     = PETSC_FALSE;
2539   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2540   Vec            x1_tmp;
2541 
2542   PetscFunctionBegin;
2543   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2544   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2545   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2546   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2547 
2548   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2549   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2550   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2551   if (flg) {
2552     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2553   } else {
2554     PetscBool assembled;
2555     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2556     if (assembled) {
2557       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2558     }
2559   }
2560 
2561   x1_tmp = x1;
2562   if (!coloring->vscale) {
2563     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2564   }
2565 
2566   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2567     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2568   }
2569   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2570 
2571   /* Set w1 = F(x1) */
2572   if (!coloring->fset) {
2573     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2574     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2575     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2576   } else {
2577     coloring->fset = PETSC_FALSE;
2578   }
2579 
2580   if (!coloring->w3) {
2581     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2582     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2583   }
2584   w3 = coloring->w3;
2585 
2586   /* Compute all the local scale factors, including ghost points */
2587   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2588   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2589   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2590   if (ctype == IS_COLORING_GHOSTED) {
2591     col_start = 0; col_end = N;
2592   } else if (ctype == IS_COLORING_GLOBAL) {
2593     xx           = xx - start;
2594     vscale_array = vscale_array - start;
2595     col_start    = start; col_end = N + start;
2596   }
2597   for (col=col_start; col<col_end; col++) {
2598     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2599     if (coloring->htype[0] == 'w') {
2600       dx = 1.0 + unorm;
2601     } else {
2602       dx = xx[col];
2603     }
2604     if (dx == (PetscScalar)0.0) dx = 1.0;
2605     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2606     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2607     dx               *= epsilon;
2608     vscale_array[col] = (PetscScalar)1.0/dx;
2609   }
2610   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2611   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2612   if (ctype == IS_COLORING_GLOBAL) {
2613     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2614     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2615   }
2616   if (coloring->vscaleforrow) {
2617     vscaleforrow = coloring->vscaleforrow;
2618   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2619 
2620   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2621   /*
2622     Loop over each color
2623   */
2624   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2625   for (k=0; k<coloring->ncolors; k++) {
2626     coloring->currentcolor = k;
2627     for (i=0; i<bs; i++) {
2628       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2629       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2630       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2631       /*
2632         Loop over each column associated with color
2633         adding the perturbation to the vector w3.
2634       */
2635       for (l=0; l<coloring->ncolumns[k]; l++) {
2636         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2637         if (coloring->htype[0] == 'w') {
2638           dx = 1.0 + unorm;
2639         } else {
2640           dx = xx[col];
2641         }
2642         if (dx == (PetscScalar)0.0) dx = 1.0;
2643         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2644         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2645         dx *= epsilon;
2646         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2647         w3_array[col] += dx;
2648       }
2649       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2650       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2651 
2652       /*
2653         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2654         w2 = F(x1 + dx) - F(x1)
2655       */
2656       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2657       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2658       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2659       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2660 
2661       /*
2662         Loop over rows of vector, putting results into Jacobian matrix
2663       */
2664       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2665       for (l=0; l<coloring->nrows[k]; l++) {
2666         row = bs*coloring->rows[k][l];                /* local row index */
2667         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2668         for (j=0; j<bs; j++) {
2669           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2670           srows[j]  = row + start + j;
2671         }
2672         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2673       }
2674       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2675     }
2676   } /* endof for each color */
2677   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2678   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2679   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2680   ierr = PetscFree(srows);CHKERRQ(ierr);
2681 
2682   coloring->currentcolor = -1;
2683 
2684   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2685   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2686   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 /* -------------------------------------------------------------------*/
2691 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2692                                        MatGetRow_SeqBAIJ,
2693                                        MatRestoreRow_SeqBAIJ,
2694                                        MatMult_SeqBAIJ_N,
2695                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2696                                        MatMultTranspose_SeqBAIJ,
2697                                        MatMultTransposeAdd_SeqBAIJ,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                /* 10*/ 0,
2702                                        MatLUFactor_SeqBAIJ,
2703                                        0,
2704                                        0,
2705                                        MatTranspose_SeqBAIJ,
2706                                /* 15*/ MatGetInfo_SeqBAIJ,
2707                                        MatEqual_SeqBAIJ,
2708                                        MatGetDiagonal_SeqBAIJ,
2709                                        MatDiagonalScale_SeqBAIJ,
2710                                        MatNorm_SeqBAIJ,
2711                                /* 20*/ 0,
2712                                        MatAssemblyEnd_SeqBAIJ,
2713                                        MatSetOption_SeqBAIJ,
2714                                        MatZeroEntries_SeqBAIJ,
2715                                /* 24*/ MatZeroRows_SeqBAIJ,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                /* 29*/ MatSetUp_SeqBAIJ,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                /* 34*/ MatDuplicate_SeqBAIJ,
2726                                        0,
2727                                        0,
2728                                        MatILUFactor_SeqBAIJ,
2729                                        0,
2730                                /* 39*/ MatAXPY_SeqBAIJ,
2731                                        MatGetSubMatrices_SeqBAIJ,
2732                                        MatIncreaseOverlap_SeqBAIJ,
2733                                        MatGetValues_SeqBAIJ,
2734                                        MatCopy_SeqBAIJ,
2735                                /* 44*/ 0,
2736                                        MatScale_SeqBAIJ,
2737                                        0,
2738                                        0,
2739                                        MatZeroRowsColumns_SeqBAIJ,
2740                                /* 49*/ 0,
2741                                        MatGetRowIJ_SeqBAIJ,
2742                                        MatRestoreRowIJ_SeqBAIJ,
2743                                        MatGetColumnIJ_SeqBAIJ,
2744                                        MatRestoreColumnIJ_SeqBAIJ,
2745                                /* 54*/ MatFDColoringCreate_SeqAIJ,
2746                                        0,
2747                                        0,
2748                                        0,
2749                                        MatSetValuesBlocked_SeqBAIJ,
2750                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2751                                        MatDestroy_SeqBAIJ,
2752                                        MatView_SeqBAIJ,
2753                                        0,
2754                                        0,
2755                                /* 64*/ 0,
2756                                        0,
2757                                        0,
2758                                        0,
2759                                        0,
2760                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2761                                        0,
2762                                        MatConvert_Basic,
2763                                        0,
2764                                        0,
2765                                /* 74*/ 0,
2766                                        MatFDColoringApply_BAIJ,
2767                                        0,
2768                                        0,
2769                                        0,
2770                                /* 79*/ 0,
2771                                        0,
2772                                        0,
2773                                        0,
2774                                        MatLoad_SeqBAIJ,
2775                                /* 84*/ 0,
2776                                        0,
2777                                        0,
2778                                        0,
2779                                        0,
2780                                /* 89*/ 0,
2781                                        0,
2782                                        0,
2783                                        0,
2784                                        0,
2785                                /* 94*/ 0,
2786                                        0,
2787                                        0,
2788                                        0,
2789                                        0,
2790                                /* 99*/ 0,
2791                                        0,
2792                                        0,
2793                                        0,
2794                                        0,
2795                                /*104*/ 0,
2796                                        MatRealPart_SeqBAIJ,
2797                                        MatImaginaryPart_SeqBAIJ,
2798                                        0,
2799                                        0,
2800                                /*109*/ 0,
2801                                        0,
2802                                        0,
2803                                        0,
2804                                        MatMissingDiagonal_SeqBAIJ,
2805                                /*114*/ 0,
2806                                        0,
2807                                        0,
2808                                        0,
2809                                        0,
2810                                /*119*/ 0,
2811                                        0,
2812                                        MatMultHermitianTranspose_SeqBAIJ,
2813                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2814                                        0,
2815                                /*124*/ 0,
2816                                        0,
2817                                        MatInvertBlockDiagonal_SeqBAIJ,
2818                                        0,
2819                                        0,
2820                                /*129*/ 0,
2821                                        0,
2822                                        0,
2823                                        0,
2824                                        0,
2825                                /*134*/ 0,
2826                                        0,
2827                                        0,
2828                                        0,
2829                                        0,
2830                                /*139*/ 0,
2831                                        0
2832 };
2833 
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2836 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2837 {
2838   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2839   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2840   PetscErrorCode ierr;
2841 
2842   PetscFunctionBegin;
2843   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2844 
2845   /* allocate space for values if not already there */
2846   if (!aij->saved_values) {
2847     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2848     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2849   }
2850 
2851   /* copy values over */
2852   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2853   PetscFunctionReturn(0);
2854 }
2855 
2856 #undef __FUNCT__
2857 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2858 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2859 {
2860   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2861   PetscErrorCode ierr;
2862   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2863 
2864   PetscFunctionBegin;
2865   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2866   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2867 
2868   /* copy values over */
2869   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2874 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2875 
2876 #undef __FUNCT__
2877 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2878 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2879 {
2880   Mat_SeqBAIJ    *b;
2881   PetscErrorCode ierr;
2882   PetscInt       i,mbs,nbs,bs2;
2883   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2884 
2885   PetscFunctionBegin;
2886   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2887   if (nz == MAT_SKIP_ALLOCATION) {
2888     skipallocation = PETSC_TRUE;
2889     nz             = 0;
2890   }
2891 
2892   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2893   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2894   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2895   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2896   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2897 
2898   B->preallocated = PETSC_TRUE;
2899 
2900   mbs = B->rmap->n/bs;
2901   nbs = B->cmap->n/bs;
2902   bs2 = bs*bs;
2903 
2904   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2905 
2906   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2907   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2908   if (nnz) {
2909     for (i=0; i<mbs; i++) {
2910       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2911       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2912     }
2913   }
2914 
2915   b    = (Mat_SeqBAIJ*)B->data;
2916   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2917   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2918   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2919 
2920   if (!flg) {
2921     switch (bs) {
2922     case 1:
2923       B->ops->mult    = MatMult_SeqBAIJ_1;
2924       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2925       break;
2926     case 2:
2927       B->ops->mult    = MatMult_SeqBAIJ_2;
2928       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2929       break;
2930     case 3:
2931       B->ops->mult    = MatMult_SeqBAIJ_3;
2932       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2933       break;
2934     case 4:
2935       B->ops->mult    = MatMult_SeqBAIJ_4;
2936       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2937       break;
2938     case 5:
2939       B->ops->mult    = MatMult_SeqBAIJ_5;
2940       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2941       break;
2942     case 6:
2943       B->ops->mult    = MatMult_SeqBAIJ_6;
2944       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2945       break;
2946     case 7:
2947       B->ops->mult    = MatMult_SeqBAIJ_7;
2948       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2949       break;
2950     case 15:
2951       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2952       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2953       break;
2954     default:
2955       B->ops->mult    = MatMult_SeqBAIJ_N;
2956       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2957       break;
2958     }
2959   }
2960   B->ops->sor = MatSOR_SeqBAIJ;
2961   b->mbs = mbs;
2962   b->nbs = nbs;
2963   if (!skipallocation) {
2964     if (!b->imax) {
2965       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2966       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2967 
2968       b->free_imax_ilen = PETSC_TRUE;
2969     }
2970     /* b->ilen will count nonzeros in each block row so far. */
2971     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2972     if (!nnz) {
2973       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2974       else if (nz < 0) nz = 1;
2975       for (i=0; i<mbs; i++) b->imax[i] = nz;
2976       nz = nz*mbs;
2977     } else {
2978       nz = 0;
2979       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2980     }
2981 
2982     /* allocate the matrix space */
2983     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2984     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2985     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2986     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2987     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2988 
2989     b->singlemalloc = PETSC_TRUE;
2990     b->i[0]         = 0;
2991     for (i=1; i<mbs+1; i++) {
2992       b->i[i] = b->i[i-1] + b->imax[i-1];
2993     }
2994     b->free_a  = PETSC_TRUE;
2995     b->free_ij = PETSC_TRUE;
2996   } else {
2997     b->free_a  = PETSC_FALSE;
2998     b->free_ij = PETSC_FALSE;
2999   }
3000 
3001   b->bs2              = bs2;
3002   b->mbs              = mbs;
3003   b->nz               = 0;
3004   b->maxnz            = nz;
3005   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3006   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 #undef __FUNCT__
3011 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3012 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3013 {
3014   PetscInt       i,m,nz,nz_max=0,*nnz;
3015   PetscScalar    *values=0;
3016   PetscErrorCode ierr;
3017 
3018   PetscFunctionBegin;
3019   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3020   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3021   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3022   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3023   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3024   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3025   m    = B->rmap->n/bs;
3026 
3027   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3028   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3029   for (i=0; i<m; i++) {
3030     nz = ii[i+1]- ii[i];
3031     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3032     nz_max = PetscMax(nz_max, nz);
3033     nnz[i] = nz;
3034   }
3035   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3036   ierr = PetscFree(nnz);CHKERRQ(ierr);
3037 
3038   values = (PetscScalar*)V;
3039   if (!values) {
3040     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3041     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3042   }
3043   for (i=0; i<m; i++) {
3044     PetscInt          ncols  = ii[i+1] - ii[i];
3045     const PetscInt    *icols = jj + ii[i];
3046     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3047     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3048   }
3049   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3050   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3051   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3052   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3057 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3058 #if defined(PETSC_HAVE_MUMPS)
3059 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3060 #endif
3061 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3062 
3063 /*MC
3064    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3065    block sparse compressed row format.
3066 
3067    Options Database Keys:
3068 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3069 
3070   Level: beginner
3071 
3072 .seealso: MatCreateSeqBAIJ()
3073 M*/
3074 
3075 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3076 
3077 #undef __FUNCT__
3078 #define __FUNCT__ "MatCreate_SeqBAIJ"
3079 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3080 {
3081   PetscErrorCode ierr;
3082   PetscMPIInt    size;
3083   Mat_SeqBAIJ    *b;
3084 
3085   PetscFunctionBegin;
3086   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3087   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3088 
3089   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3090   B->data = (void*)b;
3091   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3092 
3093   b->row          = 0;
3094   b->col          = 0;
3095   b->icol         = 0;
3096   b->reallocs     = 0;
3097   b->saved_values = 0;
3098 
3099   b->roworiented        = PETSC_TRUE;
3100   b->nonew              = 0;
3101   b->diag               = 0;
3102   b->solve_work         = 0;
3103   b->mult_work          = 0;
3104   B->spptr              = 0;
3105   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3106   b->keepnonzeropattern = PETSC_FALSE;
3107   b->xtoy               = 0;
3108   b->XtoY               = 0;
3109   B->same_nonzero       = PETSC_FALSE;
3110 
3111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3114 #if defined(PETSC_HAVE_MUMPS)
3115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3116 #endif
3117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3120   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3122   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3123   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3124   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3125   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3126   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3127   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3128   PetscFunctionReturn(0);
3129 }
3130 
3131 #undef __FUNCT__
3132 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3133 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3134 {
3135   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3136   PetscErrorCode ierr;
3137   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3138 
3139   PetscFunctionBegin;
3140   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3141 
3142   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3143     c->imax           = a->imax;
3144     c->ilen           = a->ilen;
3145     c->free_imax_ilen = PETSC_FALSE;
3146   } else {
3147     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3148     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3149     for (i=0; i<mbs; i++) {
3150       c->imax[i] = a->imax[i];
3151       c->ilen[i] = a->ilen[i];
3152     }
3153     c->free_imax_ilen = PETSC_TRUE;
3154   }
3155 
3156   /* allocate the matrix space */
3157   if (mallocmatspace) {
3158     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3159       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3160       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3161       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3162 
3163       c->i            = a->i;
3164       c->j            = a->j;
3165       c->singlemalloc = PETSC_FALSE;
3166       c->free_a       = PETSC_TRUE;
3167       c->free_ij      = PETSC_FALSE;
3168       c->parent       = A;
3169       C->preallocated = PETSC_TRUE;
3170       C->assembled    = PETSC_TRUE;
3171 
3172       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3173       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3174       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3175     } else {
3176       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3177       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3178 
3179       c->singlemalloc = PETSC_TRUE;
3180       c->free_a       = PETSC_TRUE;
3181       c->free_ij      = PETSC_TRUE;
3182 
3183       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3184       if (mbs > 0) {
3185         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3186         if (cpvalues == MAT_COPY_VALUES) {
3187           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3188         } else {
3189           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3190         }
3191       }
3192       C->preallocated = PETSC_TRUE;
3193       C->assembled    = PETSC_TRUE;
3194     }
3195   }
3196 
3197   c->roworiented = a->roworiented;
3198   c->nonew       = a->nonew;
3199 
3200   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3201   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3202 
3203   c->bs2         = a->bs2;
3204   c->mbs         = a->mbs;
3205   c->nbs         = a->nbs;
3206 
3207   if (a->diag) {
3208     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3209       c->diag      = a->diag;
3210       c->free_diag = PETSC_FALSE;
3211     } else {
3212       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3213       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3214       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3215       c->free_diag = PETSC_TRUE;
3216     }
3217   } else c->diag = 0;
3218 
3219   c->nz         = a->nz;
3220   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3221   c->solve_work = 0;
3222   c->mult_work  = 0;
3223 
3224   c->compressedrow.use   = a->compressedrow.use;
3225   c->compressedrow.nrows = a->compressedrow.nrows;
3226   c->compressedrow.check = a->compressedrow.check;
3227   if (a->compressedrow.use) {
3228     i    = a->compressedrow.nrows;
3229     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3230     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3231     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3232     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3233   } else {
3234     c->compressedrow.use    = PETSC_FALSE;
3235     c->compressedrow.i      = NULL;
3236     c->compressedrow.rindex = NULL;
3237   }
3238   C->same_nonzero = A->same_nonzero;
3239 
3240   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3241   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 #undef __FUNCT__
3246 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3247 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3248 {
3249   PetscErrorCode ierr;
3250 
3251   PetscFunctionBegin;
3252   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3253   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3254   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3255   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatLoad_SeqBAIJ"
3261 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3262 {
3263   Mat_SeqBAIJ    *a;
3264   PetscErrorCode ierr;
3265   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3266   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3267   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3268   PetscInt       *masked,nmask,tmp,bs2,ishift;
3269   PetscMPIInt    size;
3270   int            fd;
3271   PetscScalar    *aa;
3272   MPI_Comm       comm;
3273 
3274   PetscFunctionBegin;
3275   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3276   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3277   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3278   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3279   bs2  = bs*bs;
3280 
3281   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3282   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3283   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3284   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3285   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3286   M = header[1]; N = header[2]; nz = header[3];
3287 
3288   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3289   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3290 
3291   /*
3292      This code adds extra rows to make sure the number of rows is
3293     divisible by the blocksize
3294   */
3295   mbs        = M/bs;
3296   extra_rows = bs - M + bs*(mbs);
3297   if (extra_rows == bs) extra_rows = 0;
3298   else mbs++;
3299   if (extra_rows) {
3300     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3301   }
3302 
3303   /* Set global sizes if not already set */
3304   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3305     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3306   } else { /* Check if the matrix global sizes are correct */
3307     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3308     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3309       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3310     }
3311     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3312   }
3313 
3314   /* read in row lengths */
3315   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3316   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3317   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3318 
3319   /* read in column indices */
3320   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3321   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3322   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3323 
3324   /* loop over row lengths determining block row lengths */
3325   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3326   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3327   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3328   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3329   rowcount = 0;
3330   nzcount  = 0;
3331   for (i=0; i<mbs; i++) {
3332     nmask = 0;
3333     for (j=0; j<bs; j++) {
3334       kmax = rowlengths[rowcount];
3335       for (k=0; k<kmax; k++) {
3336         tmp = jj[nzcount++]/bs;
3337         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3338       }
3339       rowcount++;
3340     }
3341     browlengths[i] += nmask;
3342     /* zero out the mask elements we set */
3343     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3344   }
3345 
3346   /* Do preallocation  */
3347   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3348   a    = (Mat_SeqBAIJ*)newmat->data;
3349 
3350   /* set matrix "i" values */
3351   a->i[0] = 0;
3352   for (i=1; i<= mbs; i++) {
3353     a->i[i]      = a->i[i-1] + browlengths[i-1];
3354     a->ilen[i-1] = browlengths[i-1];
3355   }
3356   a->nz = 0;
3357   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3358 
3359   /* read in nonzero values */
3360   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3361   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3362   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3363 
3364   /* set "a" and "j" values into matrix */
3365   nzcount = 0; jcount = 0;
3366   for (i=0; i<mbs; i++) {
3367     nzcountb = nzcount;
3368     nmask    = 0;
3369     for (j=0; j<bs; j++) {
3370       kmax = rowlengths[i*bs+j];
3371       for (k=0; k<kmax; k++) {
3372         tmp = jj[nzcount++]/bs;
3373         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3374       }
3375     }
3376     /* sort the masked values */
3377     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3378 
3379     /* set "j" values into matrix */
3380     maskcount = 1;
3381     for (j=0; j<nmask; j++) {
3382       a->j[jcount++]  = masked[j];
3383       mask[masked[j]] = maskcount++;
3384     }
3385     /* set "a" values into matrix */
3386     ishift = bs2*a->i[i];
3387     for (j=0; j<bs; j++) {
3388       kmax = rowlengths[i*bs+j];
3389       for (k=0; k<kmax; k++) {
3390         tmp       = jj[nzcountb]/bs;
3391         block     = mask[tmp] - 1;
3392         point     = jj[nzcountb] - bs*tmp;
3393         idx       = ishift + bs2*block + j + bs*point;
3394         a->a[idx] = (MatScalar)aa[nzcountb++];
3395       }
3396     }
3397     /* zero out the mask elements we set */
3398     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3399   }
3400   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3401 
3402   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3403   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3404   ierr = PetscFree(aa);CHKERRQ(ierr);
3405   ierr = PetscFree(jj);CHKERRQ(ierr);
3406   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3407 
3408   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3409   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3410   PetscFunctionReturn(0);
3411 }
3412 
3413 #undef __FUNCT__
3414 #define __FUNCT__ "MatCreateSeqBAIJ"
3415 /*@C
3416    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3417    compressed row) format.  For good matrix assembly performance the
3418    user should preallocate the matrix storage by setting the parameter nz
3419    (or the array nnz).  By setting these parameters accurately, performance
3420    during matrix assembly can be increased by more than a factor of 50.
3421 
3422    Collective on MPI_Comm
3423 
3424    Input Parameters:
3425 +  comm - MPI communicator, set to PETSC_COMM_SELF
3426 .  bs - size of block
3427 .  m - number of rows
3428 .  n - number of columns
3429 .  nz - number of nonzero blocks  per block row (same for all rows)
3430 -  nnz - array containing the number of nonzero blocks in the various block rows
3431          (possibly different for each block row) or NULL
3432 
3433    Output Parameter:
3434 .  A - the matrix
3435 
3436    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3437    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3438    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3439 
3440    Options Database Keys:
3441 .   -mat_no_unroll - uses code that does not unroll the loops in the
3442                      block calculations (much slower)
3443 .    -mat_block_size - size of the blocks to use
3444 
3445    Level: intermediate
3446 
3447    Notes:
3448    The number of rows and columns must be divisible by blocksize.
3449 
3450    If the nnz parameter is given then the nz parameter is ignored
3451 
3452    A nonzero block is any block that as 1 or more nonzeros in it
3453 
3454    The block AIJ format is fully compatible with standard Fortran 77
3455    storage.  That is, the stored row and column indices can begin at
3456    either one (as in Fortran) or zero.  See the users' manual for details.
3457 
3458    Specify the preallocated storage with either nz or nnz (not both).
3459    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3460    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3461    matrices.
3462 
3463 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3464 @*/
3465 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3466 {
3467   PetscErrorCode ierr;
3468 
3469   PetscFunctionBegin;
3470   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3471   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3472   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3473   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3474   PetscFunctionReturn(0);
3475 }
3476 
3477 #undef __FUNCT__
3478 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3479 /*@C
3480    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3481    per row in the matrix. For good matrix assembly performance the
3482    user should preallocate the matrix storage by setting the parameter nz
3483    (or the array nnz).  By setting these parameters accurately, performance
3484    during matrix assembly can be increased by more than a factor of 50.
3485 
3486    Collective on MPI_Comm
3487 
3488    Input Parameters:
3489 +  A - the matrix
3490 .  bs - size of block
3491 .  nz - number of block nonzeros per block row (same for all rows)
3492 -  nnz - array containing the number of block nonzeros in the various block rows
3493          (possibly different for each block row) or NULL
3494 
3495    Options Database Keys:
3496 .   -mat_no_unroll - uses code that does not unroll the loops in the
3497                      block calculations (much slower)
3498 .    -mat_block_size - size of the blocks to use
3499 
3500    Level: intermediate
3501 
3502    Notes:
3503    If the nnz parameter is given then the nz parameter is ignored
3504 
3505    You can call MatGetInfo() to get information on how effective the preallocation was;
3506    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3507    You can also run with the option -info and look for messages with the string
3508    malloc in them to see if additional memory allocation was needed.
3509 
3510    The block AIJ format is fully compatible with standard Fortran 77
3511    storage.  That is, the stored row and column indices can begin at
3512    either one (as in Fortran) or zero.  See the users' manual for details.
3513 
3514    Specify the preallocated storage with either nz or nnz (not both).
3515    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3516    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3517 
3518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3519 @*/
3520 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3521 {
3522   PetscErrorCode ierr;
3523 
3524   PetscFunctionBegin;
3525   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3526   PetscValidType(B,1);
3527   PetscValidLogicalCollectiveInt(B,bs,2);
3528   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3529   PetscFunctionReturn(0);
3530 }
3531 
3532 #undef __FUNCT__
3533 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3534 /*@C
3535    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3536    (the default sequential PETSc format).
3537 
3538    Collective on MPI_Comm
3539 
3540    Input Parameters:
3541 +  A - the matrix
3542 .  i - the indices into j for the start of each local row (starts with zero)
3543 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3544 -  v - optional values in the matrix
3545 
3546    Level: developer
3547 
3548 .keywords: matrix, aij, compressed row, sparse
3549 
3550 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3551 @*/
3552 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3553 {
3554   PetscErrorCode ierr;
3555 
3556   PetscFunctionBegin;
3557   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3558   PetscValidType(B,1);
3559   PetscValidLogicalCollectiveInt(B,bs,2);
3560   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3567 /*@
3568      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3569 
3570      Collective on MPI_Comm
3571 
3572    Input Parameters:
3573 +  comm - must be an MPI communicator of size 1
3574 .  bs - size of block
3575 .  m - number of rows
3576 .  n - number of columns
3577 .  i - row indices
3578 .  j - column indices
3579 -  a - matrix values
3580 
3581    Output Parameter:
3582 .  mat - the matrix
3583 
3584    Level: advanced
3585 
3586    Notes:
3587        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3588     once the matrix is destroyed
3589 
3590        You cannot set new nonzero locations into this matrix, that will generate an error.
3591 
3592        The i and j indices are 0 based
3593 
3594        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3595 
3596 
3597 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3598 
3599 @*/
3600 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3601 {
3602   PetscErrorCode ierr;
3603   PetscInt       ii;
3604   Mat_SeqBAIJ    *baij;
3605 
3606   PetscFunctionBegin;
3607   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3608   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3609 
3610   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3611   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3612   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3613   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3614   baij = (Mat_SeqBAIJ*)(*mat)->data;
3615   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3616   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3617 
3618   baij->i = i;
3619   baij->j = j;
3620   baij->a = a;
3621 
3622   baij->singlemalloc = PETSC_FALSE;
3623   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3624   baij->free_a       = PETSC_FALSE;
3625   baij->free_ij      = PETSC_FALSE;
3626 
3627   for (ii=0; ii<m; ii++) {
3628     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3629 #if defined(PETSC_USE_DEBUG)
3630     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3631 #endif
3632   }
3633 #if defined(PETSC_USE_DEBUG)
3634   for (ii=0; ii<baij->i[m]; ii++) {
3635     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3636     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3637   }
3638 #endif
3639 
3640   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3642   PetscFunctionReturn(0);
3643 }
3644