xref: /petsc/src/mat/impls/baij/seq/baij.c (revision eb3f19e46b38d5d7d03c8a3cfecdfb705cfcba06)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
156   }
157   if (!a->sor_workt) {
158     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
159   }
160   if (!a->sor_work) {
161     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
162   }
163   work = a->mult_work;
164   t    = a->sor_workt;
165   w    = a->sor_work;
166 
167   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
168   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
169 
170   if (flag & SOR_ZERO_INITIAL_GUESS) {
171     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
172       switch (bs) {
173       case 1:
174         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
175         t[0] = b[0];
176         i2     = 1;
177         idiag += 1;
178         for (i=1; i<m; i++) {
179           v  = aa + ai[i];
180           vi = aj + ai[i];
181           nz = diag[i] - ai[i];
182           s[0] = b[i2];
183           for (j=0; j<nz; j++) {
184             xw[0] = x[vi[j]];
185             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
186           }
187           t[i2] = s[0];
188           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
189           x[i2]  = xw[0];
190           idiag += 1;
191           i2    += 1;
192         }
193         break;
194       case 2:
195         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
196         t[0] = b[0]; t[1] = b[1];
197         i2     = 2;
198         idiag += 4;
199         for (i=1; i<m; i++) {
200           v  = aa + 4*ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2]; s[1] = b[i2+1];
204           for (j=0; j<nz; j++) {
205             idx = 2*vi[j];
206             it  = 4*j;
207             xw[0] = x[idx]; xw[1] = x[1+idx];
208             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
209           }
210           t[i2] = s[0]; t[i2+1] = s[1];
211           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
212           x[i2]   = xw[0]; x[i2+1] = xw[1];
213           idiag  += 4;
214           i2     += 2;
215         }
216         break;
217       case 3:
218         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
219         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
220         i2     = 3;
221         idiag += 9;
222         for (i=1; i<m; i++) {
223           v  = aa + 9*ai[i];
224           vi = aj + ai[i];
225           nz = diag[i] - ai[i];
226           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
227           while (nz--) {
228             idx = 3*(*vi++);
229             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
230             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
231             v  += 9;
232           }
233           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
234           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
235           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
236           idiag  += 9;
237           i2     += 3;
238         }
239         break;
240       case 4:
241         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
242         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
243         i2     = 4;
244         idiag += 16;
245         for (i=1; i<m; i++) {
246           v  = aa + 16*ai[i];
247           vi = aj + ai[i];
248           nz = diag[i] - ai[i];
249           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
250           while (nz--) {
251             idx = 4*(*vi++);
252             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
253             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
254             v  += 16;
255           }
256           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
257           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
258           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
259           idiag  += 16;
260           i2     += 4;
261         }
262         break;
263       case 5:
264         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
266         i2     = 5;
267         idiag += 25;
268         for (i=1; i<m; i++) {
269           v  = aa + 25*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
273           while (nz--) {
274             idx = 5*(*vi++);
275             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
276             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
277             v  += 25;
278           }
279           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
280           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
281           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
282           idiag  += 25;
283           i2     += 5;
284         }
285         break;
286       case 6:
287         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
289         i2     = 6;
290         idiag += 36;
291         for (i=1; i<m; i++) {
292           v  = aa + 36*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
296           while (nz--) {
297             idx = 6*(*vi++);
298             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
300             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
301             v  += 36;
302           }
303           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
304           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
305           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
306           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
307           idiag  += 36;
308           i2     += 6;
309         }
310         break;
311       case 7:
312         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
313         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
314         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
315         i2     = 7;
316         idiag += 49;
317         for (i=1; i<m; i++) {
318           v  = aa + 49*ai[i];
319           vi = aj + ai[i];
320           nz = diag[i] - ai[i];
321           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
322           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
323           while (nz--) {
324             idx = 7*(*vi++);
325             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
326             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
327             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
328             v  += 49;
329           }
330           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
331           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
332           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
333           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
334           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
335           idiag  += 49;
336           i2     += 7;
337         }
338         break;
339       default:
340         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
341         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
342         i2     = bs;
343         idiag += bs2;
344         for (i=1; i<m; i++) {
345           v  = aa + bs2*ai[i];
346           vi = aj + ai[i];
347           nz = diag[i] - ai[i];
348 
349           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
350           /* copy all rows of x that are needed into contiguous space */
351           workt = work;
352           for (j=0; j<nz; j++) {
353             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
354             workt += bs;
355           }
356           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
357           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
358           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
359 
360           idiag += bs2;
361           i2    += bs;
362         }
363         break;
364       }
365       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
366       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
367       xb = t;
368     }
369     else xb = b;
370     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
371       idiag = a->idiag+bs2*(a->mbs-1);
372       i2 = bs * (m-1);
373       switch (bs) {
374       case 1:
375         s[0]  = xb[i2];
376         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
377         x[i2] = xw[0];
378         i2   -= 1;
379         for (i=m-2; i>=0; i--) {
380           v  = aa + (diag[i]+1);
381           vi = aj + diag[i] + 1;
382           nz = ai[i+1] - diag[i] - 1;
383           s[0] = xb[i2];
384           for (j=0; j<nz; j++) {
385             xw[0] = x[vi[j]];
386             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
387           }
388           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
389           x[i2]  = xw[0];
390           idiag -= 1;
391           i2    -= 1;
392         }
393         break;
394       case 2:
395         s[0]  = xb[i2]; s[1] = xb[i2+1];
396         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
397         x[i2] = xw[0]; x[i2+1] = xw[1];
398         i2    -= 2;
399         idiag -= 4;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + 4*(diag[i] + 1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2]; s[1] = xb[i2+1];
405           for (j=0; j<nz; j++) {
406             idx = 2*vi[j];
407             it  = 4*j;
408             xw[0] = x[idx]; xw[1] = x[1+idx];
409             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
410           }
411           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
412           x[i2]   = xw[0]; x[i2+1] = xw[1];
413           idiag  -= 4;
414           i2     -= 2;
415         }
416         break;
417       case 3:
418         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
419         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
420         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
421         i2    -= 3;
422         idiag -= 9;
423         for (i=m-2; i>=0; i--) {
424           v  = aa + 9*(diag[i]+1);
425           vi = aj + diag[i] + 1;
426           nz = ai[i+1] - diag[i] - 1;
427           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
428           while (nz--) {
429             idx = 3*(*vi++);
430             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
431             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
432             v  += 9;
433           }
434           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
435           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
436           idiag  -= 9;
437           i2     -= 3;
438         }
439         break;
440       case 4:
441         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
442         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
443         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
444         i2    -= 4;
445         idiag -= 16;
446         for (i=m-2; i>=0; i--) {
447           v  = aa + 16*(diag[i]+1);
448           vi = aj + diag[i] + 1;
449           nz = ai[i+1] - diag[i] - 1;
450           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
451           while (nz--) {
452             idx = 4*(*vi++);
453             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
454             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
455             v  += 16;
456           }
457           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
458           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
459           idiag  -= 16;
460           i2     -= 4;
461         }
462         break;
463       case 5:
464         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
465         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
467         i2    -= 5;
468         idiag -= 25;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 25*(diag[i]+1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
474           while (nz--) {
475             idx = 5*(*vi++);
476             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
477             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
478             v  += 25;
479           }
480           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
481           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
482           idiag  -= 25;
483           i2     -= 5;
484         }
485         break;
486       case 6:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
488         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
490         i2    -= 6;
491         idiag -= 36;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 36*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
497           while (nz--) {
498             idx = 6*(*vi++);
499             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
501             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
502             v  += 36;
503           }
504           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
505           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
506           idiag  -= 36;
507           i2     -= 6;
508         }
509         break;
510       case 7:
511         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
512         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
513         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
514         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
515         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
516         i2    -= 7;
517         idiag -= 49;
518         for (i=m-2; i>=0; i--) {
519           v  = aa + 49*(diag[i]+1);
520           vi = aj + diag[i] + 1;
521           nz = ai[i+1] - diag[i] - 1;
522           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
523           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
524           while (nz--) {
525             idx = 7*(*vi++);
526             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
527             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
528             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
529             v  += 49;
530           }
531           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
532           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
533           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
534           idiag  -= 49;
535           i2     -= 7;
536         }
537         break;
538       default:
539         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
540         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
541         i2    -= bs;
542         idiag -= bs2;
543         for (i=m-2; i>=0; i--) {
544           v  = aa + bs2*(diag[i]+1);
545           vi = aj + diag[i] + 1;
546           nz = ai[i+1] - diag[i] - 1;
547 
548           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
549           /* copy all rows of x that are needed into contiguous space */
550           workt = work;
551           for (j=0; j<nz; j++) {
552             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
553             workt += bs;
554           }
555           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
556           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
557 
558           idiag -= bs2;
559           i2    -= bs;
560         }
561         break;
562       }
563       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
564     }
565     its--;
566   }
567   while (its--) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       idiag = a->idiag;
570       i2 = 0;
571       switch (bs) {
572       case 1:
573         for (i=0; i<m; i++) {
574           v  = aa + ai[i];
575           vi = aj + ai[i];
576           nz = ai[i+1] - ai[i];
577           s[0] = b[i2];
578           for (j=0; j<nz; j++) {
579             xw[0] = x[vi[j]];
580             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
581           }
582           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
583           x[i2] += xw[0];
584           idiag += 1;
585           i2    += 1;
586         }
587         break;
588       case 2:
589         for (i=0; i<m; i++) {
590           v  = aa + 4*ai[i];
591           vi = aj + ai[i];
592           nz = ai[i+1] - ai[i];
593           s[0] = b[i2]; s[1] = b[i2+1];
594           for (j=0; j<nz; j++) {
595             idx = 2*vi[j];
596             it  = 4*j;
597             xw[0] = x[idx]; xw[1] = x[1+idx];
598             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
599           }
600           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
601           x[i2]  += xw[0]; x[i2+1] += xw[1];
602           idiag  += 4;
603           i2     += 2;
604         }
605         break;
606       case 3:
607         for (i=0; i<m; i++) {
608           v  = aa + 9*ai[i];
609           vi = aj + ai[i];
610           nz = ai[i+1] - ai[i];
611           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
612           while (nz--) {
613             idx = 3*(*vi++);
614             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
615             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
616             v  += 9;
617           }
618           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
619           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
620           idiag  += 9;
621           i2     += 3;
622         }
623         break;
624       case 4:
625         for (i=0; i<m; i++) {
626           v  = aa + 16*ai[i];
627           vi = aj + ai[i];
628           nz = ai[i+1] - ai[i];
629           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
630           while (nz--) {
631             idx = 4*(*vi++);
632             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
633             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
634             v  += 16;
635           }
636           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
637           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
638           idiag  += 16;
639           i2     += 4;
640         }
641         break;
642       case 5:
643         for (i=0; i<m; i++) {
644           v  = aa + 25*ai[i];
645           vi = aj + ai[i];
646           nz = ai[i+1] - ai[i];
647           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
648           while (nz--) {
649             idx = 5*(*vi++);
650             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
651             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
652             v  += 25;
653           }
654           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
655           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
656           idiag  += 25;
657           i2     += 5;
658         }
659         break;
660       case 6:
661         for (i=0; i<m; i++) {
662           v  = aa + 36*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
666           while (nz--) {
667             idx = 6*(*vi++);
668             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
669             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
671             v  += 36;
672           }
673           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
675           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
676           idiag  += 36;
677           i2     += 6;
678         }
679         break;
680       case 7:
681         for (i=0; i<m; i++) {
682           v  = aa + 49*ai[i];
683           vi = aj + ai[i];
684           nz = ai[i+1] - ai[i];
685           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
686           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
687           while (nz--) {
688             idx = 7*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
692             v  += 49;
693           }
694           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
695           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
697           idiag  += 49;
698           i2     += 7;
699         }
700         break;
701       default:
702         for (i=0; i<m; i++) {
703           v  = aa + bs2*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706 
707           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
708           /* copy all rows of x that are needed into contiguous space */
709           workt = work;
710           for (j=0; j<nz; j++) {
711             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
712             workt += bs;
713           }
714           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
715           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
716 
717           idiag += bs2;
718           i2    += bs;
719         }
720         break;
721       }
722       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
723     }
724     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
725       idiag = a->idiag+bs2*(a->mbs-1);
726       i2 = bs * (m-1);
727       switch (bs) {
728       case 1:
729         for (i=m-1; i>=0; i--) {
730           v  = aa + ai[i];
731           vi = aj + ai[i];
732           nz = ai[i+1] - ai[i];
733           s[0] = b[i2];
734           for (j=0; j<nz; j++) {
735             xw[0] = x[vi[j]];
736             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
737           }
738           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
739           x[i2] += xw[0];
740           idiag -= 1;
741           i2    -= 1;
742         }
743         break;
744       case 2:
745         for (i=m-1; i>=0; i--) {
746           v  = aa + 4*ai[i];
747           vi = aj + ai[i];
748           nz = ai[i+1] - ai[i];
749           s[0] = b[i2]; s[1] = b[i2+1];
750           for (j=0; j<nz; j++) {
751             idx = 2*vi[j];
752             it  = 4*j;
753             xw[0] = x[idx]; xw[1] = x[1+idx];
754             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
755           }
756           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
757           x[i2]  += xw[0]; x[i2+1] += xw[1];
758           idiag  -= 4;
759           i2     -= 2;
760         }
761         break;
762       case 3:
763         for (i=m-1; i>=0; i--) {
764           v  = aa + 9*ai[i];
765           vi = aj + ai[i];
766           nz = ai[i+1] - ai[i];
767           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
768           while (nz--) {
769             idx = 3*(*vi++);
770             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
771             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
772             v  += 9;
773           }
774           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
775           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
776           idiag  -= 9;
777           i2     -= 3;
778         }
779         break;
780       case 4:
781         for (i=m-1; i>=0; i--) {
782           v  = aa + 16*ai[i];
783           vi = aj + ai[i];
784           nz = ai[i+1] - ai[i];
785           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
786           while (nz--) {
787             idx = 4*(*vi++);
788             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
789             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
790             v  += 16;
791           }
792           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
793           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
794           idiag  -= 16;
795           i2     -= 4;
796         }
797         break;
798       case 5:
799         for (i=m-1; i>=0; i--) {
800           v  = aa + 25*ai[i];
801           vi = aj + ai[i];
802           nz = ai[i+1] - ai[i];
803           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
804           while (nz--) {
805             idx = 5*(*vi++);
806             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
807             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
808             v  += 25;
809           }
810           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
811           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
812           idiag  -= 25;
813           i2     -= 5;
814         }
815         break;
816       case 6:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 36*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
822           while (nz--) {
823             idx = 6*(*vi++);
824             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
825             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
827             v  += 36;
828           }
829           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
831           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
832           idiag  -= 36;
833           i2     -= 6;
834         }
835         break;
836       case 7:
837         for (i=m-1; i>=0; i--) {
838           v  = aa + 49*ai[i];
839           vi = aj + ai[i];
840           nz = ai[i+1] - ai[i];
841           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
842           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
843           while (nz--) {
844             idx = 7*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
848             v  += 49;
849           }
850           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
851           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
853           idiag  -= 49;
854           i2     -= 7;
855         }
856         break;
857       default:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + bs2*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862 
863           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
864           /* copy all rows of x that are needed into contiguous space */
865           workt = work;
866           for (j=0; j<nz; j++) {
867             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
868             workt += bs;
869           }
870           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
871           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
872 
873           idiag -= bs2;
874           i2    -= bs;
875         }
876         break;
877       }
878       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
879     }
880   }
881   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
882   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 
887 /*
888     Special version for direct calls from Fortran (Used in PETSc-fun3d)
889 */
890 #if defined(PETSC_HAVE_FORTRAN_CAPS)
891 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
892 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
893 #define matsetvaluesblocked4_ matsetvaluesblocked4
894 #endif
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "matsetvaluesblocked4_"
898 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
899 {
900   Mat               A  = *AA;
901   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
902   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
903   PetscInt          *ai    =a->i,*ailen=a->ilen;
904   PetscInt          *aj    =a->j,stepval,lastcol = -1;
905   const PetscScalar *value = v;
906   MatScalar         *ap,*aa = a->a,*bap;
907 
908   PetscFunctionBegin;
909   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
910   stepval = (n-1)*4;
911   for (k=0; k<m; k++) { /* loop over added rows */
912     row  = im[k];
913     rp   = aj + ai[row];
914     ap   = aa + 16*ai[row];
915     nrow = ailen[row];
916     low  = 0;
917     high = nrow;
918     for (l=0; l<n; l++) { /* loop over added columns */
919       col = in[l];
920       if (col <= lastcol)  low = 0;
921       else                high = nrow;
922       lastcol = col;
923       value   = v + k*(stepval+4 + l)*4;
924       while (high-low > 7) {
925         t = (low+high)/2;
926         if (rp[t] > col) high = t;
927         else             low  = t;
928       }
929       for (i=low; i<high; i++) {
930         if (rp[i] > col) break;
931         if (rp[i] == col) {
932           bap = ap +  16*i;
933           for (ii=0; ii<4; ii++,value+=stepval) {
934             for (jj=ii; jj<16; jj+=4) {
935               bap[jj] += *value++;
936             }
937           }
938           goto noinsert2;
939         }
940       }
941       N = nrow++ - 1;
942       high++; /* added new column index thus must search to one higher than before */
943       /* shift up all the later entries in this row */
944       for (ii=N; ii>=i; ii--) {
945         rp[ii+1] = rp[ii];
946         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
947       }
948       if (N >= i) {
949         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
950       }
951       rp[i] = col;
952       bap   = ap +  16*i;
953       for (ii=0; ii<4; ii++,value+=stepval) {
954         for (jj=ii; jj<16; jj+=4) {
955           bap[jj] = *value++;
956         }
957       }
958       noinsert2:;
959       low = i;
960     }
961     ailen[row] = nrow;
962   }
963   PetscFunctionReturnVoid();
964 }
965 
966 #if defined(PETSC_HAVE_FORTRAN_CAPS)
967 #define matsetvalues4_ MATSETVALUES4
968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
969 #define matsetvalues4_ matsetvalues4
970 #endif
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatSetValues4_"
974 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
975 {
976   Mat         A  = *AA;
977   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
979   PetscInt    *ai=a->i,*ailen=a->ilen;
980   PetscInt    *aj=a->j,brow,bcol;
981   PetscInt    ridx,cidx,lastcol = -1;
982   MatScalar   *ap,value,*aa=a->a,*bap;
983 
984   PetscFunctionBegin;
985   for (k=0; k<m; k++) { /* loop over added rows */
986     row  = im[k]; brow = row/4;
987     rp   = aj + ai[brow];
988     ap   = aa + 16*ai[brow];
989     nrow = ailen[brow];
990     low  = 0;
991     high = nrow;
992     for (l=0; l<n; l++) { /* loop over added columns */
993       col   = in[l]; bcol = col/4;
994       ridx  = row % 4; cidx = col % 4;
995       value = v[l + k*n];
996       if (col <= lastcol)  low = 0;
997       else                high = nrow;
998       lastcol = col;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > bcol) high = t;
1002         else              low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > bcol) break;
1006         if (rp[i] == bcol) {
1007           bap   = ap +  16*i + 4*cidx + ridx;
1008           *bap += value;
1009           goto noinsert1;
1010         }
1011       }
1012       N = nrow++ - 1;
1013       high++; /* added new column thus must search to one higher than before */
1014       /* shift up all the later entries in this row */
1015       for (ii=N; ii>=i; ii--) {
1016         rp[ii+1] = rp[ii];
1017         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1018       }
1019       if (N>=i) {
1020         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1021       }
1022       rp[i]                    = bcol;
1023       ap[16*i + 4*cidx + ridx] = value;
1024 noinsert1:;
1025       low = i;
1026     }
1027     ailen[brow] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 /*
1033      Checks for missing diagonals
1034 */
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1038 {
1039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1040   PetscErrorCode ierr;
1041   PetscInt       *diag,*ii = a->i,i;
1042 
1043   PetscFunctionBegin;
1044   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1045   *missing = PETSC_FALSE;
1046   if (A->rmap->n > 0 && !ii) {
1047     *missing = PETSC_TRUE;
1048     if (d) *d = 0;
1049     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1050   } else {
1051     diag = a->diag;
1052     for (i=0; i<a->mbs; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1067 {
1068   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070   PetscInt       i,j,m = a->mbs;
1071 
1072   PetscFunctionBegin;
1073   if (!a->diag) {
1074     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1075     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1076     a->free_diag = PETSC_TRUE;
1077   }
1078   for (i=0; i<m; i++) {
1079     a->diag[i] = a->i[i+1];
1080     for (j=a->i[i]; j<a->i[i+1]; j++) {
1081       if (a->j[j] == i) {
1082         a->diag[i] = j;
1083         break;
1084       }
1085     }
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1093 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1094 {
1095   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1096   PetscErrorCode ierr;
1097   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1098   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1099 
1100   PetscFunctionBegin;
1101   *nn = n;
1102   if (!ia) PetscFunctionReturn(0);
1103   if (symmetric) {
1104     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1105     nz   = tia[n];
1106   } else {
1107     tia = a->i; tja = a->j;
1108   }
1109 
1110   if (!blockcompressed && bs > 1) {
1111     (*nn) *= bs;
1112     /* malloc & create the natural set of indices */
1113     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1114     if (n) {
1115       (*ia)[0] = 0;
1116       for (j=1; j<bs; j++) {
1117         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1118       }
1119     }
1120 
1121     for (i=1; i<n; i++) {
1122       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1123       for (j=1; j<bs; j++) {
1124         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1125       }
1126     }
1127     if (n) {
1128       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1129     }
1130 
1131     if (inja) {
1132       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1133       cnt = 0;
1134       for (i=0; i<n; i++) {
1135         for (j=0; j<bs; j++) {
1136           for (k=tia[i]; k<tia[i+1]; k++) {
1137             for (l=0; l<bs; l++) {
1138               (*ja)[cnt++] = bs*tja[k] + l;
1139             }
1140           }
1141         }
1142       }
1143     }
1144 
1145     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1146       ierr = PetscFree(tia);CHKERRQ(ierr);
1147       ierr = PetscFree(tja);CHKERRQ(ierr);
1148     }
1149   } else if (oshift == 1) {
1150     if (symmetric) {
1151       nz = tia[A->rmap->n/bs];
1152       /*  add 1 to i and j indices */
1153       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1154       *ia = tia;
1155       if (ja) {
1156         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1157         *ja = tja;
1158       }
1159     } else {
1160       nz = a->i[A->rmap->n/bs];
1161       /* malloc space and  add 1 to i and j indices */
1162       ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr);
1163       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1164       if (ja) {
1165         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1166         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1167       }
1168     }
1169   } else {
1170     *ia = tia;
1171     if (ja) *ja = tja;
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1178 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1179 {
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   if (!ia) PetscFunctionReturn(0);
1184   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1185     ierr = PetscFree(*ia);CHKERRQ(ierr);
1186     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1193 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1194 {
1195   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199 #if defined(PETSC_USE_LOG)
1200   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1201 #endif
1202   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1203   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1205   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1206   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1207   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1208   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1209   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1210   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1211   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1212   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1213   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1214   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1215   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1216 
1217   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1218   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1219   ierr = PetscFree(A->data);CHKERRQ(ierr);
1220 
1221   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1237 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   switch (op) {
1244   case MAT_ROW_ORIENTED:
1245     a->roworiented = flg;
1246     break;
1247   case MAT_KEEP_NONZERO_PATTERN:
1248     a->keepnonzeropattern = flg;
1249     break;
1250   case MAT_NEW_NONZERO_LOCATIONS:
1251     a->nonew = (flg ? 0 : 1);
1252     break;
1253   case MAT_NEW_NONZERO_LOCATION_ERR:
1254     a->nonew = (flg ? -1 : 0);
1255     break;
1256   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1257     a->nonew = (flg ? -2 : 0);
1258     break;
1259   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1260     a->nounused = (flg ? -1 : 0);
1261     break;
1262   case MAT_NEW_DIAGONALS:
1263   case MAT_IGNORE_OFF_PROC_ENTRIES:
1264   case MAT_USE_HASH_TABLE:
1265     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1266     break;
1267   case MAT_SPD:
1268   case MAT_SYMMETRIC:
1269   case MAT_STRUCTURALLY_SYMMETRIC:
1270   case MAT_HERMITIAN:
1271   case MAT_SYMMETRY_ETERNAL:
1272     /* These options are handled directly by MatSetOption() */
1273     break;
1274   default:
1275     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1283 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1284 {
1285   PetscErrorCode ierr;
1286   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1287   MatScalar      *aa_i;
1288   PetscScalar    *v_i;
1289 
1290   PetscFunctionBegin;
1291   bs  = A->rmap->bs;
1292   bs2 = bs*bs;
1293   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1294 
1295   bn  = row/bs;   /* Block number */
1296   bp  = row % bs; /* Block Position */
1297   M   = ai[bn+1] - ai[bn];
1298   *nz = bs*M;
1299 
1300   if (v) {
1301     *v = 0;
1302     if (*nz) {
1303       ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr);
1304       for (i=0; i<M; i++) { /* for each block in the block row */
1305         v_i  = *v + i*bs;
1306         aa_i = aa + bs2*(ai[bn] + i);
1307         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1308       }
1309     }
1310   }
1311 
1312   if (idx) {
1313     *idx = 0;
1314     if (*nz) {
1315       ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr);
1316       for (i=0; i<M; i++) { /* for each block in the block row */
1317         idx_i = *idx + i*bs;
1318         itmp  = bs*aj[ai[bn] + i];
1319         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1320       }
1321     }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1328 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1340 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341 {
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1346   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1354 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1355 {
1356   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1357   Mat            C;
1358   PetscErrorCode ierr;
1359   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1360   PetscInt       *rows,*cols,bs2=a->bs2;
1361   MatScalar      *array;
1362 
1363   PetscFunctionBegin;
1364   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1365   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1366     ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr);
1367 
1368     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1369     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1370     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1371     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1372     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1373     ierr = PetscFree(col);CHKERRQ(ierr);
1374   } else {
1375     C = *B;
1376   }
1377 
1378   array = a->a;
1379   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1380   for (i=0; i<mbs; i++) {
1381     cols[0] = i*bs;
1382     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1383     len = ai[i+1] - ai[i];
1384     for (j=0; j<len; j++) {
1385       rows[0] = (*aj++)*bs;
1386       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1387       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1388       array += bs2;
1389     }
1390   }
1391   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1392 
1393   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395 
1396   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1397     *B = C;
1398   } else {
1399     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1407 {
1408   PetscErrorCode ierr;
1409   Mat            Btrans;
1410 
1411   PetscFunctionBegin;
1412   *f   = PETSC_FALSE;
1413   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1414   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1415   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1421 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1422 {
1423   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1424   PetscErrorCode ierr;
1425   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1426   int            fd;
1427   PetscScalar    *aa;
1428   FILE           *file;
1429 
1430   PetscFunctionBegin;
1431   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1432   ierr        = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr);
1433   col_lens[0] = MAT_FILE_CLASSID;
1434 
1435   col_lens[1] = A->rmap->N;
1436   col_lens[2] = A->cmap->n;
1437   col_lens[3] = a->nz*bs2;
1438 
1439   /* store lengths of each row and write (including header) to file */
1440   count = 0;
1441   for (i=0; i<a->mbs; i++) {
1442     for (j=0; j<bs; j++) {
1443       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1444     }
1445   }
1446   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1447   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1448 
1449   /* store column indices (zero start index) */
1450   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1451   count = 0;
1452   for (i=0; i<a->mbs; i++) {
1453     for (j=0; j<bs; j++) {
1454       for (k=a->i[i]; k<a->i[i+1]; k++) {
1455         for (l=0; l<bs; l++) {
1456           jj[count++] = bs*a->j[k] + l;
1457         }
1458       }
1459     }
1460   }
1461   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1462   ierr = PetscFree(jj);CHKERRQ(ierr);
1463 
1464   /* store nonzero values */
1465   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1466   count = 0;
1467   for (i=0; i<a->mbs; i++) {
1468     for (j=0; j<bs; j++) {
1469       for (k=a->i[i]; k<a->i[i+1]; k++) {
1470         for (l=0; l<bs; l++) {
1471           aa[count++] = a->a[bs2*k + l*bs + j];
1472         }
1473       }
1474     }
1475   }
1476   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1477   ierr = PetscFree(aa);CHKERRQ(ierr);
1478 
1479   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1480   if (file) {
1481     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1488 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1489 {
1490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1491   PetscErrorCode    ierr;
1492   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1493   PetscViewerFormat format;
1494 
1495   PetscFunctionBegin;
1496   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1497   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1498     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1499   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1500     const char *matname;
1501     Mat        aij;
1502     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1503     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1504     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1505     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1507   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1508       PetscFunctionReturn(0);
1509   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1510     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1511     for (i=0; i<a->mbs; i++) {
1512       for (j=0; j<bs; j++) {
1513         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1514         for (k=a->i[i]; k<a->i[i+1]; k++) {
1515           for (l=0; l<bs; l++) {
1516 #if defined(PETSC_USE_COMPLEX)
1517             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1518               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1519                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1520             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1521               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1522                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1523             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1524               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1525             }
1526 #else
1527             if (a->a[bs2*k + l*bs + j] != 0.0) {
1528               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1529             }
1530 #endif
1531           }
1532         }
1533         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1534       }
1535     }
1536     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1537   } else {
1538     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1539     for (i=0; i<a->mbs; i++) {
1540       for (j=0; j<bs; j++) {
1541         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1542         for (k=a->i[i]; k<a->i[i+1]; k++) {
1543           for (l=0; l<bs; l++) {
1544 #if defined(PETSC_USE_COMPLEX)
1545             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1547                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1548             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1550                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1551             } else {
1552               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1553             }
1554 #else
1555             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1556 #endif
1557           }
1558         }
1559         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1560       }
1561     }
1562     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1563   }
1564   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #include <petscdraw.h>
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1571 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1572 {
1573   Mat               A = (Mat) Aa;
1574   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1575   PetscErrorCode    ierr;
1576   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1577   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1578   MatScalar         *aa;
1579   PetscViewer       viewer;
1580   PetscViewerFormat format;
1581 
1582   PetscFunctionBegin;
1583   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1584   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1585 
1586   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1587 
1588   /* loop over matrix elements drawing boxes */
1589 
1590   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1591     color = PETSC_DRAW_BLUE;
1592     for (i=0,row=0; i<mbs; i++,row+=bs) {
1593       for (j=a->i[i]; j<a->i[i+1]; j++) {
1594         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1595         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1596         aa  = a->a + j*bs2;
1597         for (k=0; k<bs; k++) {
1598           for (l=0; l<bs; l++) {
1599             if (PetscRealPart(*aa++) >=  0.) continue;
1600             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1601           }
1602         }
1603       }
1604     }
1605     color = PETSC_DRAW_CYAN;
1606     for (i=0,row=0; i<mbs; i++,row+=bs) {
1607       for (j=a->i[i]; j<a->i[i+1]; j++) {
1608         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610         aa  = a->a + j*bs2;
1611         for (k=0; k<bs; k++) {
1612           for (l=0; l<bs; l++) {
1613             if (PetscRealPart(*aa++) != 0.) continue;
1614             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1615           }
1616         }
1617       }
1618     }
1619     color = PETSC_DRAW_RED;
1620     for (i=0,row=0; i<mbs; i++,row+=bs) {
1621       for (j=a->i[i]; j<a->i[i+1]; j++) {
1622         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624         aa  = a->a + j*bs2;
1625         for (k=0; k<bs; k++) {
1626           for (l=0; l<bs; l++) {
1627             if (PetscRealPart(*aa++) <= 0.) continue;
1628             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1629           }
1630         }
1631       }
1632     }
1633   } else {
1634     /* use contour shading to indicate magnitude of values */
1635     /* first determine max of all nonzero values */
1636     PetscDraw popup;
1637     PetscReal scale,maxv = 0.0;
1638 
1639     for (i=0; i<a->nz*a->bs2; i++) {
1640       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1641     }
1642     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1643     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1644     if (popup) {
1645       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1646     }
1647     for (i=0,row=0; i<mbs; i++,row+=bs) {
1648       for (j=a->i[i]; j<a->i[i+1]; j++) {
1649         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1650         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1651         aa  = a->a + j*bs2;
1652         for (k=0; k<bs; k++) {
1653           for (l=0; l<bs; l++) {
1654             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1655             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1656           }
1657         }
1658       }
1659     }
1660   }
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1666 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1667 {
1668   PetscErrorCode ierr;
1669   PetscReal      xl,yl,xr,yr,w,h;
1670   PetscDraw      draw;
1671   PetscBool      isnull;
1672 
1673   PetscFunctionBegin;
1674   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1675   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1676 
1677   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1678   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1679   xr  += w;    yr += h;  xl = -w;     yl = -h;
1680   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1681   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1682   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatView_SeqBAIJ"
1688 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1689 {
1690   PetscErrorCode ierr;
1691   PetscBool      iascii,isbinary,isdraw;
1692 
1693   PetscFunctionBegin;
1694   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1696   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1697   if (iascii) {
1698     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1699   } else if (isbinary) {
1700     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1701   } else if (isdraw) {
1702     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1703   } else {
1704     Mat B;
1705     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1706     ierr = MatView(B,viewer);CHKERRQ(ierr);
1707     ierr = MatDestroy(&B);CHKERRQ(ierr);
1708   }
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1715 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1716 {
1717   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1718   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1719   PetscInt    *ai = a->i,*ailen = a->ilen;
1720   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1721   MatScalar   *ap,*aa = a->a;
1722 
1723   PetscFunctionBegin;
1724   for (k=0; k<m; k++) { /* loop over rows */
1725     row = im[k]; brow = row/bs;
1726     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1727     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1728     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1729     nrow = ailen[brow];
1730     for (l=0; l<n; l++) { /* loop over columns */
1731       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1732       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1733       col  = in[l];
1734       bcol = col/bs;
1735       cidx = col%bs;
1736       ridx = row%bs;
1737       high = nrow;
1738       low  = 0; /* assume unsorted */
1739       while (high-low > 5) {
1740         t = (low+high)/2;
1741         if (rp[t] > bcol) high = t;
1742         else             low  = t;
1743       }
1744       for (i=low; i<high; i++) {
1745         if (rp[i] > bcol) break;
1746         if (rp[i] == bcol) {
1747           *v++ = ap[bs2*i+bs*cidx+ridx];
1748           goto finished;
1749         }
1750       }
1751       *v++ = 0.0;
1752 finished:;
1753     }
1754   }
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1760 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1761 {
1762   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1763   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1764   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1765   PetscErrorCode    ierr;
1766   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1767   PetscBool         roworiented=a->roworiented;
1768   const PetscScalar *value     = v;
1769   MatScalar         *ap,*aa = a->a,*bap;
1770 
1771   PetscFunctionBegin;
1772   if (roworiented) {
1773     stepval = (n-1)*bs;
1774   } else {
1775     stepval = (m-1)*bs;
1776   }
1777   for (k=0; k<m; k++) { /* loop over added rows */
1778     row = im[k];
1779     if (row < 0) continue;
1780 #if defined(PETSC_USE_DEBUG)
1781     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1782 #endif
1783     rp   = aj + ai[row];
1784     ap   = aa + bs2*ai[row];
1785     rmax = imax[row];
1786     nrow = ailen[row];
1787     low  = 0;
1788     high = nrow;
1789     for (l=0; l<n; l++) { /* loop over added columns */
1790       if (in[l] < 0) continue;
1791 #if defined(PETSC_USE_DEBUG)
1792       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1793 #endif
1794       col = in[l];
1795       if (roworiented) {
1796         value = v + (k*(stepval+bs) + l)*bs;
1797       } else {
1798         value = v + (l*(stepval+bs) + k)*bs;
1799       }
1800       if (col <= lastcol) low = 0;
1801       else high = nrow;
1802       lastcol = col;
1803       while (high-low > 7) {
1804         t = (low+high)/2;
1805         if (rp[t] > col) high = t;
1806         else             low  = t;
1807       }
1808       for (i=low; i<high; i++) {
1809         if (rp[i] > col) break;
1810         if (rp[i] == col) {
1811           bap = ap +  bs2*i;
1812           if (roworiented) {
1813             if (is == ADD_VALUES) {
1814               for (ii=0; ii<bs; ii++,value+=stepval) {
1815                 for (jj=ii; jj<bs2; jj+=bs) {
1816                   bap[jj] += *value++;
1817                 }
1818               }
1819             } else {
1820               for (ii=0; ii<bs; ii++,value+=stepval) {
1821                 for (jj=ii; jj<bs2; jj+=bs) {
1822                   bap[jj] = *value++;
1823                 }
1824               }
1825             }
1826           } else {
1827             if (is == ADD_VALUES) {
1828               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1829                 for (jj=0; jj<bs; jj++) {
1830                   bap[jj] += value[jj];
1831                 }
1832                 bap += bs;
1833               }
1834             } else {
1835               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1836                 for (jj=0; jj<bs; jj++) {
1837                   bap[jj]  = value[jj];
1838                 }
1839                 bap += bs;
1840               }
1841             }
1842           }
1843           goto noinsert2;
1844         }
1845       }
1846       if (nonew == 1) goto noinsert2;
1847       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1848       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1849       N = nrow++ - 1; high++;
1850       /* shift up all the later entries in this row */
1851       for (ii=N; ii>=i; ii--) {
1852         rp[ii+1] = rp[ii];
1853         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1854       }
1855       if (N >= i) {
1856         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1857       }
1858       rp[i] = col;
1859       bap   = ap +  bs2*i;
1860       if (roworiented) {
1861         for (ii=0; ii<bs; ii++,value+=stepval) {
1862           for (jj=ii; jj<bs2; jj+=bs) {
1863             bap[jj] = *value++;
1864           }
1865         }
1866       } else {
1867         for (ii=0; ii<bs; ii++,value+=stepval) {
1868           for (jj=0; jj<bs; jj++) {
1869             *bap++ = *value++;
1870           }
1871         }
1872       }
1873 noinsert2:;
1874       low = i;
1875     }
1876     ailen[row] = nrow;
1877   }
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1883 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1884 {
1885   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1886   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1887   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1888   PetscErrorCode ierr;
1889   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1890   MatScalar      *aa  = a->a,*ap;
1891   PetscReal      ratio=0.6;
1892 
1893   PetscFunctionBegin;
1894   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1895 
1896   if (m) rmax = ailen[0];
1897   for (i=1; i<mbs; i++) {
1898     /* move each row back by the amount of empty slots (fshift) before it*/
1899     fshift += imax[i-1] - ailen[i-1];
1900     rmax    = PetscMax(rmax,ailen[i]);
1901     if (fshift) {
1902       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1903       N  = ailen[i];
1904       for (j=0; j<N; j++) {
1905         ip[j-fshift] = ip[j];
1906 
1907         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1908       }
1909     }
1910     ai[i] = ai[i-1] + ailen[i-1];
1911   }
1912   if (mbs) {
1913     fshift += imax[mbs-1] - ailen[mbs-1];
1914     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1915   }
1916 
1917   /* reset ilen and imax for each row */
1918   a->nonzerorowcnt = 0;
1919   for (i=0; i<mbs; i++) {
1920     ailen[i] = imax[i] = ai[i+1] - ai[i];
1921     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1922   }
1923   a->nz = ai[mbs];
1924 
1925   /* diagonals may have moved, so kill the diagonal pointers */
1926   a->idiagvalid = PETSC_FALSE;
1927   if (fshift && a->diag) {
1928     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1929     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1930     a->diag = 0;
1931   }
1932   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1933   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1934   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1935   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1936 
1937   A->info.mallocs    += a->reallocs;
1938   a->reallocs         = 0;
1939   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1940 
1941   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 /*
1946    This function returns an array of flags which indicate the locations of contiguous
1947    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1948    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1949    Assume: sizes should be long enough to hold all the values.
1950 */
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1953 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1954 {
1955   PetscInt  i,j,k,row;
1956   PetscBool flg;
1957 
1958   PetscFunctionBegin;
1959   for (i=0,j=0; i<n; j++) {
1960     row = idx[i];
1961     if (row%bs!=0) { /* Not the begining of a block */
1962       sizes[j] = 1;
1963       i++;
1964     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1965       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1966       i++;
1967     } else { /* Begining of the block, so check if the complete block exists */
1968       flg = PETSC_TRUE;
1969       for (k=1; k<bs; k++) {
1970         if (row+k != idx[i+k]) { /* break in the block */
1971           flg = PETSC_FALSE;
1972           break;
1973         }
1974       }
1975       if (flg) { /* No break in the bs */
1976         sizes[j] = bs;
1977         i       += bs;
1978       } else {
1979         sizes[j] = 1;
1980         i++;
1981       }
1982     }
1983   }
1984   *bs_max = j;
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1990 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1991 {
1992   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1993   PetscErrorCode    ierr;
1994   PetscInt          i,j,k,count,*rows;
1995   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1996   PetscScalar       zero = 0.0;
1997   MatScalar         *aa;
1998   const PetscScalar *xx;
1999   PetscScalar       *bb;
2000 
2001   PetscFunctionBegin;
2002   /* fix right hand side if needed */
2003   if (x && b) {
2004     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2005     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2006     for (i=0; i<is_n; i++) {
2007       bb[is_idx[i]] = diag*xx[is_idx[i]];
2008     }
2009     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2010     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2011   }
2012 
2013   /* Make a copy of the IS and  sort it */
2014   /* allocate memory for rows,sizes */
2015   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2016 
2017   /* copy IS values to rows, and sort them */
2018   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2019   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2020 
2021   if (baij->keepnonzeropattern) {
2022     for (i=0; i<is_n; i++) sizes[i] = 1;
2023     bs_max          = is_n;
2024   } else {
2025     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2026     A->nonzerostate++;
2027   }
2028 
2029   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2030     row = rows[j];
2031     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2032     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2033     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2034     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2035       if (diag != (PetscScalar)0.0) {
2036         if (baij->ilen[row/bs] > 0) {
2037           baij->ilen[row/bs]       = 1;
2038           baij->j[baij->i[row/bs]] = row/bs;
2039 
2040           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2041         }
2042         /* Now insert all the diagonal values for this bs */
2043         for (k=0; k<bs; k++) {
2044           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2045         }
2046       } else { /* (diag == 0.0) */
2047         baij->ilen[row/bs] = 0;
2048       } /* end (diag == 0.0) */
2049     } else { /* (sizes[i] != bs) */
2050 #if defined(PETSC_USE_DEBUG)
2051       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2052 #endif
2053       for (k=0; k<count; k++) {
2054         aa[0] =  zero;
2055         aa   += bs;
2056       }
2057       if (diag != (PetscScalar)0.0) {
2058         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2059       }
2060     }
2061   }
2062 
2063   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2064   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2070 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2071 {
2072   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2073   PetscErrorCode    ierr;
2074   PetscInt          i,j,k,count;
2075   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2076   PetscScalar       zero = 0.0;
2077   MatScalar         *aa;
2078   const PetscScalar *xx;
2079   PetscScalar       *bb;
2080   PetscBool         *zeroed,vecs = PETSC_FALSE;
2081 
2082   PetscFunctionBegin;
2083   /* fix right hand side if needed */
2084   if (x && b) {
2085     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2086     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2087     vecs = PETSC_TRUE;
2088   }
2089 
2090   /* zero the columns */
2091   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2092   for (i=0; i<is_n; i++) {
2093     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2094     zeroed[is_idx[i]] = PETSC_TRUE;
2095   }
2096   for (i=0; i<A->rmap->N; i++) {
2097     if (!zeroed[i]) {
2098       row = i/bs;
2099       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2100         for (k=0; k<bs; k++) {
2101           col = bs*baij->j[j] + k;
2102           if (zeroed[col]) {
2103             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2104             if (vecs) bb[i] -= aa[0]*xx[col];
2105             aa[0] = 0.0;
2106           }
2107         }
2108       }
2109     } else if (vecs) bb[i] = diag*xx[i];
2110   }
2111   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2112   if (vecs) {
2113     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2114     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2115   }
2116 
2117   /* zero the rows */
2118   for (i=0; i<is_n; i++) {
2119     row   = is_idx[i];
2120     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2121     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2122     for (k=0; k<count; k++) {
2123       aa[0] =  zero;
2124       aa   += bs;
2125     }
2126     if (diag != (PetscScalar)0.0) {
2127       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2128     }
2129   }
2130   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2136 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2137 {
2138   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2139   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2140   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2141   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2142   PetscErrorCode ierr;
2143   PetscInt       ridx,cidx,bs2=a->bs2;
2144   PetscBool      roworiented=a->roworiented;
2145   MatScalar      *ap,value,*aa=a->a,*bap;
2146 
2147   PetscFunctionBegin;
2148   for (k=0; k<m; k++) { /* loop over added rows */
2149     row  = im[k];
2150     brow = row/bs;
2151     if (row < 0) continue;
2152 #if defined(PETSC_USE_DEBUG)
2153     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2154 #endif
2155     rp   = aj + ai[brow];
2156     ap   = aa + bs2*ai[brow];
2157     rmax = imax[brow];
2158     nrow = ailen[brow];
2159     low  = 0;
2160     high = nrow;
2161     for (l=0; l<n; l++) { /* loop over added columns */
2162       if (in[l] < 0) continue;
2163 #if defined(PETSC_USE_DEBUG)
2164       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2165 #endif
2166       col  = in[l]; bcol = col/bs;
2167       ridx = row % bs; cidx = col % bs;
2168       if (roworiented) {
2169         value = v[l + k*n];
2170       } else {
2171         value = v[k + l*m];
2172       }
2173       if (col <= lastcol) low = 0; else high = nrow;
2174       lastcol = col;
2175       while (high-low > 7) {
2176         t = (low+high)/2;
2177         if (rp[t] > bcol) high = t;
2178         else              low  = t;
2179       }
2180       for (i=low; i<high; i++) {
2181         if (rp[i] > bcol) break;
2182         if (rp[i] == bcol) {
2183           bap = ap +  bs2*i + bs*cidx + ridx;
2184           if (is == ADD_VALUES) *bap += value;
2185           else                  *bap  = value;
2186           goto noinsert1;
2187         }
2188       }
2189       if (nonew == 1) goto noinsert1;
2190       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2191       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2192       N = nrow++ - 1; high++;
2193       /* shift up all the later entries in this row */
2194       for (ii=N; ii>=i; ii--) {
2195         rp[ii+1] = rp[ii];
2196         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2197       }
2198       if (N>=i) {
2199         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2200       }
2201       rp[i]                      = bcol;
2202       ap[bs2*i + bs*cidx + ridx] = value;
2203       a->nz++;
2204       A->nonzerostate++;
2205 noinsert1:;
2206       low = i;
2207     }
2208     ailen[brow] = nrow;
2209   }
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2215 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2216 {
2217   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2218   Mat            outA;
2219   PetscErrorCode ierr;
2220   PetscBool      row_identity,col_identity;
2221 
2222   PetscFunctionBegin;
2223   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2224   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2225   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2226   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2227 
2228   outA            = inA;
2229   inA->factortype = MAT_FACTOR_LU;
2230 
2231   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2232 
2233   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2234   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2235   a->row = row;
2236   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2237   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2238   a->col = col;
2239 
2240   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2241   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2242   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2243   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2244 
2245   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2246   if (!a->solve_work) {
2247     ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr);
2248     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2249   }
2250   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2256 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2257 {
2258   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2259   PetscInt    i,nz,mbs;
2260 
2261   PetscFunctionBegin;
2262   nz  = baij->maxnz;
2263   mbs = baij->mbs;
2264   for (i=0; i<nz; i++) {
2265     baij->j[i] = indices[i];
2266   }
2267   baij->nz = nz;
2268   for (i=0; i<mbs; i++) {
2269     baij->ilen[i] = baij->imax[i];
2270   }
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2276 /*@
2277     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2278        in the matrix.
2279 
2280   Input Parameters:
2281 +  mat - the SeqBAIJ matrix
2282 -  indices - the column indices
2283 
2284   Level: advanced
2285 
2286   Notes:
2287     This can be called if you have precomputed the nonzero structure of the
2288   matrix and want to provide it to the matrix object to improve the performance
2289   of the MatSetValues() operation.
2290 
2291     You MUST have set the correct numbers of nonzeros per row in the call to
2292   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2293 
2294     MUST be called before any calls to MatSetValues();
2295 
2296 @*/
2297 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2298 {
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2303   PetscValidPointer(indices,2);
2304   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2310 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2311 {
2312   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2313   PetscErrorCode ierr;
2314   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2315   PetscReal      atmp;
2316   PetscScalar    *x,zero = 0.0;
2317   MatScalar      *aa;
2318   PetscInt       ncols,brow,krow,kcol;
2319 
2320   PetscFunctionBegin;
2321   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2322   bs  = A->rmap->bs;
2323   aa  = a->a;
2324   ai  = a->i;
2325   aj  = a->j;
2326   mbs = a->mbs;
2327 
2328   ierr = VecSet(v,zero);CHKERRQ(ierr);
2329   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2330   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2331   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2332   for (i=0; i<mbs; i++) {
2333     ncols = ai[1] - ai[0]; ai++;
2334     brow  = bs*i;
2335     for (j=0; j<ncols; j++) {
2336       for (kcol=0; kcol<bs; kcol++) {
2337         for (krow=0; krow<bs; krow++) {
2338           atmp = PetscAbsScalar(*aa);aa++;
2339           row  = brow + krow;   /* row index */
2340           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2341         }
2342       }
2343       aj++;
2344     }
2345   }
2346   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatCopy_SeqBAIJ"
2352 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2353 {
2354   PetscErrorCode ierr;
2355 
2356   PetscFunctionBegin;
2357   /* If the two matrices have the same copy implementation, use fast copy. */
2358   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2359     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2360     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2361     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2362 
2363     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2364     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2365     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2366   } else {
2367     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2368   }
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2374 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2375 {
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2385 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2386 {
2387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2388 
2389   PetscFunctionBegin;
2390   *array = a->a;
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2396 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2397 {
2398   PetscFunctionBegin;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2404 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2405 {
2406   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2407   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2408   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2409   PetscErrorCode ierr;
2410 
2411   PetscFunctionBegin;
2412   /* Set the number of nonzeros in the new matrix */
2413   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2419 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2420 {
2421   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2422   PetscErrorCode ierr;
2423   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2424   PetscBLASInt   one=1;
2425 
2426   PetscFunctionBegin;
2427   if (str == SAME_NONZERO_PATTERN) {
2428     PetscScalar  alpha = a;
2429     PetscBLASInt bnz;
2430     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2431     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2432     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2433   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2434     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2435   } else {
2436     Mat      B;
2437     PetscInt *nnz;
2438     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2439     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2440     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2441     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2442     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2443     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2444     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2445     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2446     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2447     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2448     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2449     ierr = PetscFree(nnz);CHKERRQ(ierr);
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2456 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2457 {
2458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2459   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2460   MatScalar   *aa = a->a;
2461 
2462   PetscFunctionBegin;
2463   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2470 {
2471   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473   MatScalar   *aa = a->a;
2474 
2475   PetscFunctionBegin;
2476   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2482 /*
2483     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2484 */
2485 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2486 {
2487   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2488   PetscErrorCode ierr;
2489   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2490   PetscInt       nz = a->i[m],row,*jj,mr,col;
2491 
2492   PetscFunctionBegin;
2493   *nn = n;
2494   if (!ia) PetscFunctionReturn(0);
2495   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2496   else {
2497     ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2498     ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2499     ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2500     jj   = a->j;
2501     for (i=0; i<nz; i++) {
2502       collengths[jj[i]]++;
2503     }
2504     cia[0] = oshift;
2505     for (i=0; i<n; i++) {
2506       cia[i+1] = cia[i] + collengths[i];
2507     }
2508     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2509     jj   = a->j;
2510     for (row=0; row<m; row++) {
2511       mr = a->i[row+1] - a->i[row];
2512       for (i=0; i<mr; i++) {
2513         col = *jj++;
2514 
2515         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2516       }
2517     }
2518     ierr = PetscFree(collengths);CHKERRQ(ierr);
2519     *ia  = cia; *ja = cja;
2520   }
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2526 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2527 {
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   if (!ia) PetscFunctionReturn(0);
2532   ierr = PetscFree(*ia);CHKERRQ(ierr);
2533   ierr = PetscFree(*ja);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 /*
2538  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2539  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2540  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2541  */
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2544 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2545 {
2546   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2547   PetscErrorCode ierr;
2548   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2549   PetscInt       nz = a->i[m],row,*jj,mr,col;
2550   PetscInt       *cspidx;
2551 
2552   PetscFunctionBegin;
2553   *nn = n;
2554   if (!ia) PetscFunctionReturn(0);
2555 
2556   ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2557   ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2558   ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2559   ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr);
2560   jj   = a->j;
2561   for (i=0; i<nz; i++) {
2562     collengths[jj[i]]++;
2563   }
2564   cia[0] = oshift;
2565   for (i=0; i<n; i++) {
2566     cia[i+1] = cia[i] + collengths[i];
2567   }
2568   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2569   jj   = a->j;
2570   for (row=0; row<m; row++) {
2571     mr = a->i[row+1] - a->i[row];
2572     for (i=0; i<mr; i++) {
2573       col = *jj++;
2574       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2575       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2576     }
2577   }
2578   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2579   *ia    = cia; *ja = cja;
2580   *spidx = cspidx;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2586 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2587 {
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2592   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 /* -------------------------------------------------------------------*/
2597 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2598                                        MatGetRow_SeqBAIJ,
2599                                        MatRestoreRow_SeqBAIJ,
2600                                        MatMult_SeqBAIJ_N,
2601                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2602                                        MatMultTranspose_SeqBAIJ,
2603                                        MatMultTransposeAdd_SeqBAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                /* 10*/ 0,
2608                                        MatLUFactor_SeqBAIJ,
2609                                        0,
2610                                        0,
2611                                        MatTranspose_SeqBAIJ,
2612                                /* 15*/ MatGetInfo_SeqBAIJ,
2613                                        MatEqual_SeqBAIJ,
2614                                        MatGetDiagonal_SeqBAIJ,
2615                                        MatDiagonalScale_SeqBAIJ,
2616                                        MatNorm_SeqBAIJ,
2617                                /* 20*/ 0,
2618                                        MatAssemblyEnd_SeqBAIJ,
2619                                        MatSetOption_SeqBAIJ,
2620                                        MatZeroEntries_SeqBAIJ,
2621                                /* 24*/ MatZeroRows_SeqBAIJ,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                /* 29*/ MatSetUp_SeqBAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                /* 34*/ MatDuplicate_SeqBAIJ,
2632                                        0,
2633                                        0,
2634                                        MatILUFactor_SeqBAIJ,
2635                                        0,
2636                                /* 39*/ MatAXPY_SeqBAIJ,
2637                                        MatGetSubMatrices_SeqBAIJ,
2638                                        MatIncreaseOverlap_SeqBAIJ,
2639                                        MatGetValues_SeqBAIJ,
2640                                        MatCopy_SeqBAIJ,
2641                                /* 44*/ 0,
2642                                        MatScale_SeqBAIJ,
2643                                        0,
2644                                        0,
2645                                        MatZeroRowsColumns_SeqBAIJ,
2646                                /* 49*/ 0,
2647                                        MatGetRowIJ_SeqBAIJ,
2648                                        MatRestoreRowIJ_SeqBAIJ,
2649                                        MatGetColumnIJ_SeqBAIJ,
2650                                        MatRestoreColumnIJ_SeqBAIJ,
2651                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        MatSetValuesBlocked_SeqBAIJ,
2656                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2657                                        MatDestroy_SeqBAIJ,
2658                                        MatView_SeqBAIJ,
2659                                        0,
2660                                        0,
2661                                /* 64*/ 0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2667                                        0,
2668                                        MatConvert_Basic,
2669                                        0,
2670                                        0,
2671                                /* 74*/ 0,
2672                                        MatFDColoringApply_BAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                /* 79*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        MatLoad_SeqBAIJ,
2681                                /* 84*/ 0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                /* 89*/ 0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                /* 94*/ 0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                /* 99*/ 0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                /*104*/ 0,
2702                                        MatRealPart_SeqBAIJ,
2703                                        MatImaginaryPart_SeqBAIJ,
2704                                        0,
2705                                        0,
2706                                /*109*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        MatMissingDiagonal_SeqBAIJ,
2711                                /*114*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                /*119*/ 0,
2717                                        0,
2718                                        MatMultHermitianTranspose_SeqBAIJ,
2719                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2720                                        0,
2721                                /*124*/ 0,
2722                                        0,
2723                                        MatInvertBlockDiagonal_SeqBAIJ,
2724                                        0,
2725                                        0,
2726                                /*129*/ 0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                /*134*/ 0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                /*139*/ 0,
2737                                        0,
2738                                        0,
2739                                        MatFDColoringSetUp_SeqXAIJ
2740 };
2741 
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2744 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2745 {
2746   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2747   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2752 
2753   /* allocate space for values if not already there */
2754   if (!aij->saved_values) {
2755     ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr);
2756     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2757   }
2758 
2759   /* copy values over */
2760   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 #undef __FUNCT__
2765 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2766 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2767 {
2768   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2769   PetscErrorCode ierr;
2770   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2771 
2772   PetscFunctionBegin;
2773   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2774   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2775 
2776   /* copy values over */
2777   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2782 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2783 
2784 #undef __FUNCT__
2785 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2786 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2787 {
2788   Mat_SeqBAIJ    *b;
2789   PetscErrorCode ierr;
2790   PetscInt       i,mbs,nbs,bs2;
2791   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2792 
2793   PetscFunctionBegin;
2794   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2795   if (nz == MAT_SKIP_ALLOCATION) {
2796     skipallocation = PETSC_TRUE;
2797     nz             = 0;
2798   }
2799 
2800   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2803   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2804 
2805   B->preallocated = PETSC_TRUE;
2806 
2807   mbs = B->rmap->n/bs;
2808   nbs = B->cmap->n/bs;
2809   bs2 = bs*bs;
2810 
2811   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2812 
2813   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2814   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2815   if (nnz) {
2816     for (i=0; i<mbs; i++) {
2817       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2818       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2819     }
2820   }
2821 
2822   b    = (Mat_SeqBAIJ*)B->data;
2823   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2824   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2825   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2826 
2827   if (!flg) {
2828     switch (bs) {
2829     case 1:
2830       B->ops->mult    = MatMult_SeqBAIJ_1;
2831       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2832       break;
2833     case 2:
2834       B->ops->mult    = MatMult_SeqBAIJ_2;
2835       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2836       break;
2837     case 3:
2838       B->ops->mult    = MatMult_SeqBAIJ_3;
2839       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2840       break;
2841     case 4:
2842       B->ops->mult    = MatMult_SeqBAIJ_4;
2843       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2844       break;
2845     case 5:
2846       B->ops->mult    = MatMult_SeqBAIJ_5;
2847       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2848       break;
2849     case 6:
2850       B->ops->mult    = MatMult_SeqBAIJ_6;
2851       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2852       break;
2853     case 7:
2854       B->ops->mult    = MatMult_SeqBAIJ_7;
2855       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2856       break;
2857     case 15:
2858       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2859       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2860       break;
2861     default:
2862       B->ops->mult    = MatMult_SeqBAIJ_N;
2863       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2864       break;
2865     }
2866   }
2867   B->ops->sor = MatSOR_SeqBAIJ;
2868   b->mbs = mbs;
2869   b->nbs = nbs;
2870   if (!skipallocation) {
2871     if (!b->imax) {
2872       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2873       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2874 
2875       b->free_imax_ilen = PETSC_TRUE;
2876     }
2877     /* b->ilen will count nonzeros in each block row so far. */
2878     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2879     if (!nnz) {
2880       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2881       else if (nz < 0) nz = 1;
2882       for (i=0; i<mbs; i++) b->imax[i] = nz;
2883       nz = nz*mbs;
2884     } else {
2885       nz = 0;
2886       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2887     }
2888 
2889     /* allocate the matrix space */
2890     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2891     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2892     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2893     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2894     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2895 
2896     b->singlemalloc = PETSC_TRUE;
2897     b->i[0]         = 0;
2898     for (i=1; i<mbs+1; i++) {
2899       b->i[i] = b->i[i-1] + b->imax[i-1];
2900     }
2901     b->free_a  = PETSC_TRUE;
2902     b->free_ij = PETSC_TRUE;
2903 #if defined(PETSC_THREADCOMM_ACTIVE)
2904     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2905 #endif
2906   } else {
2907     b->free_a  = PETSC_FALSE;
2908     b->free_ij = PETSC_FALSE;
2909   }
2910 
2911   b->bs2              = bs2;
2912   b->mbs              = mbs;
2913   b->nz               = 0;
2914   b->maxnz            = nz;
2915   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2916   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2917   PetscFunctionReturn(0);
2918 }
2919 
2920 #undef __FUNCT__
2921 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2922 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2923 {
2924   PetscInt       i,m,nz,nz_max=0,*nnz;
2925   PetscScalar    *values=0;
2926   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2927   PetscErrorCode ierr;
2928 
2929   PetscFunctionBegin;
2930   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2931   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2932   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2933   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2934   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2935   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2936   m    = B->rmap->n/bs;
2937 
2938   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2939   ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr);
2940   for (i=0; i<m; i++) {
2941     nz = ii[i+1]- ii[i];
2942     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2943     nz_max = PetscMax(nz_max, nz);
2944     nnz[i] = nz;
2945   }
2946   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2947   ierr = PetscFree(nnz);CHKERRQ(ierr);
2948 
2949   values = (PetscScalar*)V;
2950   if (!values) {
2951     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2952   }
2953   for (i=0; i<m; i++) {
2954     PetscInt          ncols  = ii[i+1] - ii[i];
2955     const PetscInt    *icols = jj + ii[i];
2956     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2957     if (!roworiented) {
2958       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2959     } else {
2960       PetscInt j;
2961       for (j=0; j<ncols; j++) {
2962         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2963         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2964       }
2965     }
2966   }
2967   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2968   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2969   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2970   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2971   PetscFunctionReturn(0);
2972 }
2973 
2974 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2975 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
2976 #if defined(PETSC_HAVE_MUMPS)
2977 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2978 #endif
2979 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
2980 
2981 /*MC
2982    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2983    block sparse compressed row format.
2984 
2985    Options Database Keys:
2986 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2987 
2988   Level: beginner
2989 
2990 .seealso: MatCreateSeqBAIJ()
2991 M*/
2992 
2993 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2994 
2995 #undef __FUNCT__
2996 #define __FUNCT__ "MatCreate_SeqBAIJ"
2997 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2998 {
2999   PetscErrorCode ierr;
3000   PetscMPIInt    size;
3001   Mat_SeqBAIJ    *b;
3002 
3003   PetscFunctionBegin;
3004   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3005   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3006 
3007   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3008   B->data = (void*)b;
3009   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3010 
3011   b->row          = 0;
3012   b->col          = 0;
3013   b->icol         = 0;
3014   b->reallocs     = 0;
3015   b->saved_values = 0;
3016 
3017   b->roworiented        = PETSC_TRUE;
3018   b->nonew              = 0;
3019   b->diag               = 0;
3020   B->spptr              = 0;
3021   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3022   b->keepnonzeropattern = PETSC_FALSE;
3023   b->xtoy               = 0;
3024   b->XtoY               = 0;
3025 
3026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3029 #if defined(PETSC_HAVE_MUMPS)
3030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3031 #endif
3032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 #undef __FUNCT__
3047 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3048 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3049 {
3050   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3051   PetscErrorCode ierr;
3052   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3053 
3054   PetscFunctionBegin;
3055   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3056 
3057   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3058     c->imax           = a->imax;
3059     c->ilen           = a->ilen;
3060     c->free_imax_ilen = PETSC_FALSE;
3061   } else {
3062     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3063     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3064     for (i=0; i<mbs; i++) {
3065       c->imax[i] = a->imax[i];
3066       c->ilen[i] = a->ilen[i];
3067     }
3068     c->free_imax_ilen = PETSC_TRUE;
3069   }
3070 
3071   /* allocate the matrix space */
3072   if (mallocmatspace) {
3073     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3074       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3075       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3076 
3077       c->i            = a->i;
3078       c->j            = a->j;
3079       c->singlemalloc = PETSC_FALSE;
3080       c->free_a       = PETSC_TRUE;
3081       c->free_ij      = PETSC_FALSE;
3082       c->parent       = A;
3083       C->preallocated = PETSC_TRUE;
3084       C->assembled    = PETSC_TRUE;
3085 
3086       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3087       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3088       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3089     } else {
3090       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3091       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3092 
3093       c->singlemalloc = PETSC_TRUE;
3094       c->free_a       = PETSC_TRUE;
3095       c->free_ij      = PETSC_TRUE;
3096 
3097       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3098       if (mbs > 0) {
3099         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3100         if (cpvalues == MAT_COPY_VALUES) {
3101           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3102         } else {
3103           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3104         }
3105       }
3106       C->preallocated = PETSC_TRUE;
3107       C->assembled    = PETSC_TRUE;
3108     }
3109   }
3110 
3111   c->roworiented = a->roworiented;
3112   c->nonew       = a->nonew;
3113 
3114   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3115   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3116 
3117   c->bs2         = a->bs2;
3118   c->mbs         = a->mbs;
3119   c->nbs         = a->nbs;
3120 
3121   if (a->diag) {
3122     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3123       c->diag      = a->diag;
3124       c->free_diag = PETSC_FALSE;
3125     } else {
3126       ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr);
3127       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3128       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3129       c->free_diag = PETSC_TRUE;
3130     }
3131   } else c->diag = 0;
3132 
3133   c->nz         = a->nz;
3134   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3135   c->solve_work = NULL;
3136   c->mult_work  = NULL;
3137   c->sor_workt  = NULL;
3138   c->sor_work   = NULL;
3139 
3140   c->compressedrow.use   = a->compressedrow.use;
3141   c->compressedrow.nrows = a->compressedrow.nrows;
3142   if (a->compressedrow.use) {
3143     i    = a->compressedrow.nrows;
3144     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3145     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3146     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3147     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3148   } else {
3149     c->compressedrow.use    = PETSC_FALSE;
3150     c->compressedrow.i      = NULL;
3151     c->compressedrow.rindex = NULL;
3152   }
3153   C->nonzerostate = A->nonzerostate;
3154 
3155   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3156   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3162 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3163 {
3164   PetscErrorCode ierr;
3165 
3166   PetscFunctionBegin;
3167   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3168   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3169   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3170   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 #undef __FUNCT__
3175 #define __FUNCT__ "MatLoad_SeqBAIJ"
3176 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3177 {
3178   Mat_SeqBAIJ    *a;
3179   PetscErrorCode ierr;
3180   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3181   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3182   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3183   PetscInt       *masked,nmask,tmp,bs2,ishift;
3184   PetscMPIInt    size;
3185   int            fd;
3186   PetscScalar    *aa;
3187   MPI_Comm       comm;
3188 
3189   PetscFunctionBegin;
3190   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3191   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3192   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3193   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3194   if (bs < 0) bs = 1;
3195   bs2  = bs*bs;
3196 
3197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3198   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3199   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3200   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3201   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3202   M = header[1]; N = header[2]; nz = header[3];
3203 
3204   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3205   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3206 
3207   /*
3208      This code adds extra rows to make sure the number of rows is
3209     divisible by the blocksize
3210   */
3211   mbs        = M/bs;
3212   extra_rows = bs - M + bs*(mbs);
3213   if (extra_rows == bs) extra_rows = 0;
3214   else mbs++;
3215   if (extra_rows) {
3216     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3217   }
3218 
3219   /* Set global sizes if not already set */
3220   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3221     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3222   } else { /* Check if the matrix global sizes are correct */
3223     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3224     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3225       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3226     }
3227     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3228   }
3229 
3230   /* read in row lengths */
3231   ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
3232   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3233   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3234 
3235   /* read in column indices */
3236   ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr);
3237   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3238   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3239 
3240   /* loop over row lengths determining block row lengths */
3241   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3242   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3243   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3244   rowcount = 0;
3245   nzcount  = 0;
3246   for (i=0; i<mbs; i++) {
3247     nmask = 0;
3248     for (j=0; j<bs; j++) {
3249       kmax = rowlengths[rowcount];
3250       for (k=0; k<kmax; k++) {
3251         tmp = jj[nzcount++]/bs;
3252         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3253       }
3254       rowcount++;
3255     }
3256     browlengths[i] += nmask;
3257     /* zero out the mask elements we set */
3258     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3259   }
3260 
3261   /* Do preallocation  */
3262   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3263   a    = (Mat_SeqBAIJ*)newmat->data;
3264 
3265   /* set matrix "i" values */
3266   a->i[0] = 0;
3267   for (i=1; i<= mbs; i++) {
3268     a->i[i]      = a->i[i-1] + browlengths[i-1];
3269     a->ilen[i-1] = browlengths[i-1];
3270   }
3271   a->nz = 0;
3272   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3273 
3274   /* read in nonzero values */
3275   ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr);
3276   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3277   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3278 
3279   /* set "a" and "j" values into matrix */
3280   nzcount = 0; jcount = 0;
3281   for (i=0; i<mbs; i++) {
3282     nzcountb = nzcount;
3283     nmask    = 0;
3284     for (j=0; j<bs; j++) {
3285       kmax = rowlengths[i*bs+j];
3286       for (k=0; k<kmax; k++) {
3287         tmp = jj[nzcount++]/bs;
3288         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3289       }
3290     }
3291     /* sort the masked values */
3292     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3293 
3294     /* set "j" values into matrix */
3295     maskcount = 1;
3296     for (j=0; j<nmask; j++) {
3297       a->j[jcount++]  = masked[j];
3298       mask[masked[j]] = maskcount++;
3299     }
3300     /* set "a" values into matrix */
3301     ishift = bs2*a->i[i];
3302     for (j=0; j<bs; j++) {
3303       kmax = rowlengths[i*bs+j];
3304       for (k=0; k<kmax; k++) {
3305         tmp       = jj[nzcountb]/bs;
3306         block     = mask[tmp] - 1;
3307         point     = jj[nzcountb] - bs*tmp;
3308         idx       = ishift + bs2*block + j + bs*point;
3309         a->a[idx] = (MatScalar)aa[nzcountb++];
3310       }
3311     }
3312     /* zero out the mask elements we set */
3313     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3314   }
3315   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3316 
3317   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3318   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3319   ierr = PetscFree(aa);CHKERRQ(ierr);
3320   ierr = PetscFree(jj);CHKERRQ(ierr);
3321   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3322 
3323   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3324   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3325   PetscFunctionReturn(0);
3326 }
3327 
3328 #undef __FUNCT__
3329 #define __FUNCT__ "MatCreateSeqBAIJ"
3330 /*@C
3331    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3332    compressed row) format.  For good matrix assembly performance the
3333    user should preallocate the matrix storage by setting the parameter nz
3334    (or the array nnz).  By setting these parameters accurately, performance
3335    during matrix assembly can be increased by more than a factor of 50.
3336 
3337    Collective on MPI_Comm
3338 
3339    Input Parameters:
3340 +  comm - MPI communicator, set to PETSC_COMM_SELF
3341 .  bs - size of block
3342 .  m - number of rows
3343 .  n - number of columns
3344 .  nz - number of nonzero blocks  per block row (same for all rows)
3345 -  nnz - array containing the number of nonzero blocks in the various block rows
3346          (possibly different for each block row) or NULL
3347 
3348    Output Parameter:
3349 .  A - the matrix
3350 
3351    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3352    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3353    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3354 
3355    Options Database Keys:
3356 .   -mat_no_unroll - uses code that does not unroll the loops in the
3357                      block calculations (much slower)
3358 .    -mat_block_size - size of the blocks to use
3359 
3360    Level: intermediate
3361 
3362    Notes:
3363    The number of rows and columns must be divisible by blocksize.
3364 
3365    If the nnz parameter is given then the nz parameter is ignored
3366 
3367    A nonzero block is any block that as 1 or more nonzeros in it
3368 
3369    The block AIJ format is fully compatible with standard Fortran 77
3370    storage.  That is, the stored row and column indices can begin at
3371    either one (as in Fortran) or zero.  See the users' manual for details.
3372 
3373    Specify the preallocated storage with either nz or nnz (not both).
3374    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3375    allocation.  See Users-Manual: ch_mat for details.
3376    matrices.
3377 
3378 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3379 @*/
3380 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3381 {
3382   PetscErrorCode ierr;
3383 
3384   PetscFunctionBegin;
3385   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3386   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3387   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3388   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3389   PetscFunctionReturn(0);
3390 }
3391 
3392 #undef __FUNCT__
3393 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3394 /*@C
3395    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3396    per row in the matrix. For good matrix assembly performance the
3397    user should preallocate the matrix storage by setting the parameter nz
3398    (or the array nnz).  By setting these parameters accurately, performance
3399    during matrix assembly can be increased by more than a factor of 50.
3400 
3401    Collective on MPI_Comm
3402 
3403    Input Parameters:
3404 +  B - the matrix
3405 .  bs - size of block
3406 .  nz - number of block nonzeros per block row (same for all rows)
3407 -  nnz - array containing the number of block nonzeros in the various block rows
3408          (possibly different for each block row) or NULL
3409 
3410    Options Database Keys:
3411 .   -mat_no_unroll - uses code that does not unroll the loops in the
3412                      block calculations (much slower)
3413 .    -mat_block_size - size of the blocks to use
3414 
3415    Level: intermediate
3416 
3417    Notes:
3418    If the nnz parameter is given then the nz parameter is ignored
3419 
3420    You can call MatGetInfo() to get information on how effective the preallocation was;
3421    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3422    You can also run with the option -info and look for messages with the string
3423    malloc in them to see if additional memory allocation was needed.
3424 
3425    The block AIJ format is fully compatible with standard Fortran 77
3426    storage.  That is, the stored row and column indices can begin at
3427    either one (as in Fortran) or zero.  See the users' manual for details.
3428 
3429    Specify the preallocated storage with either nz or nnz (not both).
3430    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3431    allocation.  See Users-Manual: ch_mat for details.
3432 
3433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3434 @*/
3435 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3436 {
3437   PetscErrorCode ierr;
3438 
3439   PetscFunctionBegin;
3440   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3441   PetscValidType(B,1);
3442   PetscValidLogicalCollectiveInt(B,bs,2);
3443   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3444   PetscFunctionReturn(0);
3445 }
3446 
3447 #undef __FUNCT__
3448 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3449 /*@C
3450    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3451    (the default sequential PETSc format).
3452 
3453    Collective on MPI_Comm
3454 
3455    Input Parameters:
3456 +  B - the matrix
3457 .  i - the indices into j for the start of each local row (starts with zero)
3458 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3459 -  v - optional values in the matrix
3460 
3461    Level: developer
3462 
3463    Notes:
3464    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3465    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3466    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3467    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3468    block column and the second index is over columns within a block.
3469 
3470 .keywords: matrix, aij, compressed row, sparse
3471 
3472 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3473 @*/
3474 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3475 {
3476   PetscErrorCode ierr;
3477 
3478   PetscFunctionBegin;
3479   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3480   PetscValidType(B,1);
3481   PetscValidLogicalCollectiveInt(B,bs,2);
3482   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3483   PetscFunctionReturn(0);
3484 }
3485 
3486 
3487 #undef __FUNCT__
3488 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3489 /*@
3490      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3491 
3492      Collective on MPI_Comm
3493 
3494    Input Parameters:
3495 +  comm - must be an MPI communicator of size 1
3496 .  bs - size of block
3497 .  m - number of rows
3498 .  n - number of columns
3499 .  i - row indices
3500 .  j - column indices
3501 -  a - matrix values
3502 
3503    Output Parameter:
3504 .  mat - the matrix
3505 
3506    Level: advanced
3507 
3508    Notes:
3509        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3510     once the matrix is destroyed
3511 
3512        You cannot set new nonzero locations into this matrix, that will generate an error.
3513 
3514        The i and j indices are 0 based
3515 
3516        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3517 
3518       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3519       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3520       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3521       with column-major ordering within blocks.
3522 
3523 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3524 
3525 @*/
3526 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3527 {
3528   PetscErrorCode ierr;
3529   PetscInt       ii;
3530   Mat_SeqBAIJ    *baij;
3531 
3532   PetscFunctionBegin;
3533   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3534   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3535 
3536   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3537   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3538   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3539   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3540   baij = (Mat_SeqBAIJ*)(*mat)->data;
3541   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3542   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3543 
3544   baij->i = i;
3545   baij->j = j;
3546   baij->a = a;
3547 
3548   baij->singlemalloc = PETSC_FALSE;
3549   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3550   baij->free_a       = PETSC_FALSE;
3551   baij->free_ij      = PETSC_FALSE;
3552 
3553   for (ii=0; ii<m; ii++) {
3554     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3555 #if defined(PETSC_USE_DEBUG)
3556     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3557 #endif
3558   }
3559 #if defined(PETSC_USE_DEBUG)
3560   for (ii=0; ii<baij->i[m]; ii++) {
3561     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3562     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3563   }
3564 #endif
3565 
3566   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3567   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   PetscFunctionReturn(0);
3569 }
3570